mirror of
https://e.coding.net/circlecloud/AliKeywordSearch.git
synced 2024-11-16 00:48:59 +00:00
326 lines
11 KiB
C#
326 lines
11 KiB
C#
|
using System;
|
|||
|
using System.Collections;
|
|||
|
using System.Diagnostics;
|
|||
|
using System.IO;
|
|||
|
using System.Net;
|
|||
|
using System.Net.Security;
|
|||
|
using System.Net.Sockets;
|
|||
|
using System.Security.Authentication;
|
|||
|
using System.Security.Cryptography.X509Certificates;
|
|||
|
using System.Text;
|
|||
|
using System.Text.RegularExpressions;
|
|||
|
using System.Threading;
|
|||
|
/************************************************************************/
|
|||
|
/* Author:huliang
|
|||
|
* Email:huliang@yahoo.cn
|
|||
|
* QQ:12658501
|
|||
|
* 说明:转载请注明出处
|
|||
|
/************************************************************************/
|
|||
|
|
|||
|
namespace EnAliKeywordSearch
|
|||
|
{
|
|||
|
/// <summary>
/// Plain property bag describing one HTTP request made by <c>HttpHelper</c>:
/// where to connect, what to request, and which optional headers/body to send.
/// </summary>
public class HttpArgs
{
    // ---- connection target ----

    /// <summary>Host name or address the socket connects to; also sent in the Host header.</summary>
    public string Host { get; set; }

    /// <summary>TCP port the socket connects to; also sent in the Host header.</summary>
    public int Port { get; set; }

    // ---- request line ----

    /// <summary>Request target written on the request line.</summary>
    public string Url { get; set; }

    // ---- headers ----

    /// <summary>Value of the User-Agent header.</summary>
    public string UA { get; set; }

    /// <summary>Value of the Accept header; the header is skipped when null or empty.</summary>
    public string Accept { get; set; }

    /// <summary>Value of the Referer header; the header is skipped when null or empty.</summary>
    public string Referer { get; set; }

    /// <summary>Value of the Cookie header; the header is skipped when null or empty.</summary>
    public string Cookie { get; set; }

    // ---- body ----

    /// <summary>Request body appended after the headers for POST requests.</summary>
    public string Data { get; set; }
}
|
|||
|
|
|||
|
public static class HttpHelper
|
|||
|
{
|
|||
|
/// <summary>
/// HTTP request methods this helper can serialize.
/// </summary>
enum HttpMethod
{
    /// <summary>Request without a body.</summary>
    GET = 0,

    /// <summary>Request whose body is sent as form data.</summary>
    POST = 1
}
|
|||
|
|
|||
|
#region HttpWebRequest & HttpWebResponse
|
|||
|
|
|||
|
/// <summary>
/// Performs an HTTP GET for an absolute URL over a raw socket.
/// </summary>
/// <param name="url">Absolute URL that contains a scheme separator, e.g. <c>http://host/path</c>.</param>
/// <returns>
/// The text returned by the socket request (may be empty), or <c>null</c> when
/// <paramref name="url"/> is null/empty, cannot be parsed, or the request fails.
/// </returns>
public static string Get(string url)
{
    // ParseURL dereferences its argument, so a missing URL is rejected here
    // instead of surfacing as a NullReferenceException.
    if (string.IsNullOrEmpty(url))
    {
        return null;
    }

    HttpArgs args = ParseURL(url);
    if (args == null)
    {
        // The URL could not be split into host/port/path: there is nothing to connect to.
        return null;
    }

    return InternalSocketHttp(args, HttpMethod.GET);
}
|
|||
|
|
|||
|
/// <summary>
/// Placeholder POST overload. It is not implemented: no request is made and
/// none of the arguments are used.
/// </summary>
/// <param name="url">Request address (unused).</param>
/// <param name="bytes">POST payload (unused).</param>
/// <param name="cookies">Cookie container (unused).</param>
/// <param name="encoding">Text encoding (unused).</param>
/// <returns>Always <c>null</c>.</returns>
public static string Post(string url, byte[] bytes, CookieContainer cookies, Encoding encoding)
{
    // Stub: callers always receive null.
    return null;
}
|
|||
|
|
|||
|
/// <summary>
/// Splits an absolute URL into host, port and request path, and fills in the
/// default request headers used by this helper.
/// </summary>
/// <param name="strUrl">
/// Absolute URL such as <c>http://www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html</c>.
/// </param>
/// <returns>
/// A populated <see cref="HttpArgs"/>, or <c>null</c> when the URL is null/empty,
/// has no scheme separator, has no host, or carries an invalid port.
/// </returns>
private static HttpArgs ParseURL(string strUrl)
{
    if (string.IsNullOrEmpty(strUrl))
    {
        return null;
    }

    // "http://host/path" -> "host/path". A separator at index 0 (or none) means no scheme.
    int schemeEnd = strUrl.IndexOf("//", StringComparison.Ordinal);
    if (schemeEnd <= 0)
    {
        return null;
    }

    string remainder = strUrl.Substring(schemeEnd + 2);

    // Everything before the first '/' is "host[:port]"; the rest is the request path.
    string authority = remainder;
    string path = "/"; // a URL without a path still needs a valid request target
    int pathStart = remainder.IndexOf('/');
    if (pathStart >= 0)
    {
        authority = remainder.Substring(0, pathStart);
        path = remainder.Substring(pathStart);
    }
    authority = authority.Trim();

    // Look for the port inside the authority only, so a ':' in the path or
    // query string can no longer be mistaken for a port separator.
    string host = authority;
    int port = 80;
    int colon = authority.IndexOf(':');
    if (colon >= 0)
    {
        host = authority.Substring(0, colon);
        string portText = authority.Substring(colon + 1);
        if (portText.Length > 0)
        {
            if (!int.TryParse(portText, out port)
                || port < IPEndPoint.MinPort
                || port > IPEndPoint.MaxPort)
            {
                return null;
            }
        }
    }

    if (host.Length == 0)
    {
        return null;
    }

    return new HttpArgs
    {
        Host = host,
        Port = port,
        Url = path,
        Referer = "",
        Cookie = "",
        Accept = "text/html",
        UA = "Mozilla/5.0+(Compatible;+Baiduspider/2.0;++http://www.baidu.com/search/spider.html)"
    };
}
|
|||
|
#endregion
|
|||
|
|
|||
|
#region Socket
|
|||
|
|
|||
|
/// <summary>
/// Issues a GET request described by an already populated <see cref="HttpArgs"/>.
/// </summary>
/// <param name="args">Connection target, request path and headers.</param>
/// <returns>Whatever <c>InternalSocketHttp</c> returns for the request.</returns>
public static string Get(HttpArgs args)
{
    string response = InternalSocketHttp(args, HttpMethod.GET);
    return response;
}
|
|||
|
|
|||
|
/// <summary>
/// Issues a POST request described by <paramref name="args"/>.
/// </summary>
/// <param name="endpoint">Not used by this method; the connection target is taken from <paramref name="args"/>.</param>
/// <param name="args">Connection target, request path, headers and body.</param>
/// <returns>Whatever <c>InternalSocketHttp</c> returns for the request.</returns>
public static string Post(IPEndPoint endpoint, HttpArgs args)
{
    string response = InternalSocketHttp(args, HttpMethod.POST);
    return response;
}
|
|||
|
|
|||
|
/// <summary>
/// Opens a TCP connection to <c>args.Host</c>:<c>args.Port</c>, sends the
/// serialized request and returns the parsed response text.
/// </summary>
/// <param name="args">Connection target and request description.</param>
/// <param name="method">HTTP method used to serialize the request.</param>
/// <returns>
/// The text produced by <c>ParseResponse</c>, or <c>null</c> when
/// <paramref name="args"/> has no host, nothing could be sent, or an
/// exception occurred while talking to the server.
/// </returns>
static string InternalSocketHttp(HttpArgs args, HttpMethod method)
{
    // args can be null when the caller's URL parsing failed. Return the usual
    // failure value instead of relying on a swallowed NullReferenceException.
    if (args == null || string.IsNullOrEmpty(args.Host))
    {
        return null;
    }

    using (Socket sK = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
    {
        try
        {
            sK.Connect(args.Host, args.Port);
            if (!sK.Connected)
            {
                return null;
            }

            byte[] request = ParseHttpArgs(method, args);
            if (sK.Send(request) > 0)
            {
                return ParseResponse(sK, args);
            }
        }
        catch (Exception ex)
        {
            // Best-effort helper: report the failure and fall through to the null result.
            Console.WriteLine(ex.Message);
        }
    }

    return null;
}
|
|||
|
|
|||
|
// Upper bound on nested redirects followed on one thread; stops redirect loops.
private const int MaxRedirects = 10;

// Number of redirects currently being followed on this thread.
[ThreadStatic]
private static int s_redirectDepth;

/// <summary>
/// Reads the response header from the socket and, depending on the status
/// line, either follows a 302 redirect or returns the response body.
/// </summary>
/// <param name="sK">Connected socket on which the request has already been sent.</param>
/// <param name="args">Request description; its target is rewritten when a redirect is followed.</param>
/// <returns>
/// The body text for a 200 response, the result of the follow-up GET for a 302
/// response that carries a Location header, and an empty string otherwise
/// (including when the redirect limit is reached).
/// </returns>
private static string ParseResponse(Socket sK, HttpArgs args)
{
    string header = ReadHeaderProcess(sK);

    if (header.StartsWith("HTTP/1.1 400 Bad Request", StringComparison.Ordinal))
    {
        return string.Empty;
    }

    if (header.StartsWith("HTTP/1.1 302", StringComparison.Ordinal))
    {
        string location = FindHeaderValue(header, "Location");
        if (string.IsNullOrEmpty(location) || s_redirectDepth >= MaxRedirects)
        {
            return string.Empty;
        }

        ApplyRedirectTarget(args, location);

        // Get() recurses back into this method on the same thread, so a
        // thread-local counter is enough to bound the redirect chain.
        s_redirectDepth++;
        try
        {
            return Get(args);
        }
        finally
        {
            s_redirectDepth--;
        }
    }

    if (header.StartsWith("HTTP/1.1 200", StringComparison.Ordinal))
    {
        string lengthText = FindHeaderValue(header, "Content-Length");
        if (lengthText == null)
        {
            // No Content-Length header: read until the fallback reader decides the body is complete.
            return ParseResponse(sK);
        }

        int contentLength = Convert.ToInt32(lengthText);
        if (contentLength > 0)
        {
            return ReceiveBody(sK, contentLength);
        }
    }

    return string.Empty;
}

// Returns the trimmed value of the first header line whose name equals `name`
// (ASCII case-insensitive), or null when no such header line exists.
private static string FindHeaderValue(string header, string name)
{
    string[] lines = header.Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);

    // Index 0 is the status line, which is not a "name: value" pair.
    for (int i = 1; i < lines.Length; i++)
    {
        int colon = lines[i].IndexOf(':');
        if (colon <= 0)
        {
            continue;
        }

        string candidate = lines[i].Substring(0, colon).Trim();
        if (string.Equals(candidate, name, StringComparison.OrdinalIgnoreCase))
        {
            return lines[i].Substring(colon + 1).Trim();
        }
    }

    return null;
}

// Points `args` at the redirect target: an absolute Location replaces host, port and
// path; anything else only replaces the request path.
private static void ApplyRedirectTarget(HttpArgs args, string location)
{
    if (location.IndexOf("//", StringComparison.Ordinal) > 0)
    {
        HttpArgs target = ParseURL(location);
        if (target != null && !string.IsNullOrEmpty(target.Host))
        {
            args.Host = target.Host;
            args.Port = target.Port;
            args.Url = string.IsNullOrEmpty(target.Url) ? "/" : target.Url;
            return;
        }
    }

    args.Url = location;
}

// Reads up to `contentLength` body bytes, looping because a single Receive may return
// fewer bytes than requested, and decodes only the bytes that actually arrived.
private static string ReceiveBody(Socket sK, int contentLength)
{
    byte[] body = new byte[contentLength];
    int received = 0;
    while (received < contentLength)
    {
        int read = sK.Receive(body, received, contentLength - received, SocketFlags.None);
        if (read <= 0)
        {
            // Peer closed the connection early; keep what arrived.
            break;
        }
        received += read;
    }

    return received > 0 ? Encoding.Default.GetString(body, 0, received) : string.Empty;
}
|
|||
|
|
|||
|
/// <summary>
/// Reads the response one byte at a time until the blank line that ends the
/// HTTP header (or a literal <c>&lt;/html&gt;</c>) has been received.
/// </summary>
/// <param name="sK">Connected socket to read from.</param>
/// <returns>
/// Everything read, including the terminating marker. The text is shorter
/// (possibly empty) when the peer closes the connection before a marker arrives.
/// </returns>
static string ReadHeaderProcess(Socket sK)
{
    StringBuilder builder = new StringBuilder();
    byte[] single = new byte[1];

    while (true)
    {
        int read = sK.Receive(single, SocketFlags.None);
        if (read <= 0)
        {
            // Zero bytes means the peer closed the connection; without this
            // exit the loop would spin forever on a closed socket.
            break;
        }

        builder.Append((char)single[0]);

        // The markers are checked after every appended byte, so testing only the
        // tail is equivalent to searching the whole text each time.
        if (HeaderEndsWith(builder, "\r\n\r\n") || HeaderEndsWith(builder, "</html>"))
        {
            break;
        }
    }

    return builder.ToString();
}

// True when the accumulated text ends with `marker` (ordinal comparison).
private static bool HeaderEndsWith(StringBuilder text, string marker)
{
    if (text.Length < marker.Length)
    {
        return false;
    }

    int offset = text.Length - marker.Length;
    for (int i = 0; i < marker.Length; i++)
    {
        if (text[offset + i] != marker[i])
        {
            return false;
        }
    }

    return true;
}
|
|||
|
/// <summary>
/// Reads response text from the socket until an <c>&lt;/html&gt;</c> tag (any
/// letter case) or a NUL character shows up, or the peer closes the connection.
/// Note: no timeout is configured in this method, so the blocking receive can
/// wait indefinitely when the peer keeps the connection open without sending
/// one of those markers.
/// </summary>
/// <param name="sK">Connected socket to read from.</param>
/// <returns>All text received so far; an empty string when nothing was received.</returns>
static string ParseResponse(Socket sK)
{
    const string EndTag = "</html>";

    StringBuilder builder = new StringBuilder();
    byte[] buff = new byte[1024];

    // A stateful decoder keeps a multi-byte character intact when its bytes
    // are split across two Receive calls.
    Decoder decoder = Encoding.Default.GetDecoder();
    char[] chars = new char[Encoding.Default.GetMaxCharCount(buff.Length)];

    int len = sK.Receive(buff);
    while (len > 0)
    {
        // Only the newly appended text can complete a marker, plus a short
        // overlap for an end tag that straddles two chunks.
        int scanFrom = Math.Max(0, builder.Length - (EndTag.Length - 1));

        int charCount = decoder.GetChars(buff, 0, len, chars, 0);
        builder.Append(chars, 0, charCount);

        string tail = builder.ToString(scanFrom, builder.Length - scanFrom);
        if (tail.IndexOf(EndTag, StringComparison.OrdinalIgnoreCase) >= 0 || tail.IndexOf('\0') >= 0)
        {
            break;
        }

        len = sK.Receive(buff);
    }

    return builder.ToString();
}
|
|||
|
#endregion
|
|||
|
|
|||
|
#region Helper
|
|||
|
|
|||
|
/// <summary>
/// Serializes a request (request line, headers and, for POST, the body)
/// into the bytes that are written to the socket.
/// </summary>
/// <param name="method">HTTP method to put on the request line.</param>
/// <param name="args">Request target, header values and optional body.</param>
/// <returns>The request encoded with <c>Encoding.Default</c>.</returns>
static byte[] ParseHttpArgs(HttpMethod method, HttpArgs args)
{
    // HTTP header lines end with CRLF on every platform. AppendLine would emit
    // Environment.NewLine, which is a bare LF on non-Windows systems.
    const string CrLf = "\r\n";

    bool isPost = method.Equals(HttpMethod.POST);

    // An empty request target is not a valid request line; fall back to the root path.
    string target = string.IsNullOrEmpty(args.Url) ? "/" : args.Url;

    // A missing body is sent as an empty body instead of throwing.
    string body = isPost ? (args.Data ?? string.Empty) : string.Empty;

    StringBuilder builder = new StringBuilder();

    builder.Append(isPost ? "POST " : "GET ").Append(target).Append(" HTTP/1.1").Append(CrLf);
    if (isPost)
    {
        builder.Append("Content-Type: application/x-www-form-urlencoded").Append(CrLf);
    }

    builder.Append(string.Format("Host: {0}:{1}", args.Host, args.Port)).Append(CrLf);
    builder.Append("User-Agent: ").Append(args.UA).Append(CrLf);

    // Optional headers are only written when a value is present.
    if (!string.IsNullOrEmpty(args.Referer))
    {
        builder.Append("Referer: ").Append(args.Referer).Append(CrLf);
    }

    // Note: no "Connection: close" header is sent (it was disabled in the original code).

    if (!string.IsNullOrEmpty(args.Accept))
    {
        builder.Append("Accept: ").Append(args.Accept).Append(CrLf);
    }

    if (!string.IsNullOrEmpty(args.Cookie))
    {
        builder.Append("Cookie: ").Append(args.Cookie).Append(CrLf);
    }

    if (isPost)
    {
        // Length is counted in bytes of the same encoding used to send the body.
        builder.Append("Content-Length: ").Append(Encoding.Default.GetByteCount(body)).Append(CrLf);
    }

    // Blank line terminates the header section; the body (if any) follows.
    builder.Append(CrLf);
    builder.Append(body);

    return Encoding.Default.GetBytes(builder.ToString());
}
|
|||
|
#endregion
|
|||
|
|
|||
|
}
|
|||
|
|
|||
|
/// <summary>
/// Static helper for measuring elapsed milliseconds and for reading the
/// local clock as milliseconds since 1970-01-01.
/// </summary>
public class MilliTimer
{
    // Reference date built from numbers, so its value does not depend on how
    // the current culture/calendar would parse the text "1970-1-1".
    private static readonly DateTime s_referenceDate = new DateTime(1970, 1, 1);

    // Monotonic timer: elapsed readings are not disturbed by DST or clock adjustments.
    private static readonly Stopwatch s_stopwatch = new Stopwatch();

    /// <summary>Starts (or restarts) the elapsed-time measurement.</summary>
    public static void start()
    {
        s_stopwatch.Reset();
        s_stopwatch.Start();
    }

    /// <summary>
    /// Returns the milliseconds elapsed since the last <see cref="start"/> call.
    /// </summary>
    /// <returns>
    /// Elapsed milliseconds; when <see cref="start"/> has never been called,
    /// the value of <see cref="getTotalMilliseconds"/>.
    /// </returns>
    public static double getTimes()
    {
        // Before the first start() the original subtracted an initial mark of 0,
        // which yielded the full clock reading; keep that result.
        if (!s_stopwatch.IsRunning)
        {
            return getTotalMilliseconds();
        }

        return s_stopwatch.Elapsed.TotalMilliseconds;
    }

    /// <summary>
    /// Returns the milliseconds between 1970-01-01 00:00 and the current local time.
    /// </summary>
    /// <returns>Milliseconds as a double, based on <c>DateTime.Now</c>.</returns>
    public static double getTotalMilliseconds()
    {
        return DateTime.Now.Subtract(s_referenceDate).TotalMilliseconds;
    }
}
|
|||
|
}
|