mirror of
https://e.coding.net/circlecloud/AliKeywordSearch.git
synced 2024-11-16 00:48:59 +00:00
326 lines
11 KiB
C#
326 lines
11 KiB
C#
using System;
|
||
using System.Collections;
|
||
using System.Diagnostics;
|
||
using System.IO;
|
||
using System.Net;
|
||
using System.Net.Security;
|
||
using System.Net.Sockets;
|
||
using System.Security.Authentication;
|
||
using System.Security.Cryptography.X509Certificates;
|
||
using System.Text;
|
||
using System.Text.RegularExpressions;
|
||
using System.Threading;
|
||
/************************************************************************/
|
||
/* Author:huliang
|
||
* Email:huliang@yahoo.cn
|
||
* QQ:12658501
|
||
* 说明:转载请注明出处
|
||
/************************************************************************/
|
||
|
||
namespace EnAliKeywordSearch
|
||
{
|
||
/// <summary>
/// Parameter bag describing one HTTP request sent by <c>HttpHelper</c>:
/// the endpoint to connect to, the request target, and the optional
/// header values and body.
/// </summary>
public class HttpArgs
{
    /// <summary>Request target written on the HTTP request line.</summary>
    public string Url { get; set; }

    /// <summary>Host to connect to; also sent in the <c>Host</c> header.</summary>
    public string Host { get; set; }

    /// <summary>TCP port to connect to; also sent in the <c>Host</c> header.</summary>
    public int Port { get; set; }

    /// <summary>Value of the <c>Accept</c> header; the header is omitted when null or empty.</summary>
    public string Accept { get; set; }

    /// <summary>Value of the <c>Referer</c> header; the header is omitted when null or empty.</summary>
    public string Referer { get; set; }

    /// <summary>Value of the <c>Cookie</c> header; the header is omitted when null or empty.</summary>
    public string Cookie { get; set; }

    /// <summary>Request body, sent only for POST requests.</summary>
    public string Data { get; set; }

    /// <summary>Value of the <c>User-Agent</c> header.</summary>
    public string UA { get; set; }
}
|
||
|
||
/// <summary>
/// Minimal HTTP/1.1 client built directly on a blocking TCP <see cref="Socket"/>.
/// Only plain-text HTTP over IPv4 is supported: there is no TLS, no chunked
/// transfer decoding and no connection reuse.
/// </summary>
public static class HttpHelper
{
    /// <summary>
    /// HTTP request methods supported by this helper.
    /// </summary>
    enum HttpMethod
    {
        GET,
        POST
    }

    /// <summary>Line terminator mandated by HTTP, independent of the host OS.</summary>
    private const string CrLf = "\r\n";

    /// <summary>Upper bound on consecutive redirects followed for one request.</summary>
    private const int MaxRedirects = 10;

    #region HttpWebRequest & HttpWebResponse

    /// <summary>
    /// Issues a GET request for an absolute URL.
    /// </summary>
    /// <param name="url">Absolute URL, e.g. <c>http://host:8080/path?query</c>.</param>
    /// <returns>
    /// The response body; an empty string when the response carried no usable
    /// body; or null when the URL could not be parsed or the request failed.
    /// </returns>
    public static string Get(string url)
    {
        HttpArgs args = ParseURL(url);
        if (args == null)
        {
            return null;
        }

        return InternalSocketHttp(args, HttpMethod.GET);
    }

    /// <summary>
    /// Placeholder overload kept for API compatibility. It performs no request.
    /// </summary>
    /// <param name="url">Unused.</param>
    /// <param name="bytes">Unused.</param>
    /// <param name="cookies">Unused.</param>
    /// <param name="encoding">Unused.</param>
    /// <returns>Always null.</returns>
    public static string Post(string url,
                              byte[] bytes,
                              CookieContainer cookies,
                              Encoding encoding)
    {
        return null;
    }

    /// <summary>
    /// Splits an absolute URL into host, port and request target and fills in
    /// the default Accept and User-Agent values.
    /// </summary>
    /// <param name="strUrl">Absolute URL containing a <c>//</c> authority marker.</param>
    /// <returns>
    /// The populated arguments, or null when the URL is null/empty, has no
    /// <c>//</c> marker, has no host, or has a port outside 1-65535.
    /// The port defaults to 80 whatever the scheme is.
    /// </returns>
    private static HttpArgs ParseURL(string strUrl)
    {
        if (string.IsNullOrEmpty(strUrl))
        {
            return null;
        }

        int authorityMarker = strUrl.IndexOf("//", StringComparison.Ordinal);
        if (authorityMarker <= 0)
        {
            return null;
        }

        HttpArgs args = new HttpArgs();
        args.Host = "";
        args.Port = 80;
        args.Referer = "";
        args.Cookie = "";
        args.Url = "/"; // an empty request target would produce an invalid request line
        args.Accept = "text/html";
        args.UA = "Mozilla/5.0+(Compatible;+Baiduspider/2.0;++http://www.baidu.com/search/spider.html)";

        // e.g. "www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html"
        string remainder = strUrl.Substring(authorityMarker + 2).Trim();
        string address = remainder;

        int targetStart = remainder.IndexOfAny(new char[] { '/', '?' });
        if (targetStart >= 0)
        {
            address = remainder.Substring(0, targetStart);
            string target = remainder.Substring(targetStart);
            // "http://host?x=1" has an empty path: the request target is "/?x=1".
            args.Url = target[0] == '/' ? target : "/" + target;
        }

        // Look for the port in the authority part only, so that a ':' inside
        // the path or query is never mistaken for a port separator.
        int colon = address.LastIndexOf(':');
        if (colon >= 0)
        {
            string portText = address.Substring(colon + 1);
            address = address.Substring(0, colon);
            if (portText.Length > 0)
            {
                int port;
                if (!int.TryParse(portText, out port) || port < 1 || port > 65535)
                {
                    return null;
                }

                args.Port = port;
            }
        }

        if (address.Length == 0)
        {
            return null;
        }

        args.Host = address;
        return args;
    }

    #endregion

    #region Socket

    /// <summary>
    /// Issues a GET request described by <paramref name="args"/>.
    /// </summary>
    /// <param name="args">Request description; its Host/Port/Url are updated when a redirect is followed.</param>
    /// <returns>The response body, an empty string, or null on failure.</returns>
    public static string Get(HttpArgs args)
    {
        return InternalSocketHttp(args, HttpMethod.GET);
    }

    /// <summary>
    /// Issues a POST request described by <paramref name="args"/>.
    /// </summary>
    /// <param name="endpoint">Not used; the connection target is taken from <paramref name="args"/>.</param>
    /// <param name="args">Request description, including the body in <c>Data</c>.</param>
    /// <returns>The response body, an empty string, or null on failure.</returns>
    public static string Post(IPEndPoint endpoint,
                              HttpArgs args)
    {
        return InternalSocketHttp(args, HttpMethod.POST);
    }

    /// <summary>
    /// Sends the request and follows up to <see cref="MaxRedirects"/> redirects.
    /// Each hop uses its own socket, which is closed before the next hop starts.
    /// </summary>
    /// <param name="args">Request description; mutated when a redirect is followed.</param>
    /// <param name="method">Method of the initial request; redirects are always followed with GET.</param>
    /// <returns>
    /// The response body; an empty string when there is no usable body or the
    /// redirect limit is exceeded; null when <paramref name="args"/> is null
    /// or the exchange failed.
    /// </returns>
    static string InternalSocketHttp(HttpArgs args, HttpMethod method)
    {
        if (args == null)
        {
            return null;
        }

        HttpMethod current = method;
        for (int hop = 0; hop <= MaxRedirects; hop++)
        {
            bool redirected;
            string result = SendOnce(args, current, out redirected);
            if (!redirected)
            {
                return result;
            }

            current = HttpMethod.GET;
        }

        Console.WriteLine("Too many redirects.");
        return string.Empty;
    }

    /// <summary>
    /// Performs a single request/response exchange on a fresh socket.
    /// Failures are reported on the console and turned into a null result.
    /// </summary>
    /// <param name="args">Request description.</param>
    /// <param name="method">Request method.</param>
    /// <param name="redirected">Set to true when <paramref name="args"/> now points at a redirect target.</param>
    /// <returns>The response body, an empty string, or null on failure.</returns>
    private static string SendOnce(HttpArgs args, HttpMethod method, out bool redirected)
    {
        redirected = false;
        using (Socket sK = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
        {
            try
            {
                sK.Connect(args.Host, args.Port);
                if (sK.Connected)
                {
                    byte[] request = ParseHttpArgs(method, args);
                    if (sK.Send(request) > 0)
                    {
                        return ParseResponse(sK, args, out redirected);
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort client: report the problem and signal failure with null.
                redirected = false;
                Console.WriteLine(ex.Message);
            }
        }

        return null;
    }

    /// <summary>
    /// Reads the response header and, depending on the status code, either
    /// prepares a redirect or reads the body.
    /// </summary>
    /// <param name="sK">Connected socket on which the request has been sent.</param>
    /// <param name="args">Request description; Host/Port/Url are rewritten for 301/302/303 responses.</param>
    /// <param name="redirected">Set to true when the caller should re-issue the request as GET.</param>
    /// <returns>The body for a 200 response; otherwise an empty string.</returns>
    private static string ParseResponse(Socket sK, HttpArgs args, out bool redirected)
    {
        redirected = false;
        string header = ReadHeaderProcess(sK);

        switch (ParseStatusCode(header))
        {
            case 301:
            case 302:
            case 303:
                redirected = ApplyRedirect(args, GetHeaderValue(header, "Location"));
                return string.Empty;

            case 200:
                string lengthText = GetHeaderValue(header, "Content-Length");
                if (lengthText == null)
                {
                    // No Content-Length header: read until the document ends or the peer closes.
                    return ParseResponse(sK);
                }

                int contentLength;
                if (!int.TryParse(lengthText, out contentLength) || contentLength <= 0)
                {
                    return string.Empty;
                }

                return ReadBody(sK, contentLength);

            default:
                return string.Empty;
        }
    }

    /// <summary>
    /// Reads exactly <paramref name="contentLength"/> body bytes. A single
    /// Receive call may return fewer bytes than requested, so reading continues
    /// until the body is complete or the peer closes the connection.
    /// </summary>
    /// <param name="sK">Connected socket positioned at the start of the body.</param>
    /// <param name="contentLength">Announced body size in bytes; must be positive.</param>
    /// <returns>The decoded bytes actually received (possibly fewer than announced).</returns>
    private static string ReadBody(Socket sK, int contentLength)
    {
        byte[] body = new byte[contentLength];
        int received = 0;
        while (received < contentLength)
        {
            int read = sK.Receive(body, received, contentLength - received, SocketFlags.None);
            if (read <= 0)
            {
                break;
            }

            received += read;
        }

        return received > 0 ? Encoding.Default.GetString(body, 0, received) : string.Empty;
    }

    /// <summary>
    /// Extracts the numeric status code from a response header.
    /// </summary>
    /// <param name="header">Raw response header, starting with the status line.</param>
    /// <returns>The status code, or 0 when the text is not an HTTP status line.</returns>
    private static int ParseStatusCode(string header)
    {
        if (header == null || !header.StartsWith("HTTP/", StringComparison.Ordinal))
        {
            return 0;
        }

        int space = header.IndexOf(' ');
        if (space < 0 || space + 4 > header.Length)
        {
            return 0;
        }

        int code;
        return int.TryParse(header.Substring(space + 1, 3), out code) ? code : 0;
    }

    /// <summary>
    /// Finds a header field by exact, case-insensitive name.
    /// </summary>
    /// <param name="header">Raw response header with CRLF-separated lines.</param>
    /// <param name="name">Field name without the colon.</param>
    /// <returns>The trimmed field value, or null when the field is absent.</returns>
    private static string GetHeaderValue(string header, string name)
    {
        string[] lines = header.Split(new string[] { CrLf }, StringSplitOptions.None);

        // Index 0 is the status line, not a header field.
        for (int i = 1; i < lines.Length; i++)
        {
            int colon = lines[i].IndexOf(':');
            if (colon <= 0)
            {
                continue;
            }

            string fieldName = lines[i].Substring(0, colon).Trim();
            if (string.Equals(fieldName, name, StringComparison.OrdinalIgnoreCase))
            {
                return lines[i].Substring(colon + 1).Trim();
            }
        }

        return null;
    }

    /// <summary>
    /// Points <paramref name="args"/> at the target of a redirect.
    /// </summary>
    /// <param name="args">Request description to update.</param>
    /// <param name="location">Value of the Location header; may be null.</param>
    /// <returns>True when <paramref name="args"/> was updated and the redirect can be followed.</returns>
    private static bool ApplyRedirect(HttpArgs args, string location)
    {
        if (string.IsNullOrEmpty(location))
        {
            return false;
        }

        // Protocol-relative reference ("//host/path"): same scheme, new authority.
        if (location.StartsWith("//", StringComparison.Ordinal))
        {
            return CopyTarget(args, ParseURL("http:" + location));
        }

        // Absolute path on the same host.
        if (location[0] == '/')
        {
            args.Url = location;
            return true;
        }

        // Absolute URL: the scheme separator must come before any path/query
        // character, otherwise "://" merely appears inside a relative reference.
        int scheme = location.IndexOf("://", StringComparison.Ordinal);
        if (scheme > 0 && location.IndexOfAny(new char[] { '/', '?', '#' }) == scheme + 1)
        {
            return CopyTarget(args, ParseURL(location));
        }

        // Relative reference: resolve against the directory of the current path.
        string current = string.IsNullOrEmpty(args.Url) ? "/" : args.Url;
        int query = current.IndexOf('?');
        if (query >= 0)
        {
            current = current.Substring(0, query);
        }

        int lastSlash = current.LastIndexOf('/');
        string directory = lastSlash >= 0 ? current.Substring(0, lastSlash + 1) : "/";
        args.Url = directory + location;
        return true;
    }

    /// <summary>
    /// Copies host, port and request target from a parsed URL.
    /// </summary>
    /// <param name="args">Request description to update.</param>
    /// <param name="target">Parsed redirect target, or null when parsing failed.</param>
    /// <returns>False when <paramref name="target"/> is null; otherwise true.</returns>
    private static bool CopyTarget(HttpArgs args, HttpArgs target)
    {
        if (target == null)
        {
            return false;
        }

        args.Host = target.Host;
        args.Port = target.Port;
        args.Url = target.Url;
        return true;
    }

    /// <summary>
    /// Reads the response header one byte at a time, so that no body bytes are
    /// consumed, until the blank line that ends the header (or a literal
    /// <c>&lt;/html&gt;</c>) is seen or the peer closes the connection.
    /// </summary>
    /// <param name="sK">Connected socket on which the request has been sent.</param>
    /// <returns>Everything read so far, including the terminator when one was found.</returns>
    static string ReadHeaderProcess(Socket sK)
    {
        StringBuilder builder = new StringBuilder();
        byte[] single = new byte[1];
        while (true)
        {
            int read = sK.Receive(single, SocketFlags.None);
            if (read <= 0)
            {
                // The peer closed the connection; without this check the loop never ends.
                break;
            }

            builder.Append((char)single[0]);

            // The check runs after every byte, so testing the suffix is
            // equivalent to searching the whole text and avoids rescanning it.
            if (EndsWith(builder, "\r\n\r\n") || EndsWith(builder, "</html>"))
            {
                break;
            }
        }

        return builder.ToString();
    }

    /// <summary>
    /// Tests whether <paramref name="text"/> ends with <paramref name="suffix"/> (ordinal comparison).
    /// </summary>
    /// <param name="text">Text to inspect.</param>
    /// <param name="suffix">Expected ending.</param>
    /// <returns>True when the last characters of <paramref name="text"/> equal <paramref name="suffix"/>.</returns>
    private static bool EndsWith(StringBuilder text, string suffix)
    {
        int offset = text.Length - suffix.Length;
        if (offset < 0)
        {
            return false;
        }

        for (int i = 0; i < suffix.Length; i++)
        {
            if (text[offset + i] != suffix[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Reads a body of unknown length until a closing <c>&lt;/html&gt;</c> tag
    /// (any letter case) or a NUL character is seen, or the peer closes the
    /// connection. This call can block indefinitely if the server keeps the
    /// connection open without sending either marker.
    /// </summary>
    /// <param name="sK">Connected socket positioned at the start of the body.</param>
    /// <returns>The text received so far; empty when nothing was received.</returns>
    static string ParseResponse(Socket sK)
    {
        const string closingTag = "</HTML>";

        StringBuilder builder = new StringBuilder();
        Encoding encoding = Encoding.Default;
        // A stateful decoder keeps multi-byte characters intact when they are
        // split across two receive buffers.
        Decoder decoder = encoding.GetDecoder();
        byte[] buff = new byte[1024];
        char[] chars = new char[encoding.GetMaxCharCount(buff.Length)];

        int len;
        while ((len = sK.Receive(buff)) > 0)
        {
            int charCount = decoder.GetChars(buff, 0, len, chars, 0);
            builder.Append(chars, 0, charCount);

            // Only the new characters, plus enough overlap to catch a marker
            // straddling two chunks, need to be scanned.
            int scanFrom = Math.Max(0, builder.Length - charCount - (closingTag.Length - 1));
            string tail = builder.ToString(scanFrom, builder.Length - scanFrom);
            if (tail.IndexOf(closingTag, StringComparison.OrdinalIgnoreCase) >= 0 || tail.IndexOf('\0') >= 0)
            {
                break;
            }
        }

        return builder.ToString();
    }

    #endregion

    #region Helper

    /// <summary>
    /// Serialises the request line, headers and (for POST) the body into the
    /// bytes to send. Lines are always terminated with CRLF as HTTP requires.
    /// </summary>
    /// <param name="method">Request method.</param>
    /// <param name="args">Request description; a null or empty Url is sent as "/", a null Data as an empty body.</param>
    /// <returns>The encoded request.</returns>
    static byte[] ParseHttpArgs(HttpMethod method, HttpArgs args)
    {
        bool isPost = method == HttpMethod.POST;
        string target = string.IsNullOrEmpty(args.Url) ? "/" : args.Url;
        string data = args.Data ?? string.Empty;

        StringBuilder builder = new StringBuilder();
        AppendHeaderLine(builder, string.Format("{0} {1} HTTP/1.1", isPost ? "POST" : "GET", target));
        if (isPost)
        {
            AppendHeaderLine(builder, "Content-Type: application/x-www-form-urlencoded");
        }

        AppendHeaderLine(builder, string.Format("Host: {0}:{1}", args.Host, args.Port));
        AppendHeaderLine(builder, "User-Agent: " + args.UA);

        if (!string.IsNullOrEmpty(args.Referer))
        {
            AppendHeaderLine(builder, string.Format("Referer: {0}", args.Referer));
        }

        if (!string.IsNullOrEmpty(args.Accept))
        {
            AppendHeaderLine(builder, string.Format("Accept: {0}", args.Accept));
        }

        if (!string.IsNullOrEmpty(args.Cookie))
        {
            AppendHeaderLine(builder, string.Format("Cookie: {0}", args.Cookie));
        }

        if (isPost)
        {
            // The length is counted in the same encoding used to emit the request.
            AppendHeaderLine(builder, string.Format("Content-Length: {0}", Encoding.Default.GetByteCount(data)));
        }

        builder.Append(CrLf); // blank line ending the header section
        if (isPost)
        {
            builder.Append(data);
        }

        return Encoding.Default.GetBytes(builder.ToString());
    }

    /// <summary>
    /// Appends one header line followed by CRLF.
    /// </summary>
    /// <param name="builder">Request text being assembled.</param>
    /// <param name="line">Line content without a terminator.</param>
    private static void AppendHeaderLine(StringBuilder builder, string line)
    {
        builder.Append(line).Append(CrLf);
    }

    #endregion
}
|
||
|
||
/// <summary>
/// Process-wide millisecond timer: call <see cref="start"/> to record a
/// starting point and <see cref="getTimes"/> to read the time elapsed since then.
/// </summary>
public class MilliTimer
{
    /// <summary>
    /// Unix epoch in UTC. Built once from numeric components instead of being
    /// parsed from text on every call, so it does not depend on the current culture.
    /// </summary>
    private static readonly DateTime Epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    /// <summary>Timestamp recorded by the most recent <see cref="start"/> call.</summary>
    private static double startMilliseconds;

    /// <summary>
    /// Records the current time as the starting point for <see cref="getTimes"/>.
    /// </summary>
    public static void start()
    {
        startMilliseconds = getTotalMilliseconds();
    }

    /// <summary>
    /// Returns the time elapsed since the last <see cref="start"/> call.
    /// </summary>
    /// <returns>
    /// Elapsed milliseconds; when <see cref="start"/> was never called, the
    /// milliseconds elapsed since the Unix epoch.
    /// </returns>
    public static double getTimes()
    {
        return getTotalMilliseconds() - startMilliseconds;
    }

    /// <summary>
    /// Returns the current Unix time in milliseconds.
    /// </summary>
    /// <returns>Milliseconds elapsed since 1970-01-01T00:00:00Z.</returns>
    public static double getTotalMilliseconds()
    {
        // UTC on both sides: local time would shift the value by the time-zone
        // offset and make elapsed times jump across daylight-saving changes.
        return (DateTime.UtcNow - Epoch).TotalMilliseconds;
    }
}
|
||
} |