using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Net;
using System.Net.Security;
using System.Net.Sockets;
using System.Security.Authentication;
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;

namespace EnAliKeywordSearch
{
    /// <summary>
    /// Describes a single HTTP request issued by <see cref="HttpHelper"/>.
    /// </summary>
    public class HttpArgs
    {
        /// <summary>HTTP verbs understood by the request builder.</summary>
        public enum HttpMethod
        {
            GET,
            POST
        }

        /// <summary>Request target sent on the request line (path plus query string).</summary>
        public string Url { get; set; }

        /// <summary>Host name the socket connects to; also sent in the Host header.</summary>
        public string Host { get; set; }

        /// <summary>TCP port the socket connects to.</summary>
        public int Port { get; set; }

        /// <summary>Value of the Accept header; the header is omitted when null or empty.</summary>
        public string Accept { get; set; }

        /// <summary>Value of the Referer header; the header is omitted when null or empty.</summary>
        public string Referer { get; set; }

        /// <summary>Value of the Cookie header; the header is omitted when null or empty.</summary>
        public string Cookie { get; set; }

        /// <summary>Request body, only sent for POST requests.</summary>
        public string Data { get; set; }

        /// <summary>Value of the User-Agent header.</summary>
        public string UA { get; set; }

        /// <summary>HTTP verb used for the request.</summary>
        public HttpMethod Method { get; set; }
    }

    /// <summary>
    /// Minimal HTTP/1.1 client built directly on a TCP socket (plain HTTP only, no TLS).
    /// </summary>
    public class HttpHelper
    {
        /// <summary>
        /// Status of the most recent response. 301 and 302 are both reported as 302;
        /// 0 means no response has been parsed since the last <see cref="Get"/> call started.
        /// This is a static field, so it is shared by every instance.
        /// </summary>
        public static int State = 0;

        /// <summary>
        /// Message of the most recent failure, or an empty string. Shared by every instance.
        /// </summary>
        public static string ErrMsg = string.Empty;

        // Upper bound on followed redirects so a redirect loop cannot recurse forever.
        private const int MaxRedirects = 10;
        private const int DefaultHttpPort = 80;
        private const int SendTimeoutMs = 1000;
        private const int ReceiveTimeoutMs = 5000;

        // CR LF CR LF: the blank line separating the header section from the body.
        private static readonly byte[] HeaderTerminator = { 13, 10, 13, 10 };
        private static readonly byte[] LineTerminator = { 13, 10 };

        private static readonly Regex StatusLineRegex =
            new Regex(@"^HTTP/\d+\.\d+\s+(\d{3})");
        private static readonly Regex LocationRegex =
            new Regex(@"^Location:[ \t]*([^\r\n]*)", RegexOptions.IgnoreCase | RegexOptions.Multiline);
        private static readonly Regex GzipRegex =
            new Regex(@"^Content-Encoding:[ \t]*(x-)?gzip", RegexOptions.IgnoreCase | RegexOptions.Multiline);
        private static readonly Regex ChunkedRegex =
            new Regex(@"^Transfer-Encoding:[^\r\n]*\bchunked\b", RegexOptions.IgnoreCase | RegexOptions.Multiline);
        private static readonly Regex HeaderCharsetRegex =
            new Regex(@"^Content-Type:[^\r\n]*charset[ \t]*=[ \t]*[""']?([\w\-]+)", RegexOptions.IgnoreCase | RegexOptions.Multiline);
        private static readonly Regex MetaGb2312Regex =
            new Regex(@"<meta[^>]+Content-Type[^>]+gb2312[^>]*>", RegexOptions.IgnoreCase);
        private static readonly Regex MetaGbkRegex =
            new Regex(@"<meta[^>]+Content-Type[^>]+gbk[^>]*>", RegexOptions.IgnoreCase);
        private static readonly Regex MetaBig5Regex =
            new Regex(@"<meta[^>]+Content-Type[^>]+Big5[^>]*>", RegexOptions.IgnoreCase);

        #region HttpWebRequest & HttpWebResponse

        /// <summary>
        /// Fetches a page with an HTTP GET request.
        /// </summary>
        /// <param name="url">Absolute URL such as http://host[:port]/path.</param>
        /// <returns>
        /// The decoded response body, or an empty string on failure
        /// (see <see cref="State"/> and <see cref="ErrMsg"/>).
        /// </returns>
        public string Get(string url)
        {
            // Clear the outcome of any previous call so callers never read stale values.
            State = 0;
            ErrMsg = string.Empty;

            HttpArgs args = ParseURL(url);
            if (args == null)
            {
                ErrMsg = "Invalid URL: " + url;
                return string.Empty;
            }

            args.Method = HttpArgs.HttpMethod.GET;
            return InternalSocketHttp(args);
        }

        /// <summary>
        /// POST entry point. Not implemented: it performs no request and always returns null.
        /// </summary>
        /// <param name="url">Request URL (unused).</param>
        /// <param name="bytes">POST payload (unused).</param>
        /// <param name="cookies">Cookie store (unused).</param>
        /// <param name="encoding">Payload encoding (unused).</param>
        /// <returns>Always null.</returns>
        public string Post(string url, byte[] bytes, CookieContainer cookies, Encoding encoding)
        {
            return null;
        }

        /// <summary>
        /// Splits an absolute URL into host, port and request target and fills in the
        /// default request headers.
        /// </summary>
        /// <param name="strUrl">URL containing "//" followed by host[:port][/path].</param>
        /// <returns>
        /// The populated arguments, or null when the URL has no "//", no host, or an invalid port.
        /// </returns>
        private HttpArgs ParseURL(string strUrl)
        {
            if (string.IsNullOrEmpty(strUrl))
            {
                return null;
            }

            strUrl = strUrl.Trim();
            int schemeEnd = strUrl.IndexOf("//", StringComparison.Ordinal);
            if (schemeEnd < 0)
            {
                return null;
            }

            // e.g. "www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html"
            string remainder = strUrl.Substring(schemeEnd + 2);

            // The authority (host[:port]) ends at the first '/', '?' or '#'.
            int pathStart = remainder.IndexOfAny(new char[] { '/', '?', '#' });
            string authority = pathStart >= 0 ? remainder.Substring(0, pathStart) : remainder;
            string path = pathStart >= 0 ? remainder.Substring(pathStart) : string.Empty;

            // Fragments are never sent to the server.
            int fragment = path.IndexOf('#');
            if (fragment >= 0)
            {
                path = path.Substring(0, fragment);
            }

            // The request target must start with '/', also for "http://host" and "http://host?x=1".
            if (path.Length == 0 || path[0] != '/')
            {
                path = "/" + path;
            }

            // Only the authority is inspected for a port, so a ':' in the path or query is harmless.
            string host = authority;
            int port = DefaultHttpPort;
            int colon = authority.LastIndexOf(':');
            if (colon >= 0)
            {
                host = authority.Substring(0, colon);
                string portText = authority.Substring(colon + 1);
                if (portText.Length > 0)
                {
                    if (!int.TryParse(portText, NumberStyles.None, CultureInfo.InvariantCulture, out port)
                        || port < 1 || port > 65535)
                    {
                        return null;
                    }
                }
            }

            if (host.Length == 0)
            {
                return null;
            }

            HttpArgs args = new HttpArgs();
            args.Host = host;
            args.Port = port;
            args.Url = path;
            args.Referer = "";
            args.Cookie = "";
            args.Accept = "text/html";
            args.UA = "Mozilla/5.0+(Compatible;+Baiduspider/2.0;++http://www.baidu.com/search/spider.html)";
            return args;
        }

        #endregion

        #region Socket

        /// <summary>
        /// Sends the request over a TCP socket and returns the decoded body, following
        /// up to <see cref="MaxRedirects"/> redirects.
        /// </summary>
        /// <param name="args">Request description; Host, Port and Url are updated when a redirect is followed.</param>
        /// <returns>The decoded response body, or an empty string on failure.</returns>
        string InternalSocketHttp(HttpArgs args)
        {
            return SendRequest(args, MaxRedirects);
        }

        /// <summary>
        /// Performs one request/response exchange and hands the raw bytes to the parser.
        /// Any exception is recorded in <see cref="ErrMsg"/> and reported as an empty result.
        /// </summary>
        private string SendRequest(HttpArgs args, int redirectsLeft)
        {
            try
            {
                byte[] responseBytes = null;

                using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
                {
                    socket.SendTimeout = SendTimeoutMs;
                    socket.ReceiveTimeout = ReceiveTimeoutMs;
                    socket.Connect(args.Host, args.Port);

                    if (socket.Connected)
                    {
                        byte[] request = ParseHttpArgs(args);
                        if (socket.Send(request) > 0)
                        {
                            using (MemoryStream received = new MemoryStream())
                            {
                                byte[] buffer = new byte[8192];
                                int count;

                                // The request asks for "Connection: Close", so the server closing
                                // the connection (Receive returning 0) marks the end of the response.
                                while ((count = socket.Receive(buffer, buffer.Length, SocketFlags.None)) > 0)
                                {
                                    // Keep only the bytes actually received in this call.
                                    received.Write(buffer, 0, count);
                                }

                                responseBytes = received.ToArray();
                            }
                        }
                    }
                }

                // The socket is already closed here, so following a redirect does not keep it open.
                if (responseBytes != null)
                {
                    return ParseResponseCore(responseBytes, args, redirectsLeft);
                }
            }
            catch (Exception e)
            {
                // Best effort by design: failures are reported through ErrMsg, not thrown.
                ErrMsg = e.Message;
            }

            return string.Empty;
        }

        /// <summary>
        /// Parses a raw HTTP response (headers plus body) and returns the decoded body.
        /// </summary>
        /// <param name="responseBytes">Complete response as received from the socket.</param>
        /// <param name="args">Request that produced the response; reused when following a redirect.</param>
        /// <returns>
        /// The decoded body; an empty string for 400 and 404 responses, for a response
        /// without a header terminator or status line, or for a failed redirect.
        /// </returns>
        private string ParseResponse(byte[] responseBytes, HttpArgs args)
        {
            return ParseResponseCore(responseBytes, args, MaxRedirects);
        }

        /// <summary>
        /// Implementation of <see cref="ParseResponse"/> that also tracks how many more
        /// redirects may be followed.
        /// </summary>
        private string ParseResponseCore(byte[] responseBytes, HttpArgs args, int redirectsLeft)
        {
            if (responseBytes == null)
            {
                return string.Empty;
            }

            // Split on the byte sequence CR LF CR LF; the body may be binary (gzip), so the
            // split has to happen before any text decoding.
            int headerEnd = FindBytes(responseBytes, HeaderTerminator, 0);
            if (headerEnd < 0)
            {
                return string.Empty;
            }

            string header = Encoding.ASCII.GetString(responseBytes, 0, headerEnd);
            int bodyStart = headerEnd + HeaderTerminator.Length;
            byte[] body = new byte[responseBytes.Length - bodyStart];
            Buffer.BlockCopy(responseBytes, bodyStart, body, 0, body.Length);

            Match status = StatusLineRegex.Match(header);
            if (!status.Success)
            {
                return string.Empty;
            }

            int statusCode = int.Parse(status.Groups[1].Value, CultureInfo.InvariantCulture);

            if (statusCode == 400 || statusCode == 404)
            {
                State = statusCode;
                return string.Empty;
            }

            if (statusCode == 301 || statusCode == 302)
            {
                // Both redirect codes are reported as 302.
                State = 302;

                Match location = LocationRegex.Match(header);
                if (location.Success)
                {
                    string target = location.Groups[1].Value.Trim();
                    if (target.Length == 0)
                    {
                        return string.Empty;
                    }

                    if (redirectsLeft <= 0)
                    {
                        ErrMsg = "Too many redirects";
                        return string.Empty;
                    }

                    if (!ApplyRedirect(args, target))
                    {
                        ErrMsg = "Invalid redirect target: " + target;
                        return string.Empty;
                    }

                    return SendRequest(args, redirectsLeft - 1);
                }
            }
            else
            {
                State = statusCode;
            }

            // HTTP/1.1 servers may use chunked framing even with "Connection: Close".
            if (ChunkedRegex.IsMatch(header))
            {
                body = DecodeChunked(body);
            }

            DecompressWebPage(ref body, header);

            string text = DecodeWebStringByHttpHeader(body, header);
            return DecodeWebStringByHtmlPageInfo(body, text);
        }

        /// <summary>
        /// Points <paramref name="args"/> at a redirect target. Absolute targets replace host,
        /// port and path; relative targets are resolved against the current request path.
        /// </summary>
        /// <returns>False when an absolute target cannot be parsed.</returns>
        private bool ApplyRedirect(HttpArgs args, string location)
        {
            bool absolute = location.StartsWith("//", StringComparison.Ordinal)
                || location.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || location.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

            if (absolute)
            {
                HttpArgs target = ParseURL(location);
                if (target == null)
                {
                    return false;
                }

                args.Host = target.Host;
                args.Port = target.Port;
                args.Url = target.Url;
                return true;
            }

            if (location[0] == '/')
            {
                args.Url = location;
                return true;
            }

            // Path-relative target: replace the last segment of the current path.
            string basePath = string.IsNullOrEmpty(args.Url) ? "/" : args.Url;
            int query = basePath.IndexOf('?');
            if (query >= 0)
            {
                basePath = basePath.Substring(0, query);
            }

            int lastSlash = basePath.LastIndexOf('/');
            string directory = lastSlash >= 0 ? basePath.Substring(0, lastSlash + 1) : "/";
            args.Url = directory + location;
            return true;
        }

        #endregion

        #region Helper

        /// <summary>
        /// Returns the index of the first occurrence of <paramref name="needle"/> in
        /// <paramref name="haystack"/> at or after <paramref name="start"/>, or -1.
        /// </summary>
        private static int FindBytes(byte[] haystack, byte[] needle, int start)
        {
            for (int i = start; i <= haystack.Length - needle.Length; i++)
            {
                int matched = 0;
                while (matched < needle.Length && haystack[i + matched] == needle[matched])
                {
                    matched++;
                }

                if (matched == needle.Length)
                {
                    return i;
                }
            }

            return -1;
        }

        /// <summary>
        /// Removes "Transfer-Encoding: chunked" framing. Decoding stops at the terminating
        /// zero-size chunk, at the end of the data, or at the first malformed size line.
        /// </summary>
        private static byte[] DecodeChunked(byte[] body)
        {
            using (MemoryStream output = new MemoryStream(body.Length))
            {
                int position = 0;
                while (position < body.Length)
                {
                    int lineEnd = FindBytes(body, LineTerminator, position);
                    if (lineEnd < 0)
                    {
                        break;
                    }

                    // Size line: hexadecimal length, optionally followed by ";extension".
                    string sizeLine = Encoding.ASCII.GetString(body, position, lineEnd - position);
                    int extension = sizeLine.IndexOf(';');
                    if (extension >= 0)
                    {
                        sizeLine = sizeLine.Substring(0, extension);
                    }

                    int size;
                    if (!int.TryParse(sizeLine.Trim(), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out size)
                        || size <= 0)
                    {
                        // Zero is the terminating chunk; anything unparsable ends decoding.
                        break;
                    }

                    position = lineEnd + LineTerminator.Length;
                    int available = body.Length - position;
                    if (size >= available)
                    {
                        // Truncated final chunk: keep what arrived.
                        output.Write(body, position, available);
                        break;
                    }

                    output.Write(body, position, size);
                    position += size + LineTerminator.Length;
                }

                return output.ToArray();
            }
        }

        /// <summary>
        /// Replaces <paramref name="responseBytes"/> with its decompressed form when the
        /// headers declare gzip content encoding.
        /// </summary>
        /// <param name="responseBytes">Response body bytes (without HTTP headers).</param>
        /// <param name="strHeader">HTTP header section of the response.</param>
        private void DecompressWebPage(ref byte[] responseBytes, string strHeader)
        {
            if (GzipRegex.IsMatch(strHeader))
            {
                responseBytes = Decompress(responseBytes);
            }
        }

        /// <summary>
        /// Decompresses a gzip byte array.
        /// </summary>
        /// <param name="szSource">Gzip-compressed bytes.</param>
        /// <returns>The decompressed bytes.</returns>
        private byte[] Decompress(byte[] szSource)
        {
            using (MemoryStream source = new MemoryStream(szSource))
            using (GZipStream gzip = new GZipStream(source, CompressionMode.Decompress))
            using (MemoryStream result = new MemoryStream())
            {
                byte[] buffer = new byte[4096];
                int read;
                while ((read = gzip.Read(buffer, 0, buffer.Length)) > 0)
                {
                    // Copy only the bytes produced by this read.
                    result.Write(buffer, 0, read);
                }

                return result.ToArray();
            }
        }

        /// <summary>
        /// Looks up an encoding by charset name. "gb2312" is decoded as GBK.
        /// </summary>
        /// <returns>The encoding, or null when the name is empty or not available in this runtime.</returns>
        private static Encoding ResolveEncoding(string charset)
        {
            if (string.IsNullOrEmpty(charset))
            {
                return null;
            }

            if (string.Equals(charset, "gb2312", StringComparison.OrdinalIgnoreCase))
            {
                charset = "GBK";
            }

            try
            {
                return Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                return null;
            }
            catch (NotSupportedException)
            {
                return null;
            }
        }

        /// <summary>
        /// Decodes the body using the charset declared in the Content-Type header.
        /// </summary>
        /// <param name="responseBytes">Response body bytes (without HTTP headers).</param>
        /// <param name="strHeader">HTTP header section of the response.</param>
        /// <returns>
        /// The decoded text; UTF-8 is used when no charset is declared or it is unavailable.
        /// </returns>
        private string DecodeWebStringByHttpHeader(byte[] responseBytes, string strHeader)
        {
            Encoding encoding = null;

            Match charset = HeaderCharsetRegex.Match(strHeader ?? string.Empty);
            if (charset.Success)
            {
                encoding = ResolveEncoding(charset.Groups[1].Value);
            }

            return (encoding ?? Encoding.UTF8).GetString(responseBytes);
        }

        /// <summary>
        /// Re-decodes the body when an HTML meta Content-Type tag declares GB2312, GBK or Big5.
        /// </summary>
        /// <param name="responseBytes">Response body bytes (without HTTP headers).</param>
        /// <param name="strResponse">Body text as decoded so far.</param>
        /// <returns>
        /// The re-decoded text, or <paramref name="strResponse"/> unchanged when no such
        /// tag is present or the encoding is unavailable.
        /// </returns>
        private string DecodeWebStringByHtmlPageInfo(byte[] responseBytes, string strResponse)
        {
            if (responseBytes == null || strResponse == null)
            {
                return strResponse;
            }

            if (MetaGb2312Regex.IsMatch(strResponse) || MetaGbkRegex.IsMatch(strResponse))
            {
                Encoding gbk = ResolveEncoding("GBK");
                if (gbk != null)
                {
                    strResponse = gbk.GetString(responseBytes);
                }
            }

            if (MetaBig5Regex.IsMatch(strResponse))
            {
                Encoding big5 = ResolveEncoding("Big5");
                if (big5 != null)
                {
                    strResponse = big5.GetString(responseBytes);
                }
            }

            return strResponse;
        }

        /// <summary>
        /// Serializes the request line, headers and (for POST) the body into the bytes
        /// sent on the wire.
        /// </summary>
        /// <param name="args">Request description.</param>
        /// <returns>The UTF-8 encoded request.</returns>
        private byte[] ParseHttpArgs(HttpArgs args)
        {
            // HTTP requires CR LF line endings regardless of the platform newline.
            const string Crlf = "\r\n";

            bool isPost = args.Method == HttpArgs.HttpMethod.POST;
            string target = string.IsNullOrEmpty(args.Url) ? "/" : args.Url;
            string data = args.Data ?? string.Empty;

            StringBuilder builder = new StringBuilder();

            if (isPost)
            {
                builder.Append("POST ").Append(target).Append(" HTTP/1.1").Append(Crlf);
                builder.Append("Content-Type: application/x-www-form-urlencoded").Append(Crlf);
            }
            else
            {
                builder.Append("GET ").Append(target).Append(" HTTP/1.1").Append(Crlf);
            }

            builder.Append("Host: ").Append(args.Host).Append(':')
                   .Append(args.Port.ToString(CultureInfo.InvariantCulture)).Append(Crlf);
            builder.Append("User-Agent: ").Append(args.UA).Append(Crlf);

            if (!string.IsNullOrEmpty(args.Referer))
            {
                builder.Append("Referer: ").Append(args.Referer).Append(Crlf);
            }

            // The receive loop relies on the server closing the connection.
            builder.Append("Connection: Close").Append(Crlf);

            if (!string.IsNullOrEmpty(args.Accept))
            {
                builder.Append("Accept: ").Append(args.Accept).Append(Crlf);
            }

            if (!string.IsNullOrEmpty(args.Cookie))
            {
                builder.Append("Cookie: ").Append(args.Cookie).Append(Crlf);
            }

            if (isPost)
            {
                // Content-Length counts bytes in the same encoding used for the final buffer.
                int contentLength = Encoding.UTF8.GetByteCount(data);
                builder.Append("Content-Length: ")
                       .Append(contentLength.ToString(CultureInfo.InvariantCulture)).Append(Crlf);
                builder.Append(Crlf);
                builder.Append(data);
            }
            else
            {
                builder.Append(Crlf);
            }

            return Encoding.UTF8.GetBytes(builder.ToString());
        }

        #endregion
    }

    /// <summary>
    /// Simple process-wide millisecond timer.
    /// </summary>
    public class MilliTimer
    {
        // Fixed UTC epoch: no culture-dependent parsing and no daylight-saving jumps.
        private static readonly DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

        // Timestamp recorded by the last call to start().
        private static double times { get; set; }

        /// <summary>Records the current time as the starting point.</summary>
        public static void start()
        {
            times = getTotalMilliseconds();
        }

        /// <summary>Returns the milliseconds elapsed since the last call to <see cref="start"/>.</summary>
        public static double getTimes()
        {
            return getTotalMilliseconds() - times;
        }

        /// <summary>Returns the milliseconds elapsed since 1970-01-01T00:00:00Z.</summary>
        public static double getTotalMilliseconds()
        {
            return (DateTime.UtcNow - UnixEpoch).TotalMilliseconds;
        }
    }
}