// REVIEW NOTES (annotation only; the patch text below these notes is unchanged).
//
// What this is: a git unified diff against Frm_Main.cs and HttpHelper.cs whose line
// breaks have been collapsed, so the '+', '-' and '@@' markers appear inline.
// NOTE(review): generic type arguments and XML doc tags appear to have been stripped
// in transit (e.g. "new List()" and bare "///" lines) - confirm against the real
// commit before applying. Comments inside the hunks are left untouched because
// context/removed lines must match the target file verbatim.
//
// Summary of the change as shown:
//  - HttpHelper.cs moves from namespace EnAliKeywordSearch to CityCraft, and
//    Frm_Main.cs adds "using CityCraft;".
//  - HttpMethod is lifted out of HttpArgs to a namespace-level enum, and a new
//    five-member HttpReadyState enum is added.
//  - HttpHelper's static State/ErrMsg are replaced by instance members Status/ErrMsg,
//    next to new fields readyState, responseBody, responseText, responseByte and args.
//  - Get(url), which returned the page text, is replaced by Send(method, url): it sets
//    readyState, calls ParseURL, sets args.Method and starts a new Thread running
//    ReciveData. ReciveData stores the result of InternalSocketHttp() in responseBody
//    and then sets readyState to the last HttpReadyState member.
//  - Frm_Main calls Send, loops on Application.DoEvents() until readyState equals that
//    last member, then reads responseBody; the error text now reads the instance ErrMsg.
//  - ParseURL, InternalSocketHttp, ParseResponse and ParseHttpArgs drop their HttpArgs
//    parameter and use the args field; ParseURL becomes void.
//  - InternalSocketHttp now keeps the raw bytes in responseByte and sets an intermediate
//    readyState before calling ParseResponse.
//  - ParseResponse finds the header/body boundary with IndexOf("\r\n\r\n") + Substring
//    instead of Split over the char array { '\r', '\n', '\r', '\n' }, and the 400 check
//    is loosened from "HTTP/1.1 400 Bad Request" to "HTTP/1.1 400".
//
// Points to verify before merging:
//  - NOTE(review): ParseURL now assigns "args = null" when "//" is not found past
//    index 0, but the visible hunk has no return after it, and Send executes
//    "args.Method = method" right after ParseURL(url). Looks like a malformed URL ends
//    in a NullReferenceException - confirm against the full method.
//  - NOTE(review): ReciveData sets the final readyState only after InternalSocketHttp()
//    returns. Its error handling is not visible in these hunks; if it can throw, the
//    Application.DoEvents() loop in Frm_Main has no other exit condition - confirm, and
//    consider setting the final state in a finally block and/or adding a timeout.
//  - NOTE(review): readyState is a plain public field written by the new Thread and
//    polled from Frm_Main's loop; no volatile/lock is visible in this diff - confirm
//    that the write is guaranteed to be observed by the polling thread.
//  - NOTE(review): the port branch is still guarded by nohttpurl.IndexOf(":") while the
//    split now runs on address. If address is narrowed to the host part in lines not
//    shown, a ':' elsewhere in the URL would yield a one-element array and
//    tempargs[1] would fail - confirm; the guard presumably should test address too.
//  - ParseResponse declares a local named responseBody, the same name as the new
//    instance field; the local is what gets returned and the field is assigned in
//    ReciveData. NOTE(review): consider renaming the local to avoid confusion.
//  - NOTE(review): on 301/302 the visible lines only update args.Url before calling
//    InternalSocketHttp() again; args.Host/args.Port are not touched in the shown
//    hunk - confirm whether redirects to another host are expected to work.
//  - NOTE(review): args and the response fields are now per-instance shared state
//    mutated by Send/ParseURL/ParseResponse; presumably only one request per
//    HttpHelper is in flight at a time - verify against callers.
diff --git a/Frm_Main.cs b/Frm_Main.cs index 1c997b7..d08d796 100644 --- a/Frm_Main.cs +++ b/Frm_Main.cs @@ -7,6 +7,7 @@ using System.Text; using System.Windows.Forms; using AliKeywordSearch; using System.Web; +using CityCraft; namespace EnAliKeywordSearch { @@ -50,11 +51,16 @@ namespace EnAliKeywordSearch url = String.Format("http://www.alibaba.com/products/F0/{0}/{1}.html", HttpUtility.UrlEncode(key), i); else url = String.Format("http://s.1688.com/selloffer/offer_search.htm?keywords={0}&beginPage={1}", HttpUtility.UrlEncode(key), i); - string htmldoc = httpHelper.Get(url); + httpHelper.Send(HttpMethod.GET, url); + while (httpHelper.readyState != HttpReadyState.完成) + { + Application.DoEvents(); + } + string htmldoc = httpHelper.responseBody; if (string.IsNullOrEmpty(htmldoc)) { state.ForeColor = Color.Red; - state.Text = "关键词 " + key + " 第 " + i + " 页 网页抓取失败 错误:" + HttpHelper.ErrMsg; + state.Text = "关键词 " + key + " 第 " + i + " 页 网页抓取失败 错误:" + httpHelper.ErrMsg; maybe = true; continue; } diff --git a/HttpHelper.cs b/HttpHelper.cs index 90015c1..52d0ce9 100644 --- a/HttpHelper.cs +++ b/HttpHelper.cs @@ -13,15 +13,15 @@ using System.Text; using System.Text.RegularExpressions; using System.Threading; -namespace EnAliKeywordSearch +namespace CityCraft { + public enum HttpMethod + { + GET, + POST + } public class HttpArgs { - public enum HttpMethod - { - GET, - POST - } public string Url { get; set; } public string Host { get; set; } public int Port { get; set; } @@ -32,11 +32,23 @@ namespace EnAliKeywordSearch public string UA { get; set; } public HttpMethod Method { get; set; } } - + public enum HttpReadyState + { + 未初始化, + 载入, + 载入完成, + 交互, + 完成 + } public class HttpHelper { - public static int State = 0; - public static string ErrMsg = string.Empty; + public HttpReadyState readyState = HttpReadyState.未初始化; + public int Status = 0; + public string responseBody = ""; + public string responseText = ""; + public byte[] responseByte = null; + public HttpArgs args = new 
HttpArgs(); + public string ErrMsg = string.Empty; /// /// 提交方法 /// @@ -48,14 +60,19 @@ namespace EnAliKeywordSearch /// 请求地址 /// Cookies存储器 /// 请求返回的Stream - public string Get(string url) + public void Send(HttpMethod method, string url) { - HttpArgs args = ParseURL(url); - args.Method = HttpArgs.HttpMethod.GET; - string strhtml = InternalSocketHttp(args); - return strhtml; + readyState = HttpReadyState.载入; + ParseURL(url); + args.Method = method; + new Thread(new ThreadStart(ReciveData)).Start(); } + public void ReciveData() + { + responseBody = InternalSocketHttp(); + readyState = HttpReadyState.完成; + } /// /// Post方法 /// @@ -76,9 +93,10 @@ namespace EnAliKeywordSearch /// /// url字符串 /// host字符串 - private HttpArgs ParseURL(string strUrl) + private void ParseURL(string strUrl) { - HttpArgs args = new HttpArgs(); + if (args == null) + args = new HttpArgs(); args.Host = ""; args.Port = 80; @@ -91,7 +109,7 @@ namespace EnAliKeywordSearch //http://www.alibaba.com/products/Egg_Laying_Block_Machine/1.html int iIndex = strUrl.IndexOf(@"//"); if (iIndex <= 0) - return null; + args = null; //www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html string nohttpurl = strUrl.Substring(iIndex + 2); string address = nohttpurl; @@ -105,7 +123,7 @@ namespace EnAliKeywordSearch iIndex = nohttpurl.IndexOf(@":"); if (iIndex > 0) { - string[] tempargs = nohttpurl.Trim().Split(char.Parse(":")); + string[] tempargs = address.Trim().Split(char.Parse(":")); args.Host = tempargs[0]; args.Port = int.Parse(tempargs[1]); } @@ -115,13 +133,12 @@ namespace EnAliKeywordSearch args.Host = address; args.Port = 80; } - return args; } #endregion #region Socket - string InternalSocketHttp(HttpArgs args) + string InternalSocketHttp() { using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)) { @@ -132,7 +149,7 @@ namespace EnAliKeywordSearch socket.Connect(args.Host, args.Port); if (socket.Connected) { - byte[] buff = ParseHttpArgs(args); + byte[] buff 
= ParseHttpArgs(); if (socket.Send(buff) > 0) { List responseBytes = new List(); @@ -143,7 +160,9 @@ namespace EnAliKeywordSearch responseBytes.AddRange(new List(buffer));//添加数据到List iNumber = socket.Receive(buffer, buffer.Length, SocketFlags.None);//继续接收数据 } - return ParseResponse(responseBytes.ToArray()/*转换List为数组*/, args); + responseByte = responseBytes.ToArray(); + readyState = HttpReadyState.载入完成; + return ParseResponse(responseByte); } } } @@ -155,28 +174,27 @@ namespace EnAliKeywordSearch } } - private string ParseResponse(byte[] responseBytes, HttpArgs args) + private string ParseResponse(byte[] responseBytes) { string responseStr = Encoding.UTF8.GetString(responseBytes); - string[] splitStr = responseStr.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2); - if (splitStr.Length == 2) + int splitindex = responseStr.IndexOf("\r\n\r\n"); + if (splitindex > 0) { - string responseHeader = splitStr[0]; - string responseBody = splitStr[1]; - - if (responseHeader.StartsWith("HTTP/1.1 400 Bad Request")) + string responseHeader = responseStr.Substring(0, splitindex); + string responseBody = responseStr.Substring(splitindex + 4); + if (responseHeader.StartsWith("HTTP/1.1 400")) { - State = 400; + Status = 400; return string.Empty; } else if (responseHeader.StartsWith("HTTP/1.1 404")) { - State = 404; + Status = 404; return string.Empty; } else if (responseHeader.StartsWith("HTTP/1.1 302") || responseHeader.StartsWith("HTTP/1.1 301")) { - State = 302; + Status = 302; int start = responseHeader.ToUpper().IndexOf("LOCATION"); if (start > 0) { @@ -185,20 +203,21 @@ namespace EnAliKeywordSearch args.Url = sArry[0].Remove(0, 10); if (args.Url == "") return string.Empty; - return InternalSocketHttp(args); //注意:302协议需要重定向 + return InternalSocketHttp(); //注意:302协议需要重定向 } } else if (responseHeader.StartsWith("HTTP/1.1 200")) //读取内容 { - State = 200; + Status = 200; + //解压 DecompressWebPage(ref responseBytes, responseHeader); //转码 responseBody = 
DecodeWebStringByHttpHeader(responseBytes, responseHeader); responseBody = DecodeWebStringByHtmlPageInfo(responseBytes, responseBody); } - string[] responseBodys = responseBody.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2); - if (responseBodys.Length == 2) - responseBody = responseBodys[1]; + splitindex = responseBody.IndexOf("\r\n\r\n"); + if (splitindex > 0) + responseBody = responseBody.Substring(splitindex + 4); else responseBody = string.Empty; return responseBody; @@ -295,10 +314,10 @@ namespace EnAliKeywordSearch return strResponse; } - private byte[] ParseHttpArgs(HttpArgs args) + private byte[] ParseHttpArgs() { StringBuilder bulider = new StringBuilder(); - if (args.Method == HttpArgs.HttpMethod.POST) + if (args.Method == HttpMethod.POST) { bulider.AppendLine(string.Format("POST {0} HTTP/1.1", args.Url)); bulider.AppendLine("Content-Type: application/x-www-form-urlencoded"); @@ -325,7 +344,7 @@ namespace EnAliKeywordSearch if (!string.IsNullOrEmpty(args.Cookie)) bulider.AppendLine(string.Format("Cookie: {0}", args.Cookie)); - if (args.Method == HttpArgs.HttpMethod.POST) + if (args.Method == HttpMethod.POST) { bulider.AppendLine(string.Format("Content-Length: {0}\r\n", Encoding.Default.GetBytes(args.Data).Length)); bulider.Append(args.Data);