From 5858a6323b3b9e50c9aebfab9687fcd58cb17eca Mon Sep 17 00:00:00 2001 From: j502647092 Date: Thu, 30 Jul 2015 20:39:07 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9HttpHelper=E8=8E=B7=E5=8F=96?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=88=B0=E7=BA=BF=E7=A8=8B=20=E4=B8=8D?= =?UTF-8?q?=E9=98=BB=E5=A1=9EUI...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Frm_Main.Designer.cs | 2 +- Frm_Main.cs | 16 +++++--- HttpHelper.cs | 90 ++++++++++++++++++++++++++------------------ Program.cs | 40 ++++++++++---------- 4 files changed, 85 insertions(+), 63 deletions(-) diff --git a/Frm_Main.Designer.cs b/Frm_Main.Designer.cs index cb54c36..b32cf29 100644 --- a/Frm_Main.Designer.cs +++ b/Frm_Main.Designer.cs @@ -1,4 +1,4 @@ -namespace EnAliKeywordSearch +namespace SEOKeywordSearch { partial class Frm_Main { diff --git a/Frm_Main.cs b/Frm_Main.cs index d43d310..9fa36c7 100644 --- a/Frm_Main.cs +++ b/Frm_Main.cs @@ -9,8 +9,9 @@ using AliKeywordSearch; using System.Web; using System.Reflection; using System.Text.RegularExpressions; +using CityCraft; -namespace EnAliKeywordSearch +namespace SEOKeywordSearch { public partial class Frm_Main : Form { @@ -104,17 +105,23 @@ namespace EnAliKeywordSearch string url = String.Format(UrlEnum.GetUrl(s), HttpUtility.UrlEncode(key), (s.ToString() == "搜狗" ? i : i - 1));//s搜狗的页面值不同 this.ResultView.Rows[index].Cells[s.ToString() + "r"].Value = "查询第" + i + "页..."; Application.DoEvents(); - string htmldoc = httpHelper.Get(url); + httpHelper.Send(HttpMethod.GET, url); + while (HttpHelper.readyState != HttpReadyState.完成) + { + Application.DoEvents(); + } + string htmldoc = HttpHelper.responseBody; if (string.IsNullOrEmpty(htmldoc)) { state.ForeColor = Color.Red; state.Text = "关键词 " + key + " 在 " + s.ToString() + " 第 " + i + " 页 网页抓取失败 错误:" + HttpHelper.ErrMsg; + this.ResultView.Rows[index].Cells[s.ToString() + "r"].Style.ForeColor = Color.Red; maybe = true; continue; } - if (htmldoc == "0") + if (htmldoc.StartsWith("0")) { - this.ResultView.Rows[index].Cells[s.ToString() + "r"].Value = "已被屏蔽"; + pageinfo = "可能被屏蔽"; this.ResultView.Rows[index].Cells[s.ToString() + "r"].Style.ForeColor = Color.Red; break; } @@ -150,7 +157,6 @@ namespace EnAliKeywordSearch Application.DoEvents(); } this.ResultView.Rows[index].Cells[s.ToString() + "r"].Value = pageinfo + (maybe ? "(可能不准确)" : ""); - this.ResultView.Rows[index].Cells[s.ToString() + "r"].Style.ForeColor = maybe ? Color.Red : this.ResultView.Rows[index].Cells[s.ToString() + "r"].Style.ForeColor; this.ResultView.Rows[index].Cells["查询时间"].Value = DateTime.Now.ToString(); Application.DoEvents(); } diff --git a/HttpHelper.cs b/HttpHelper.cs index 3597287..04c0cc9 100644 --- a/HttpHelper.cs +++ b/HttpHelper.cs @@ -13,15 +13,15 @@ using System.Text; using System.Text.RegularExpressions; using System.Threading; -namespace EnAliKeywordSearch +namespace CityCraft { + public enum HttpMethod + { + GET, + POST + } public class HttpArgs { - public enum HttpMethod - { - GET, - POST - } public string Url { get; set; } public string Host { get; set; } public int Port { get; set; } @@ -32,10 +32,22 @@ namespace EnAliKeywordSearch public string UA { get; set; } public HttpMethod Method { get; set; } } - + public enum HttpReadyState + { + 未初始化, + 载入, + 载入完成, + 交互, + 完成 + } public class HttpHelper { - public static int State = 0; + public static HttpReadyState readyState = HttpReadyState.未初始化; + public static int Status = 0; + public static string responseBody = ""; + public static string responseText = ""; + public static byte[] responseByte = null; + public static HttpArgs args = new HttpArgs(); public static string ErrMsg = string.Empty; /// /// 提交方法 @@ -48,14 +60,19 @@ namespace EnAliKeywordSearch /// 请求地址 /// Cookies存储器 /// 请求返回的Stream - public string Get(string url) + public void Send(HttpMethod method, string url) { - HttpArgs args = ParseURL(url); - args.Method = HttpArgs.HttpMethod.GET; - string strhtml = InternalSocketHttp(args); - return strhtml; + readyState = HttpReadyState.载入; + ParseURL(url); + args.Method = method; + new Thread(new ThreadStart(ReciveData)).Start(); } + public void ReciveData() + { + responseBody = InternalSocketHttp(); + readyState = HttpReadyState.完成; + } /// /// Post方法 /// @@ -76,25 +93,23 @@ namespace EnAliKeywordSearch /// /// url字符串 /// host字符串 - private HttpArgs ParseURL(string strUrl) + private void ParseURL(string strUrl) { - HttpArgs args = new HttpArgs(); + if (args == null) + args = new HttpArgs(); args.Host = ""; args.Port = 80; args.Referer = ""; args.Cookie = ""; args.Url = ""; - args.Accept = "text/html";//,application/xhtml+xml,application/xml,application/json;" - - //args.UA = "Mozilla/5.0+(Compatible;+Baiduspider/2.0;++http://www.baidu.com/search/spider.html)"; - //args.UA = @"Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1 Mobile Safari/10600.6.3 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"; - args.UA = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 UBrowser/5.2.2603.31 Safari/537.36"; + args.Accept = "text/html";//,application/xhtml+xml,application/xml,application/json;"; + args.UA = "Mozilla/5.0+(Compatible;+Baiduspider/2.0;++http://www.baidu.com/search/spider.html)"; //http://www.alibaba.com/products/Egg_Laying_Block_Machine/1.html int iIndex = strUrl.IndexOf(@"//"); if (iIndex <= 0) - return null; + args = null; //www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html string nohttpurl = strUrl.Substring(iIndex + 2); string address = nohttpurl; @@ -108,7 +123,7 @@ namespace EnAliKeywordSearch iIndex = nohttpurl.IndexOf(@":"); if (iIndex > 0) { - string[] tempargs = nohttpurl.Trim().Split(char.Parse(":")); + string[] tempargs = address.Trim().Split(char.Parse(":")); args.Host = tempargs[0]; args.Port = int.Parse(tempargs[1]); } @@ -118,13 +133,12 @@ namespace EnAliKeywordSearch args.Host = address; args.Port = 80; } - return args; } #endregion #region Socket - string InternalSocketHttp(HttpArgs args) + string InternalSocketHttp() { using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)) { @@ -135,7 +149,7 @@ namespace EnAliKeywordSearch socket.Connect(args.Host, args.Port); if (socket.Connected) { - byte[] buff = ParseHttpArgs(args); + byte[] buff = ParseHttpArgs(); if (socket.Send(buff) > 0) { List responseBytes = new List(); @@ -146,7 +160,9 @@ namespace EnAliKeywordSearch responseBytes.AddRange(new List(buffer));//添加数据到List iNumber = socket.Receive(buffer, buffer.Length, SocketFlags.None);//继续接收数据 } - return ParseResponse(responseBytes.ToArray()/*转换List为数组*/, args); + responseByte = responseBytes.ToArray(); + readyState = HttpReadyState.载入完成; + return ParseResponse(responseByte); } } } @@ -158,7 +174,7 @@ namespace EnAliKeywordSearch } } - private string ParseResponse(byte[] responseBytes, HttpArgs args) + private string ParseResponse(byte[] responseBytes) { string responseStr = Encoding.UTF8.GetString(responseBytes); int splitindex = responseStr.IndexOf("\r\n\r\n"); @@ -166,20 +182,19 @@ namespace EnAliKeywordSearch { string responseHeader = responseStr.Substring(0, splitindex); string responseBody = responseStr.Substring(splitindex + 4); - - if (responseHeader.StartsWith("HTTP/1.1 400 Bad Request")) + if (responseHeader.StartsWith("HTTP/1.1 400")) { - State = 400; + Status = 400; return string.Empty; } else if (responseHeader.StartsWith("HTTP/1.1 404")) { - State = 404; + Status = 404; return string.Empty; } else if (responseHeader.StartsWith("HTTP/1.1 302") || responseHeader.StartsWith("HTTP/1.1 301")) { - State = 302; + Status = 302; int start = responseHeader.ToUpper().IndexOf("LOCATION"); if (start > 0) { @@ -188,12 +203,13 @@ namespace EnAliKeywordSearch args.Url = sArry[0].Remove(0, 10); if (args.Url == "") return string.Empty; - return InternalSocketHttp(args); //注意:302协议需要重定向 + return InternalSocketHttp(); //注意:302协议需要重定向 } } else if (responseHeader.StartsWith("HTTP/1.1 200")) //读取内容 { - State = 200; + Status = 200; + //解压 DecompressWebPage(ref responseBytes, responseHeader); //转码 responseBody = DecodeWebStringByHttpHeader(responseBytes, responseHeader); @@ -298,10 +314,10 @@ namespace EnAliKeywordSearch return strResponse; } - private byte[] ParseHttpArgs(HttpArgs args) + private byte[] ParseHttpArgs() { StringBuilder bulider = new StringBuilder(); - if (args.Method == HttpArgs.HttpMethod.POST) + if (args.Method == HttpMethod.POST) { bulider.AppendLine(string.Format("POST {0} HTTP/1.1", args.Url)); bulider.AppendLine("Content-Type: application/x-www-form-urlencoded"); @@ -328,7 +344,7 @@ namespace EnAliKeywordSearch if (!string.IsNullOrEmpty(args.Cookie)) bulider.AppendLine(string.Format("Cookie: {0}", args.Cookie)); - if (args.Method == HttpArgs.HttpMethod.POST) + if (args.Method == HttpMethod.POST) { bulider.AppendLine(string.Format("Content-Length: {0}\r\n", Encoding.Default.GetBytes(args.Data).Length)); bulider.Append(args.Data); diff --git a/Program.cs b/Program.cs index faa3fc6..ce256f6 100644 --- a/Program.cs +++ b/Program.cs @@ -1,20 +1,20 @@ -using System; -using System.Collections.Generic; -using System.Windows.Forms; - -namespace EnAliKeywordSearch -{ - static class Program - { - /// - /// 应用程序的主入口点。 - /// - [STAThread] - static void Main() - { - Application.EnableVisualStyles(); - Application.SetCompatibleTextRenderingDefault(false); - Application.Run(new Frm_Main()); - } - } -} +using System; +using System.Collections.Generic; +using System.Windows.Forms; + +namespace SEOKeywordSearch +{ + static class Program + { + /// + /// 应用程序的主入口点。 + /// + [STAThread] + static void Main() + { + Application.EnableVisualStyles(); + Application.SetCompatibleTextRenderingDefault(false); + Application.Run(new Frm_Main()); + } + } +}