修复HttpHelper...

master
j502647092 2015-07-30 20:47:57 +08:00
parent d37a6ccbf5
commit 6246442492
2 changed files with 67 additions and 42 deletions

View File

@ -7,6 +7,7 @@ using System.Text;
using System.Windows.Forms; using System.Windows.Forms;
using AliKeywordSearch; using AliKeywordSearch;
using System.Web; using System.Web;
using CityCraft;
namespace EnAliKeywordSearch namespace EnAliKeywordSearch
{ {
@ -50,11 +51,16 @@ namespace EnAliKeywordSearch
url = String.Format("http://www.alibaba.com/products/F0/{0}/{1}.html", HttpUtility.UrlEncode(key), i); url = String.Format("http://www.alibaba.com/products/F0/{0}/{1}.html", HttpUtility.UrlEncode(key), i);
else else
url = String.Format("http://s.1688.com/selloffer/offer_search.htm?keywords={0}&beginPage={1}", HttpUtility.UrlEncode(key), i); url = String.Format("http://s.1688.com/selloffer/offer_search.htm?keywords={0}&beginPage={1}", HttpUtility.UrlEncode(key), i);
string htmldoc = httpHelper.Get(url); httpHelper.Send(HttpMethod.GET, url);
while (httpHelper.readyState != HttpReadyState.)
{
Application.DoEvents();
}
string htmldoc = httpHelper.responseBody;
if (string.IsNullOrEmpty(htmldoc)) if (string.IsNullOrEmpty(htmldoc))
{ {
state.ForeColor = Color.Red; state.ForeColor = Color.Red;
state.Text = "关键词 " + key + " 第 " + i + " 页 网页抓取失败 错误:" + HttpHelper.ErrMsg; state.Text = "关键词 " + key + " 第 " + i + " 页 网页抓取失败 错误:" + httpHelper.ErrMsg;
maybe = true; maybe = true;
continue; continue;
} }

View File

@ -13,15 +13,15 @@ using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading; using System.Threading;
namespace EnAliKeywordSearch namespace CityCraft
{ {
/// <summary>
/// HTTP request method selected for a request (stored in HttpArgs.Method).
/// Declared at namespace level here; the copy nested inside HttpArgs is the removed side of this diff.
/// </summary>
public enum HttpMethod
{
// Passed by the caller as Send(HttpMethod.GET, url).
GET,
// ParseHttpArgs checks for this value to emit the "POST ... HTTP/1.1" request line and the Content-Length header.
POST
}
public class HttpArgs public class HttpArgs
{ {
public enum HttpMethod
{
GET,
POST
}
public string Url { get; set; } public string Url { get; set; }
public string Host { get; set; } public string Host { get; set; }
public int Port { get; set; } public int Port { get; set; }
@ -32,11 +32,23 @@ namespace EnAliKeywordSearch
public string UA { get; set; } public string UA { get; set; }
public HttpMethod Method { get; set; } public HttpMethod Method { get; set; }
} }
/// <summary>
/// Values for HttpHelper.readyState, which HttpHelper assigns while a request runs
/// and which the caller polls to detect that the request has finished.
/// </summary>
public enum HttpReadyState
{
// NOTE(review): all four member names are missing in this view (only the separating commas remain) —
// presumably non-ASCII identifiers lost during extraction. Restore them from the repository;
// as shown, this declaration and every `HttpReadyState.` reference will not compile.
,
,
,
,
}
public class HttpHelper public class HttpHelper
{ {
public static int State = 0; public HttpReadyState readyState = HttpReadyState.;
public static string ErrMsg = string.Empty; public int Status = 0;
public string responseBody = "";
public string responseText = "";
public byte[] responseByte = null;
public HttpArgs args = new HttpArgs();
public string ErrMsg = string.Empty;
/// <summary> /// <summary>
/// 提交方法 /// 提交方法
/// </summary> /// </summary>
@ -48,14 +60,19 @@ namespace EnAliKeywordSearch
/// <param name="geturl">请求地址</param> /// <param name="geturl">请求地址</param>
/// <param name="cookieser">Cookies存储器</param> /// <param name="cookieser">Cookies存储器</param>
/// <returns>请求返回的Stream</returns> /// <returns>请求返回的Stream</returns>
public string Get(string url) public void Send(HttpMethod method, string url)
{ {
// Side-by-side diff rows: the left column is the removed Get(url), which called InternalSocketHttp(args)
// directly and returned the resulting HTML string; the right column is the added Send(method, url).
// Send assigns readyState, has ParseURL process the URL (ParseURL now works on the shared `args` field instead
// of returning a value), records the method, and starts a new Thread that runs ReciveData. It does not
// wait for that thread, so callers read responseBody only after readyState changes.
// NOTE(review): ParseURL's "no // found" branch now assigns `args = null` instead of returning,
// and appears to keep using `args` afterwards — confirm that a malformed URL cannot throw here.
HttpArgs args = ParseURL(url); readyState = HttpReadyState.;
args.Method = HttpArgs.HttpMethod.GET; ParseURL(url);
string strhtml = InternalSocketHttp(args); args.Method = method;
return strhtml; new Thread(new ThreadStart(ReciveData)).Start();
} }
// Thread entry point started by Send: runs the socket request and publishes the result.
// NOTE(review): "Recive" looks like a misspelling of "Receive"; the method is public, so renaming needs a caller check.
public void ReciveData()
{
// InternalSocketHttp() performs the request using the shared `args` field and returns the response body string.
responseBody = InternalSocketHttp();
// The enum member name is missing in this view — presumably the "finished" state the caller's polling loop waits for; confirm.
// NOTE(review): readyState is a plain public field written on this thread and polled from another thread;
// consider `volatile` or a wait handle so the update is reliably observed.
readyState = HttpReadyState.;
}
/// <summary> /// <summary>
/// Post方法 /// Post方法
/// </summary> /// </summary>
@ -76,9 +93,10 @@ namespace EnAliKeywordSearch
/// </summary> /// </summary>
/// <param name="strUrl">url字符串</param> /// <param name="strUrl">url字符串</param>
/// <returns>host字符串</returns> /// <returns>host字符串</returns>
private HttpArgs ParseURL(string strUrl) private void ParseURL(string strUrl)
{ {
HttpArgs args = new HttpArgs(); if (args == null)
args = new HttpArgs();
args.Host = ""; args.Host = "";
args.Port = 80; args.Port = 80;
@ -91,7 +109,7 @@ namespace EnAliKeywordSearch
//http://www.alibaba.com/products/Egg_Laying_Block_Machine/1.html //http://www.alibaba.com/products/Egg_Laying_Block_Machine/1.html
int iIndex = strUrl.IndexOf(@"//"); int iIndex = strUrl.IndexOf(@"//");
if (iIndex <= 0) if (iIndex <= 0)
return null; args = null;
//www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html //www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html
string nohttpurl = strUrl.Substring(iIndex + 2); string nohttpurl = strUrl.Substring(iIndex + 2);
string address = nohttpurl; string address = nohttpurl;
@ -105,7 +123,7 @@ namespace EnAliKeywordSearch
iIndex = nohttpurl.IndexOf(@":"); iIndex = nohttpurl.IndexOf(@":");
if (iIndex > 0) if (iIndex > 0)
{ {
string[] tempargs = nohttpurl.Trim().Split(char.Parse(":")); string[] tempargs = address.Trim().Split(char.Parse(":"));
args.Host = tempargs[0]; args.Host = tempargs[0];
args.Port = int.Parse(tempargs[1]); args.Port = int.Parse(tempargs[1]);
} }
@ -115,13 +133,12 @@ namespace EnAliKeywordSearch
args.Host = address; args.Host = address;
args.Port = 80; args.Port = 80;
} }
return args;
} }
#endregion #endregion
#region Socket #region Socket
string InternalSocketHttp(HttpArgs args) string InternalSocketHttp()
{ {
using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)) using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
{ {
@ -132,7 +149,7 @@ namespace EnAliKeywordSearch
socket.Connect(args.Host, args.Port); socket.Connect(args.Host, args.Port);
if (socket.Connected) if (socket.Connected)
{ {
byte[] buff = ParseHttpArgs(args); byte[] buff = ParseHttpArgs();
if (socket.Send(buff) > 0) if (socket.Send(buff) > 0)
{ {
List<byte> responseBytes = new List<byte>(); List<byte> responseBytes = new List<byte>();
@ -143,7 +160,9 @@ namespace EnAliKeywordSearch
responseBytes.AddRange(new List<byte>(buffer));//添加数据到List responseBytes.AddRange(new List<byte>(buffer));//添加数据到List
iNumber = socket.Receive(buffer, buffer.Length, SocketFlags.None);//继续接收数据 iNumber = socket.Receive(buffer, buffer.Length, SocketFlags.None);//继续接收数据
} }
return ParseResponse(responseBytes.ToArray()/*转换List为数组*/, args); responseByte = responseBytes.ToArray();
readyState = HttpReadyState.;
return ParseResponse(responseByte);
} }
} }
} }
@ -155,28 +174,27 @@ namespace EnAliKeywordSearch
} }
} }
private string ParseResponse(byte[] responseBytes, HttpArgs args) private string ParseResponse(byte[] responseBytes)
{ {
string responseStr = Encoding.UTF8.GetString(responseBytes); string responseStr = Encoding.UTF8.GetString(responseBytes);
string[] splitStr = responseStr.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2); int splitindex = responseStr.IndexOf("\r\n\r\n");
if (splitStr.Length == 2) if (splitindex > 0)
{ {
string responseHeader = splitStr[0]; string responseHeader = responseStr.Substring(0, splitindex);
string responseBody = splitStr[1]; string responseBody = responseStr.Substring(splitindex + 4);
if (responseHeader.StartsWith("HTTP/1.1 400"))
if (responseHeader.StartsWith("HTTP/1.1 400 Bad Request"))
{ {
State = 400; Status = 400;
return string.Empty; return string.Empty;
} }
else if (responseHeader.StartsWith("HTTP/1.1 404")) else if (responseHeader.StartsWith("HTTP/1.1 404"))
{ {
State = 404; Status = 404;
return string.Empty; return string.Empty;
} }
else if (responseHeader.StartsWith("HTTP/1.1 302") || responseHeader.StartsWith("HTTP/1.1 301")) else if (responseHeader.StartsWith("HTTP/1.1 302") || responseHeader.StartsWith("HTTP/1.1 301"))
{ {
State = 302; Status = 302;
int start = responseHeader.ToUpper().IndexOf("LOCATION"); int start = responseHeader.ToUpper().IndexOf("LOCATION");
if (start > 0) if (start > 0)
{ {
@ -185,20 +203,21 @@ namespace EnAliKeywordSearch
args.Url = sArry[0].Remove(0, 10); args.Url = sArry[0].Remove(0, 10);
if (args.Url == "") if (args.Url == "")
return string.Empty; return string.Empty;
return InternalSocketHttp(args); //注意302协议需要重定向 return InternalSocketHttp(); //注意302协议需要重定向
} }
} }
else if (responseHeader.StartsWith("HTTP/1.1 200")) //读取内容 else if (responseHeader.StartsWith("HTTP/1.1 200")) //读取内容
{ {
State = 200; Status = 200;
//解压
DecompressWebPage(ref responseBytes, responseHeader); DecompressWebPage(ref responseBytes, responseHeader);
//转码 //转码
responseBody = DecodeWebStringByHttpHeader(responseBytes, responseHeader); responseBody = DecodeWebStringByHttpHeader(responseBytes, responseHeader);
responseBody = DecodeWebStringByHtmlPageInfo(responseBytes, responseBody); responseBody = DecodeWebStringByHtmlPageInfo(responseBytes, responseBody);
} }
string[] responseBodys = responseBody.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2); splitindex = responseBody.IndexOf("\r\n\r\n");
if (responseBodys.Length == 2) if (splitindex > 0)
responseBody = responseBodys[1]; responseBody = responseBody.Substring(splitindex + 4);
else else
responseBody = string.Empty; responseBody = string.Empty;
return responseBody; return responseBody;
@ -295,10 +314,10 @@ namespace EnAliKeywordSearch
return strResponse; return strResponse;
} }
private byte[] ParseHttpArgs(HttpArgs args) private byte[] ParseHttpArgs()
{ {
StringBuilder bulider = new StringBuilder(); StringBuilder bulider = new StringBuilder();
if (args.Method == HttpArgs.HttpMethod.POST) if (args.Method == HttpMethod.POST)
{ {
bulider.AppendLine(string.Format("POST {0} HTTP/1.1", args.Url)); bulider.AppendLine(string.Format("POST {0} HTTP/1.1", args.Url));
bulider.AppendLine("Content-Type: application/x-www-form-urlencoded"); bulider.AppendLine("Content-Type: application/x-www-form-urlencoded");
@ -325,7 +344,7 @@ namespace EnAliKeywordSearch
if (!string.IsNullOrEmpty(args.Cookie)) if (!string.IsNullOrEmpty(args.Cookie))
bulider.AppendLine(string.Format("Cookie: {0}", args.Cookie)); bulider.AppendLine(string.Format("Cookie: {0}", args.Cookie));
if (args.Method == HttpArgs.HttpMethod.POST) if (args.Method == HttpMethod.POST)
{ {
bulider.AppendLine(string.Format("Content-Length: {0}\r\n", Encoding.Default.GetBytes(args.Data).Length)); bulider.AppendLine(string.Format("Content-Length: {0}\r\n", Encoding.Default.GetBytes(args.Data).Length));
bulider.Append(args.Data); bulider.Append(args.Data);