mirror of
https://e.coding.net/circlecloud/AliKeywordSearch.git
synced 2024-12-26 07:28:57 +00:00
修复HttpHelper...
This commit is contained in:
parent
d37a6ccbf5
commit
6246442492
10
Frm_Main.cs
10
Frm_Main.cs
@ -7,6 +7,7 @@ using System.Text;
|
||||
using System.Windows.Forms;
|
||||
using AliKeywordSearch;
|
||||
using System.Web;
|
||||
using CityCraft;
|
||||
|
||||
namespace EnAliKeywordSearch
|
||||
{
|
||||
@ -50,11 +51,16 @@ namespace EnAliKeywordSearch
|
||||
url = String.Format("http://www.alibaba.com/products/F0/{0}/{1}.html", HttpUtility.UrlEncode(key), i);
|
||||
else
|
||||
url = String.Format("http://s.1688.com/selloffer/offer_search.htm?keywords={0}&beginPage={1}", HttpUtility.UrlEncode(key), i);
|
||||
string htmldoc = httpHelper.Get(url);
|
||||
httpHelper.Send(HttpMethod.GET, url);
|
||||
while (httpHelper.readyState != HttpReadyState.完成)
|
||||
{
|
||||
Application.DoEvents();
|
||||
}
|
||||
string htmldoc = httpHelper.responseBody;
|
||||
if (string.IsNullOrEmpty(htmldoc))
|
||||
{
|
||||
state.ForeColor = Color.Red;
|
||||
state.Text = "关键词 " + key + " 第 " + i + " 页 网页抓取失败 错误:" + HttpHelper.ErrMsg;
|
||||
state.Text = "关键词 " + key + " 第 " + i + " 页 网页抓取失败 错误:" + httpHelper.ErrMsg;
|
||||
maybe = true;
|
||||
continue;
|
||||
}
|
||||
|
@ -13,15 +13,15 @@ using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading;
|
||||
|
||||
namespace EnAliKeywordSearch
|
||||
namespace CityCraft
|
||||
{
|
||||
public enum HttpMethod
|
||||
{
|
||||
GET,
|
||||
POST
|
||||
}
|
||||
public class HttpArgs
|
||||
{
|
||||
public enum HttpMethod
|
||||
{
|
||||
GET,
|
||||
POST
|
||||
}
|
||||
public string Url { get; set; }
|
||||
public string Host { get; set; }
|
||||
public int Port { get; set; }
|
||||
@ -32,11 +32,23 @@ namespace EnAliKeywordSearch
|
||||
public string UA { get; set; }
|
||||
public HttpMethod Method { get; set; }
|
||||
}
|
||||
|
||||
public enum HttpReadyState
|
||||
{
|
||||
未初始化,
|
||||
载入,
|
||||
载入完成,
|
||||
交互,
|
||||
完成
|
||||
}
|
||||
public class HttpHelper
|
||||
{
|
||||
public static int State = 0;
|
||||
public static string ErrMsg = string.Empty;
|
||||
public HttpReadyState readyState = HttpReadyState.未初始化;
|
||||
public int Status = 0;
|
||||
public string responseBody = "";
|
||||
public string responseText = "";
|
||||
public byte[] responseByte = null;
|
||||
public HttpArgs args = new HttpArgs();
|
||||
public string ErrMsg = string.Empty;
|
||||
/// <summary>
|
||||
/// 提交方法
|
||||
/// </summary>
|
||||
@ -48,14 +60,19 @@ namespace EnAliKeywordSearch
|
||||
/// <param name="geturl">请求地址</param>
|
||||
/// <param name="cookieser">Cookies存储器</param>
|
||||
/// <returns>请求返回的Stream</returns>
|
||||
public string Get(string url)
|
||||
public void Send(HttpMethod method, string url)
|
||||
{
|
||||
HttpArgs args = ParseURL(url);
|
||||
args.Method = HttpArgs.HttpMethod.GET;
|
||||
string strhtml = InternalSocketHttp(args);
|
||||
return strhtml;
|
||||
readyState = HttpReadyState.载入;
|
||||
ParseURL(url);
|
||||
args.Method = method;
|
||||
new Thread(new ThreadStart(ReciveData)).Start();
|
||||
}
|
||||
|
||||
public void ReciveData()
|
||||
{
|
||||
responseBody = InternalSocketHttp();
|
||||
readyState = HttpReadyState.完成;
|
||||
}
|
||||
/// <summary>
|
||||
/// Post方法
|
||||
/// </summary>
|
||||
@ -76,9 +93,10 @@ namespace EnAliKeywordSearch
|
||||
/// </summary>
|
||||
/// <param name="strUrl">url字符串</param>
|
||||
/// <returns>host字符串</returns>
|
||||
private HttpArgs ParseURL(string strUrl)
|
||||
private void ParseURL(string strUrl)
|
||||
{
|
||||
HttpArgs args = new HttpArgs();
|
||||
if (args == null)
|
||||
args = new HttpArgs();
|
||||
|
||||
args.Host = "";
|
||||
args.Port = 80;
|
||||
@ -91,7 +109,7 @@ namespace EnAliKeywordSearch
|
||||
//http://www.alibaba.com/products/Egg_Laying_Block_Machine/1.html
|
||||
int iIndex = strUrl.IndexOf(@"//");
|
||||
if (iIndex <= 0)
|
||||
return null;
|
||||
args = null;
|
||||
//www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html
|
||||
string nohttpurl = strUrl.Substring(iIndex + 2);
|
||||
string address = nohttpurl;
|
||||
@ -105,7 +123,7 @@ namespace EnAliKeywordSearch
|
||||
iIndex = nohttpurl.IndexOf(@":");
|
||||
if (iIndex > 0)
|
||||
{
|
||||
string[] tempargs = nohttpurl.Trim().Split(char.Parse(":"));
|
||||
string[] tempargs = address.Trim().Split(char.Parse(":"));
|
||||
args.Host = tempargs[0];
|
||||
args.Port = int.Parse(tempargs[1]);
|
||||
}
|
||||
@ -115,13 +133,12 @@ namespace EnAliKeywordSearch
|
||||
args.Host = address;
|
||||
args.Port = 80;
|
||||
}
|
||||
return args;
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Socket
|
||||
|
||||
string InternalSocketHttp(HttpArgs args)
|
||||
string InternalSocketHttp()
|
||||
{
|
||||
using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
|
||||
{
|
||||
@ -132,7 +149,7 @@ namespace EnAliKeywordSearch
|
||||
socket.Connect(args.Host, args.Port);
|
||||
if (socket.Connected)
|
||||
{
|
||||
byte[] buff = ParseHttpArgs(args);
|
||||
byte[] buff = ParseHttpArgs();
|
||||
if (socket.Send(buff) > 0)
|
||||
{
|
||||
List<byte> responseBytes = new List<byte>();
|
||||
@ -143,7 +160,9 @@ namespace EnAliKeywordSearch
|
||||
responseBytes.AddRange(new List<byte>(buffer));//添加数据到List
|
||||
iNumber = socket.Receive(buffer, buffer.Length, SocketFlags.None);//继续接收数据
|
||||
}
|
||||
return ParseResponse(responseBytes.ToArray()/*转换List为数组*/, args);
|
||||
responseByte = responseBytes.ToArray();
|
||||
readyState = HttpReadyState.载入完成;
|
||||
return ParseResponse(responseByte);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -155,28 +174,27 @@ namespace EnAliKeywordSearch
|
||||
}
|
||||
}
|
||||
|
||||
private string ParseResponse(byte[] responseBytes, HttpArgs args)
|
||||
private string ParseResponse(byte[] responseBytes)
|
||||
{
|
||||
string responseStr = Encoding.UTF8.GetString(responseBytes);
|
||||
string[] splitStr = responseStr.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2);
|
||||
if (splitStr.Length == 2)
|
||||
int splitindex = responseStr.IndexOf("\r\n\r\n");
|
||||
if (splitindex > 0)
|
||||
{
|
||||
string responseHeader = splitStr[0];
|
||||
string responseBody = splitStr[1];
|
||||
|
||||
if (responseHeader.StartsWith("HTTP/1.1 400 Bad Request"))
|
||||
string responseHeader = responseStr.Substring(0, splitindex);
|
||||
string responseBody = responseStr.Substring(splitindex + 4);
|
||||
if (responseHeader.StartsWith("HTTP/1.1 400"))
|
||||
{
|
||||
State = 400;
|
||||
Status = 400;
|
||||
return string.Empty;
|
||||
}
|
||||
else if (responseHeader.StartsWith("HTTP/1.1 404"))
|
||||
{
|
||||
State = 404;
|
||||
Status = 404;
|
||||
return string.Empty;
|
||||
}
|
||||
else if (responseHeader.StartsWith("HTTP/1.1 302") || responseHeader.StartsWith("HTTP/1.1 301"))
|
||||
{
|
||||
State = 302;
|
||||
Status = 302;
|
||||
int start = responseHeader.ToUpper().IndexOf("LOCATION");
|
||||
if (start > 0)
|
||||
{
|
||||
@ -185,20 +203,21 @@ namespace EnAliKeywordSearch
|
||||
args.Url = sArry[0].Remove(0, 10);
|
||||
if (args.Url == "")
|
||||
return string.Empty;
|
||||
return InternalSocketHttp(args); //注意:302协议需要重定向
|
||||
return InternalSocketHttp(); //注意:302协议需要重定向
|
||||
}
|
||||
}
|
||||
else if (responseHeader.StartsWith("HTTP/1.1 200")) //读取内容
|
||||
{
|
||||
State = 200;
|
||||
Status = 200;
|
||||
//解压
|
||||
DecompressWebPage(ref responseBytes, responseHeader);
|
||||
//转码
|
||||
responseBody = DecodeWebStringByHttpHeader(responseBytes, responseHeader);
|
||||
responseBody = DecodeWebStringByHtmlPageInfo(responseBytes, responseBody);
|
||||
}
|
||||
string[] responseBodys = responseBody.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2);
|
||||
if (responseBodys.Length == 2)
|
||||
responseBody = responseBodys[1];
|
||||
splitindex = responseBody.IndexOf("\r\n\r\n");
|
||||
if (splitindex > 0)
|
||||
responseBody = responseBody.Substring(splitindex + 4);
|
||||
else
|
||||
responseBody = string.Empty;
|
||||
return responseBody;
|
||||
@ -295,10 +314,10 @@ namespace EnAliKeywordSearch
|
||||
return strResponse;
|
||||
}
|
||||
|
||||
private byte[] ParseHttpArgs(HttpArgs args)
|
||||
private byte[] ParseHttpArgs()
|
||||
{
|
||||
StringBuilder bulider = new StringBuilder();
|
||||
if (args.Method == HttpArgs.HttpMethod.POST)
|
||||
if (args.Method == HttpMethod.POST)
|
||||
{
|
||||
bulider.AppendLine(string.Format("POST {0} HTTP/1.1", args.Url));
|
||||
bulider.AppendLine("Content-Type: application/x-www-form-urlencoded");
|
||||
@ -325,7 +344,7 @@ namespace EnAliKeywordSearch
|
||||
if (!string.IsNullOrEmpty(args.Cookie))
|
||||
bulider.AppendLine(string.Format("Cookie: {0}", args.Cookie));
|
||||
|
||||
if (args.Method == HttpArgs.HttpMethod.POST)
|
||||
if (args.Method == HttpMethod.POST)
|
||||
{
|
||||
bulider.AppendLine(string.Format("Content-Length: {0}\r\n", Encoding.Default.GetBytes(args.Data).Length));
|
||||
bulider.Append(args.Data);
|
||||
|
Loading…
Reference in New Issue
Block a user