mirror of
https://e.coding.net/circlecloud/AliKeywordSearch.git
synced 2024-11-16 00:48:59 +00:00
优化Socket获取流程 界面交互...
This commit is contained in:
parent
d4012764f7
commit
24f77e9e79
@ -9,7 +9,7 @@
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>AliKeywordSearch</RootNamespace>
|
||||
<AssemblyName>AliKeywordSearch</AssemblyName>
|
||||
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
|
||||
<TargetFrameworkVersion>v2.0</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<TargetFrameworkProfile />
|
||||
</PropertyGroup>
|
||||
@ -40,10 +40,7 @@
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Web" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Deployment" />
|
||||
<Reference Include="System.Drawing" />
|
||||
|
@ -1,7 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
|
||||
namespace AliKeywordSearch
|
||||
|
66
Frm_Main.Designer.cs
generated
66
Frm_Main.Designer.cs
generated
@ -47,9 +47,9 @@
|
||||
this.readin = new System.Windows.Forms.Button();
|
||||
this.openFile = new System.Windows.Forms.OpenFileDialog();
|
||||
this.saveFile = new System.Windows.Forms.SaveFileDialog();
|
||||
this.backgroundWorker = new System.ComponentModel.BackgroundWorker();
|
||||
this.clearresult = new System.Windows.Forms.Button();
|
||||
this.clearsearch = new System.Windows.Forms.Button();
|
||||
this.label2 = new System.Windows.Forms.Label();
|
||||
((System.ComponentModel.ISupportInitialize)(this.SearchView)).BeginInit();
|
||||
((System.ComponentModel.ISupportInitialize)(this.ResultView)).BeginInit();
|
||||
this.SuspendLayout();
|
||||
@ -60,10 +60,10 @@
|
||||
this.SearchView.Columns.AddRange(new System.Windows.Forms.DataGridViewColumn[] {
|
||||
this.company,
|
||||
this.keyword});
|
||||
this.SearchView.Location = new System.Drawing.Point(12, 54);
|
||||
this.SearchView.Location = new System.Drawing.Point(11, 58);
|
||||
this.SearchView.Name = "SearchView";
|
||||
this.SearchView.RowTemplate.Height = 23;
|
||||
this.SearchView.Size = new System.Drawing.Size(843, 241);
|
||||
this.SearchView.Size = new System.Drawing.Size(850, 200);
|
||||
this.SearchView.TabIndex = 0;
|
||||
//
|
||||
// company
|
||||
@ -86,10 +86,10 @@
|
||||
this.关键词,
|
||||
this.排名,
|
||||
this.查询时间});
|
||||
this.ResultView.Location = new System.Drawing.Point(12, 322);
|
||||
this.ResultView.Location = new System.Drawing.Point(11, 293);
|
||||
this.ResultView.Name = "ResultView";
|
||||
this.ResultView.RowTemplate.Height = 23;
|
||||
this.ResultView.Size = new System.Drawing.Size(843, 460);
|
||||
this.ResultView.Size = new System.Drawing.Size(850, 300);
|
||||
this.ResultView.TabIndex = 1;
|
||||
//
|
||||
// 公司名称
|
||||
@ -118,7 +118,7 @@
|
||||
//
|
||||
// search
|
||||
//
|
||||
this.search.Location = new System.Drawing.Point(399, 9);
|
||||
this.search.Location = new System.Drawing.Point(360, 10);
|
||||
this.search.Name = "search";
|
||||
this.search.Size = new System.Drawing.Size(152, 36);
|
||||
this.search.TabIndex = 2;
|
||||
@ -128,7 +128,7 @@
|
||||
//
|
||||
// max
|
||||
//
|
||||
this.max.Location = new System.Drawing.Point(112, 12);
|
||||
this.max.Location = new System.Drawing.Point(663, 10);
|
||||
this.max.Name = "max";
|
||||
this.max.Size = new System.Drawing.Size(83, 21);
|
||||
this.max.TabIndex = 3;
|
||||
@ -137,8 +137,7 @@
|
||||
// cnAli
|
||||
//
|
||||
this.cnAli.AutoSize = true;
|
||||
this.cnAli.Enabled = false;
|
||||
this.cnAli.Location = new System.Drawing.Point(303, 19);
|
||||
this.cnAli.Location = new System.Drawing.Point(740, 35);
|
||||
this.cnAli.Name = "cnAli";
|
||||
this.cnAli.Size = new System.Drawing.Size(71, 16);
|
||||
this.cnAli.TabIndex = 5;
|
||||
@ -150,7 +149,7 @@
|
||||
//
|
||||
this.enAli.AutoSize = true;
|
||||
this.enAli.Checked = true;
|
||||
this.enAli.Location = new System.Drawing.Point(226, 19);
|
||||
this.enAli.Location = new System.Drawing.Point(663, 35);
|
||||
this.enAli.Name = "enAli";
|
||||
this.enAli.Size = new System.Drawing.Size(71, 16);
|
||||
this.enAli.TabIndex = 6;
|
||||
@ -161,17 +160,18 @@
|
||||
// label1
|
||||
//
|
||||
this.label1.AutoSize = true;
|
||||
this.label1.Location = new System.Drawing.Point(16, 16);
|
||||
this.label1.Location = new System.Drawing.Point(592, 13);
|
||||
this.label1.Name = "label1";
|
||||
this.label1.Size = new System.Drawing.Size(89, 12);
|
||||
this.label1.Size = new System.Drawing.Size(65, 12);
|
||||
this.label1.TabIndex = 7;
|
||||
this.label1.Text = "最大查询页数: ";
|
||||
this.label1.Text = "查询页数: ";
|
||||
//
|
||||
// state
|
||||
//
|
||||
this.state.AutoSize = true;
|
||||
this.state.Font = new System.Drawing.Font("宋体", 10.5F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(134)));
|
||||
this.state.Location = new System.Drawing.Point(96, 302);
|
||||
this.state.ForeColor = System.Drawing.SystemColors.ControlText;
|
||||
this.state.Location = new System.Drawing.Point(98, 267);
|
||||
this.state.Name = "state";
|
||||
this.state.Size = new System.Drawing.Size(37, 14);
|
||||
this.state.TabIndex = 8;
|
||||
@ -180,21 +180,21 @@
|
||||
//
|
||||
// saveout
|
||||
//
|
||||
this.saveout.Location = new System.Drawing.Point(777, 16);
|
||||
this.saveout.Location = new System.Drawing.Point(181, 29);
|
||||
this.saveout.Name = "saveout";
|
||||
this.saveout.Size = new System.Drawing.Size(75, 23);
|
||||
this.saveout.Size = new System.Drawing.Size(73, 23);
|
||||
this.saveout.TabIndex = 9;
|
||||
this.saveout.Text = "导出数据";
|
||||
this.saveout.Text = "导出关键词";
|
||||
this.saveout.UseVisualStyleBackColor = true;
|
||||
this.saveout.Click += new System.EventHandler(this.saveout_Click);
|
||||
//
|
||||
// readin
|
||||
//
|
||||
this.readin.Location = new System.Drawing.Point(615, 16);
|
||||
this.readin.Location = new System.Drawing.Point(19, 29);
|
||||
this.readin.Name = "readin";
|
||||
this.readin.Size = new System.Drawing.Size(75, 23);
|
||||
this.readin.Size = new System.Drawing.Size(73, 23);
|
||||
this.readin.TabIndex = 10;
|
||||
this.readin.Text = "导入数据";
|
||||
this.readin.Text = "导入关键词";
|
||||
this.readin.UseVisualStyleBackColor = true;
|
||||
this.readin.Click += new System.EventHandler(this.readin_Click);
|
||||
//
|
||||
@ -204,9 +204,9 @@
|
||||
//
|
||||
// clearresult
|
||||
//
|
||||
this.clearresult.Location = new System.Drawing.Point(14, 299);
|
||||
this.clearresult.Location = new System.Drawing.Point(19, 264);
|
||||
this.clearresult.Name = "clearresult";
|
||||
this.clearresult.Size = new System.Drawing.Size(68, 20);
|
||||
this.clearresult.Size = new System.Drawing.Size(73, 23);
|
||||
this.clearresult.TabIndex = 11;
|
||||
this.clearresult.Text = "清除结果";
|
||||
this.clearresult.UseVisualStyleBackColor = true;
|
||||
@ -214,18 +214,28 @@
|
||||
//
|
||||
// clearsearch
|
||||
//
|
||||
this.clearsearch.Location = new System.Drawing.Point(696, 16);
|
||||
this.clearsearch.Location = new System.Drawing.Point(100, 29);
|
||||
this.clearsearch.Name = "clearsearch";
|
||||
this.clearsearch.Size = new System.Drawing.Size(75, 23);
|
||||
this.clearsearch.Size = new System.Drawing.Size(73, 23);
|
||||
this.clearsearch.TabIndex = 12;
|
||||
this.clearsearch.Text = "清除数据";
|
||||
this.clearsearch.Text = "清除关键词";
|
||||
this.clearsearch.UseVisualStyleBackColor = true;
|
||||
this.clearsearch.Click += new System.EventHandler(this.clearsearch_Click);
|
||||
//
|
||||
// label2
|
||||
//
|
||||
this.label2.AutoSize = true;
|
||||
this.label2.Location = new System.Drawing.Point(592, 37);
|
||||
this.label2.Name = "label2";
|
||||
this.label2.Size = new System.Drawing.Size(65, 12);
|
||||
this.label2.TabIndex = 13;
|
||||
this.label2.Text = "查询类型: ";
|
||||
//
|
||||
// Frm_Main
|
||||
//
|
||||
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.None;
|
||||
this.ClientSize = new System.Drawing.Size(864, 794);
|
||||
this.ClientSize = new System.Drawing.Size(872, 603);
|
||||
this.Controls.Add(this.label2);
|
||||
this.Controls.Add(this.clearsearch);
|
||||
this.Controls.Add(this.clearresult);
|
||||
this.Controls.Add(this.readin);
|
||||
@ -239,6 +249,8 @@
|
||||
this.Controls.Add(this.ResultView);
|
||||
this.Controls.Add(this.SearchView);
|
||||
this.Icon = ((System.Drawing.Icon)(resources.GetObject("$this.Icon")));
|
||||
this.MaximizeBox = false;
|
||||
this.MinimizeBox = false;
|
||||
this.Name = "Frm_Main";
|
||||
this.Text = "阿里排名查询软件 2015-7-25";
|
||||
this.Load += new System.EventHandler(this.Frm_Main_Load);
|
||||
@ -269,9 +281,9 @@
|
||||
private System.Windows.Forms.Button readin;
|
||||
private System.Windows.Forms.OpenFileDialog openFile;
|
||||
private System.Windows.Forms.SaveFileDialog saveFile;
|
||||
private System.ComponentModel.BackgroundWorker backgroundWorker;
|
||||
private System.Windows.Forms.Button clearresult;
|
||||
private System.Windows.Forms.Button clearsearch;
|
||||
private System.Windows.Forms.Label label2;
|
||||
|
||||
}
|
||||
}
|
||||
|
49
Frm_Main.cs
49
Frm_Main.cs
@ -3,7 +3,6 @@ using System.Collections.Generic;
|
||||
using System.ComponentModel;
|
||||
using System.Data;
|
||||
using System.Drawing;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Windows.Forms;
|
||||
using AliKeywordSearch;
|
||||
@ -15,37 +14,14 @@ namespace EnAliKeywordSearch
|
||||
public Frm_Main()
|
||||
{
|
||||
InitializeComponent();
|
||||
|
||||
backgroundWorker.ProgressChanged += backgroundWorker_ProgressChanged;
|
||||
backgroundWorker.DoWork += backgroundWorker_DoWork;
|
||||
backgroundWorker.RunWorkerCompleted += backgroundWorker_RunWorkerCompleted;
|
||||
}
|
||||
|
||||
private void backgroundWorker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void backgroundWorker_ProgressChanged(object sender, ProgressChangedEventArgs e)
|
||||
{
|
||||
|
||||
}
|
||||
private void backgroundWorker_DoWork(object sender, DoWorkEventArgs e)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
|
||||
void DoWork(object sender, DoWorkEventArgs e)
|
||||
{
|
||||
BackgroundWorker bw = sender as BackgroundWorker;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void search_Click(object sender, EventArgs e)
|
||||
{
|
||||
search.Enabled = false;
|
||||
state.ForeColor = Color.Black;
|
||||
state.ForeColor = Color.Red;
|
||||
HttpHelper httpHelper = new HttpHelper();
|
||||
foreach (DataGridViewRow item in this.SearchView.Rows)
|
||||
{
|
||||
string cpy = item.Cells["company"].Value == null ? "" : item.Cells["company"].Value.ToString();
|
||||
@ -54,12 +30,16 @@ namespace EnAliKeywordSearch
|
||||
{
|
||||
foreach (string key in keys.Split(','))
|
||||
{
|
||||
bool maybe = false;
|
||||
int maxpage = this.ToInt(max.Text);
|
||||
int index = this.ResultView.Rows.Add();
|
||||
this.ResultView.FirstDisplayedScrollingRowIndex = index;
|
||||
this.ResultView.Rows[index].Cells["公司名称"].Value = cpy;
|
||||
this.ResultView.Rows[index].Cells["关键词"].Value = key;
|
||||
state.ForeColor = Color.Black;
|
||||
state.Text = "正在查询 " + cpy + " 的关键词 " + key;
|
||||
Application.DoEvents();
|
||||
string pageinfo = string.Empty;
|
||||
for (int i = 1; i <= maxpage; i++)
|
||||
{
|
||||
string url = string.Empty;
|
||||
@ -69,30 +49,35 @@ namespace EnAliKeywordSearch
|
||||
url = String.Format("http://www.alibaba.com/products/F0/{0}/{1}.html", key, i);
|
||||
else
|
||||
url = String.Format("http://s.1688.com/selloffer/offer_search.htm?keywords={0}&beginPage={1}", key, i);
|
||||
string htmldoc = HttpHelper.Get(url);
|
||||
string htmldoc = httpHelper.Get(url);
|
||||
if (string.IsNullOrEmpty(htmldoc))
|
||||
{
|
||||
this.ResultView.Rows[index].Cells["排名"].Value = "网页抓取失败";
|
||||
break;
|
||||
state.ForeColor = Color.Red;
|
||||
state.Text = "关键词 " + key + " 第 " + i + " 页 网页抓取失败 错误:" + HttpHelper.ErrMsg;
|
||||
maybe = true;
|
||||
continue;
|
||||
}
|
||||
if (htmldoc.Contains(cpy))
|
||||
{
|
||||
this.ResultView.Rows[index].Cells["排名"].Value = "第" + i + "页";
|
||||
pageinfo = "第" + i + "页";
|
||||
break;
|
||||
}
|
||||
if (i == maxpage)
|
||||
{
|
||||
this.ResultView.Rows[index].Cells["排名"].Value = maxpage + "页以后";
|
||||
pageinfo = maxpage + "页以后";
|
||||
break;
|
||||
}
|
||||
Application.DoEvents();
|
||||
}
|
||||
this.ResultView.Rows[index].Cells["排名"].Value = pageinfo + (maybe ? "(可能不准确)" : "");
|
||||
this.ResultView.Rows[index].Cells["排名"].Style.ForeColor = maybe ? Color.Red : Color.Black;
|
||||
this.ResultView.Rows[index].Cells["查询时间"].Value = DateTime.Now.ToString();
|
||||
Application.DoEvents();
|
||||
}
|
||||
}
|
||||
Application.DoEvents();
|
||||
}
|
||||
state.ForeColor = Color.Green;
|
||||
state.Text = "所有关键词查询完成!";
|
||||
search.Enabled = true;
|
||||
}
|
||||
|
@ -141,9 +141,6 @@
|
||||
<metadata name="saveFile.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
|
||||
<value>161, 17</value>
|
||||
</metadata>
|
||||
<metadata name="backgroundWorker.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
|
||||
<value>262, 17</value>
|
||||
</metadata>
|
||||
<assembly alias="System.Drawing" name="System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a" />
|
||||
<data name="$this.Icon" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
|
||||
<value>
|
||||
|
298
HttpHelper.cs
298
HttpHelper.cs
@ -1,7 +1,9 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Net;
|
||||
using System.Net.Security;
|
||||
using System.Net.Sockets;
|
||||
@ -10,17 +12,16 @@ using System.Security.Cryptography.X509Certificates;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading;
|
||||
/************************************************************************/
|
||||
/* Author:huliang
|
||||
* Email:huliang@yahoo.cn
|
||||
* QQ:12658501
|
||||
* 说明:转载请注明出处
|
||||
/************************************************************************/
|
||||
|
||||
namespace EnAliKeywordSearch
|
||||
{
|
||||
public class HttpArgs
|
||||
{
|
||||
public enum HttpMethod
|
||||
{
|
||||
GET,
|
||||
POST
|
||||
}
|
||||
public string Url { get; set; }
|
||||
public string Host { get; set; }
|
||||
public int Port { get; set; }
|
||||
@ -29,20 +30,16 @@ namespace EnAliKeywordSearch
|
||||
public string Cookie { get; set; }
|
||||
public string Data { get; set; }
|
||||
public string UA { get; set; }
|
||||
|
||||
public HttpMethod Method { get; set; }
|
||||
}
|
||||
|
||||
public static class HttpHelper
|
||||
public class HttpHelper
|
||||
{
|
||||
public static int State = 0;
|
||||
public static string ErrMsg = string.Empty;
|
||||
/// <summary>
|
||||
/// 提交方法
|
||||
/// </summary>
|
||||
enum HttpMethod
|
||||
{
|
||||
GET,
|
||||
POST
|
||||
}
|
||||
|
||||
#region HttpWebRequest & HttpWebResponse
|
||||
|
||||
/// <summary>
|
||||
@ -51,10 +48,11 @@ namespace EnAliKeywordSearch
|
||||
/// <param name="geturl">请求地址</param>
|
||||
/// <param name="cookieser">Cookies存储器</param>
|
||||
/// <returns>请求返回的Stream</returns>
|
||||
public static string Get(string url)
|
||||
public string Get(string url)
|
||||
{
|
||||
HttpArgs args = ParseURL(url);
|
||||
string strhtml = InternalSocketHttp(args, HttpMethod.GET);
|
||||
args.Method = HttpArgs.HttpMethod.GET;
|
||||
string strhtml = InternalSocketHttp(args);
|
||||
return strhtml;
|
||||
}
|
||||
|
||||
@ -65,7 +63,7 @@ namespace EnAliKeywordSearch
|
||||
/// <param name="bytes">Post数据</param>
|
||||
/// <param name="cookieser">Cllkies存储器</param>
|
||||
/// <returns>请求返回的流</returns>
|
||||
public static string Post(string url,
|
||||
public string Post(string url,
|
||||
byte[] bytes,
|
||||
CookieContainer cookies,
|
||||
Encoding encoding)
|
||||
@ -78,7 +76,7 @@ namespace EnAliKeywordSearch
|
||||
/// </summary>
|
||||
/// <param name="strUrl">url字符串</param>
|
||||
/// <returns>host字符串</returns>
|
||||
private static HttpArgs ParseURL(string strUrl)
|
||||
private HttpArgs ParseURL(string strUrl)
|
||||
{
|
||||
HttpArgs args = new HttpArgs();
|
||||
|
||||
@ -123,146 +121,184 @@ namespace EnAliKeywordSearch
|
||||
|
||||
#region Socket
|
||||
|
||||
public static string Get(HttpArgs args)
|
||||
string InternalSocketHttp(HttpArgs args)
|
||||
{
|
||||
return InternalSocketHttp(args, HttpMethod.GET);
|
||||
}
|
||||
|
||||
public static string Post(IPEndPoint endpoint,
|
||||
HttpArgs args)
|
||||
{
|
||||
return InternalSocketHttp(args, HttpMethod.POST);
|
||||
}
|
||||
|
||||
static string InternalSocketHttp(HttpArgs args, HttpMethod method)
|
||||
{
|
||||
|
||||
using (Socket sK = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
|
||||
using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
|
||||
{
|
||||
try
|
||||
{
|
||||
sK.Connect(args.Host, args.Port);
|
||||
if (sK.Connected)
|
||||
socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.SendTimeout, 1000);
|
||||
socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReceiveTimeout, 5000);
|
||||
socket.Connect(args.Host, args.Port);
|
||||
if (socket.Connected)
|
||||
{
|
||||
byte[] buff = ParseHttpArgs(method, args);
|
||||
if (sK.Send(buff) > 0)
|
||||
byte[] buff = ParseHttpArgs(args);
|
||||
if (socket.Send(buff) > 0)
|
||||
{
|
||||
string html = ParseResponse(sK, args);
|
||||
return html;
|
||||
List<byte> responseBytes = new List<byte>();
|
||||
byte[] buffer = new byte[1024];
|
||||
int iNumber = socket.Receive(buffer, buffer.Length, SocketFlags.None);
|
||||
while (iNumber > 0)//使用了Connection: Close 所以判断长度为0 时停止接受
|
||||
{
|
||||
responseBytes.AddRange(new List<byte>(buffer));//添加数据到List
|
||||
iNumber = socket.Receive(buffer, buffer.Length, SocketFlags.None);//继续接收数据
|
||||
}
|
||||
return ParseResponse(responseBytes.ToArray()/*转换List为数组*/, args);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine(ex.Message);
|
||||
ErrMsg = e.Message;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static string ParseResponse(Socket sK, HttpArgs args)
|
||||
{
|
||||
string header = ReadHeaderProcess(sK);
|
||||
if (header.StartsWith("HTTP/1.1 400 Bad Request"))
|
||||
{
|
||||
return string.Empty;
|
||||
}
|
||||
if (header.StartsWith("HTTP/1.1 302"))
|
||||
{
|
||||
int start = header
|
||||
.ToUpper().IndexOf("LOCATION");
|
||||
if (start > 0)
|
||||
{
|
||||
string temp = header.Substring(start, header.Length - start);
|
||||
string[] sArry = Regex.Split(temp, "\r\n");
|
||||
args.Url = sArry[0].Remove(0, 10);
|
||||
return Get(args); //注意:302协议需要重定向
|
||||
}
|
||||
}
|
||||
else if (header.StartsWith("HTTP/1.1 200")) //继续读取内容
|
||||
{
|
||||
int start = header.ToUpper().IndexOf("CONTENT-LENGTH");
|
||||
int content_length = 0;
|
||||
if (start > 0)
|
||||
{
|
||||
string temp = header.Substring(start, header.Length - start);
|
||||
string[] sArry = Regex.Split(temp, "\r\n");
|
||||
content_length = Convert.ToInt32(sArry[0].Split(':')[1]);
|
||||
if (content_length > 0)
|
||||
{
|
||||
byte[] bytes = new byte[content_length];
|
||||
if (sK.Receive(bytes) > 0)
|
||||
{
|
||||
return Encoding.Default.GetString(bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//不存在Content-Length协议头
|
||||
return ParseResponse(sK);
|
||||
}
|
||||
}
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// 读取协议头
|
||||
/// </summary>
|
||||
/// <param name="args"></param>
|
||||
/// <returns></returns>
|
||||
static string ReadHeaderProcess(Socket sK)
|
||||
private string ParseResponse(byte[] responseBytes, HttpArgs args)
|
||||
{
|
||||
StringBuilder bulider = new StringBuilder();
|
||||
while (true)
|
||||
string responseStr = Encoding.UTF8.GetString(responseBytes);
|
||||
string[] splitStr = responseStr.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2);
|
||||
if (splitStr.Length == 2)
|
||||
{
|
||||
byte[] buff = new byte[1];
|
||||
int read = sK.Receive(buff, SocketFlags.None);
|
||||
if (read > 0)
|
||||
string responseHeader = splitStr[0];
|
||||
string responseBody = splitStr[1];
|
||||
|
||||
if (responseHeader.StartsWith("HTTP/1.1 400 Bad Request"))
|
||||
{
|
||||
bulider.Append((char)buff[0]);
|
||||
State = 400;
|
||||
return string.Empty;
|
||||
}
|
||||
string temp = bulider.ToString();
|
||||
if (temp.Contains("\r\n\r\n") || temp.Contains("</html>"))
|
||||
else if (responseHeader.StartsWith("HTTP/1.1 404"))
|
||||
{
|
||||
break;
|
||||
State = 404;
|
||||
return string.Empty;
|
||||
}
|
||||
else if (responseHeader.StartsWith("HTTP/1.1 302") || responseHeader.StartsWith("HTTP/1.1 301"))
|
||||
{
|
||||
State = 302;
|
||||
int start = responseHeader.ToUpper().IndexOf("LOCATION");
|
||||
if (start > 0)
|
||||
{
|
||||
string temp = responseHeader.Substring(start, responseHeader.Length - start);
|
||||
string[] sArry = Regex.Split(temp, "\r\n");
|
||||
args.Url = sArry[0].Remove(0, 10);
|
||||
if (args.Url == "")
|
||||
return string.Empty;
|
||||
return InternalSocketHttp(args); //注意:302协议需要重定向
|
||||
}
|
||||
}
|
||||
else if (responseHeader.StartsWith("HTTP/1.1 200")) //读取内容
|
||||
{
|
||||
State = 200;
|
||||
DecompressWebPage(ref responseBytes, responseHeader);
|
||||
//转码
|
||||
responseBody = DecodeWebStringByHttpHeader(responseBytes, responseHeader);
|
||||
responseBody = DecodeWebStringByHtmlPageInfo(responseBytes, responseBody);
|
||||
}
|
||||
string[] responseBodys = responseBody.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2);
|
||||
if (responseBodys.Length == 2)
|
||||
responseBody = responseBodys[1];
|
||||
else
|
||||
responseBody = string.Empty;
|
||||
return responseBody;
|
||||
}
|
||||
return bulider.ToString();
|
||||
}
|
||||
/// <summary>
|
||||
/// 注意:此函数可能产生死循环
|
||||
/// </summary>
|
||||
/// <param name="ssl"></param>
|
||||
/// <returns></returns>
|
||||
static string ParseResponse(Socket sK)
|
||||
{
|
||||
StringBuilder bulider = new StringBuilder();
|
||||
byte[] buff = new byte[1024];
|
||||
int len = sK.Receive(buff);
|
||||
string temp = string.Empty;
|
||||
while (len > 0)
|
||||
{
|
||||
byte[] reads = new byte[len];
|
||||
Array.Copy(buff, 0, reads, 0, len);
|
||||
bulider.Append(Encoding.Default.GetString(reads));
|
||||
temp = bulider.ToString();
|
||||
if (temp.ToUpper().Contains("</HTML>") || temp.Contains("\0"))
|
||||
{
|
||||
break;
|
||||
}
|
||||
len = sK.Receive(buff);
|
||||
}
|
||||
return temp;
|
||||
return string.Empty;
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Helper
|
||||
|
||||
static byte[] ParseHttpArgs(HttpMethod method, HttpArgs args)
|
||||
/// <summary>
/// Decompresses a gzip-encoded HTTP response in place.
/// </summary>
/// <remarks>
/// The buffer holds the raw response including the HTTP header. Only the bytes after the
/// header terminator (CRLF CRLF) are gzip data, so only that part is decompressed; the
/// header bytes are copied back in front of the decompressed body so the buffer keeps
/// the same "header + body" layout it had on entry.
/// </remarks>
/// <param name="responseBytes">Raw response bytes including the HTTP header; replaced with the decompressed response when the header declares gzip.</param>
/// <param name="strHeader">HTTP header text, searched for a <c>Content-Encoding: gzip</c> line.</param>
private void DecompressWebPage(ref byte[] responseBytes, string strHeader)
{
    if (responseBytes == null || string.IsNullOrEmpty(strHeader))
    {
        return;
    }

    // No trailing CRLF is required: when Content-Encoding is the last header line,
    // a header string that was split off the body no longer ends with one.
    Regex regZip = new Regex(@"Content-Encoding:\s*gzip", RegexOptions.IgnoreCase);
    if (!regZip.IsMatch(strHeader))
    {
        return;
    }

    // Locate the first byte after the CRLF CRLF header terminator.
    int bodyStart = -1;
    for (int i = 0; i + 3 < responseBytes.Length; i++)
    {
        if (responseBytes[i] == (byte)'\r' && responseBytes[i + 1] == (byte)'\n'
            && responseBytes[i + 2] == (byte)'\r' && responseBytes[i + 3] == (byte)'\n')
        {
            bodyStart = i + 4;
            break;
        }
    }

    if (bodyStart < 0)
    {
        // No header terminator found: treat the whole buffer as gzip payload.
        responseBytes = Decompress(responseBytes);
        return;
    }

    byte[] compressedBody = new byte[responseBytes.Length - bodyStart];
    Buffer.BlockCopy(responseBytes, bodyStart, compressedBody, 0, compressedBody.Length);
    byte[] body = Decompress(compressedBody);

    // Re-assemble "header + CRLF CRLF + decompressed body".
    byte[] result = new byte[bodyStart + body.Length];
    Buffer.BlockCopy(responseBytes, 0, result, 0, bodyStart);
    Buffer.BlockCopy(body, 0, result, bodyStart, body.Length);
    responseBytes = result;
}
|
||||
|
||||
/// <summary>
/// Decompresses a gzip byte array.
/// </summary>
/// <param name="szSource">Gzip-compressed bytes.</param>
/// <returns>The decompressed bytes; an empty array when <paramref name="szSource"/> is null or empty.</returns>
private byte[] Decompress(byte[] szSource)
{
    if (szSource == null || szSource.Length == 0)
    {
        return new byte[0];
    }

    using (MemoryStream msSource = new MemoryStream(szSource))
    using (GZipStream stream = new GZipStream(msSource, CompressionMode.Decompress))
    using (MemoryStream msDest = new MemoryStream())
    {
        byte[] buffer = new byte[4096];
        int iCount;
        while ((iCount = stream.Read(buffer, 0, buffer.Length)) > 0)
        {
            // Write only the bytes actually read; a short read leaves stale data
            // in the rest of the buffer.
            msDest.Write(buffer, 0, iCount);
        }

        return msDest.ToArray();
    }
}
|
||||
|
||||
/// <summary>
/// Decodes response bytes using the character set named in the HTTP header text.
/// </summary>
/// <param name="responseBytes">Bytes to decode.</param>
/// <param name="strHeader">HTTP header text, searched for a <c>charset=</c> declaration.</param>
/// <returns>
/// The text decoded as GBK when the header declares <c>charset=GBK</c> or
/// <c>charset=gb2312</c> (in any letter case), otherwise decoded as UTF-8.
/// An empty string when <paramref name="responseBytes"/> is null or empty.
/// </returns>
private string DecodeWebStringByHttpHeader(byte[] responseBytes, string strHeader)
{
    if (responseBytes == null || responseBytes.Length == 0)
    {
        return string.Empty;
    }

    // Charset names are case-insensitive, so "charset=gbk" and "charset=GB2312" must match too.
    bool isGbk = strHeader != null
        && (strHeader.IndexOf("charset=GBK", StringComparison.OrdinalIgnoreCase) >= 0
            || strHeader.IndexOf("charset=gb2312", StringComparison.OrdinalIgnoreCase) >= 0);

    Encoding encoding = isGbk ? Encoding.GetEncoding("GBK") : Encoding.UTF8;
    return encoding.GetString(responseBytes);
}
|
||||
|
||||
/// <summary>
/// Re-decodes the page when an HTML meta Content-Type tag in it names a GB2312, GBK or Big5 charset.
/// </summary>
/// <param name="responseBytes">Bytes that are decoded again when a matching meta tag is found.</param>
/// <param name="strResponse">Page text as decoded so far; it is searched for the meta tag.</param>
/// <returns>
/// The GBK or Big5 decoding of <paramref name="responseBytes"/> when a matching meta tag
/// is present, otherwise <paramref name="strResponse"/> unchanged.
/// </returns>
private string DecodeWebStringByHtmlPageInfo(byte[] responseBytes, string strResponse)
{
    const RegexOptions options = RegexOptions.IgnoreCase;
    string decoded = strResponse;

    bool declaresGb =
        Regex.IsMatch(decoded, @"<meta[^>]+Content-Type[^>]+gb2312[^>]*>", options)
        || Regex.IsMatch(decoded, @"<meta[^>]+Content-Type[^>]+gbk[^>]*>", options);
    if (declaresGb)
    {
        decoded = Encoding.GetEncoding("GBK").GetString(responseBytes);
    }

    // The Big5 check runs on the text as it stands after the GBK step above.
    if (Regex.IsMatch(decoded, @"<meta[^>]+Content-Type[^>]+Big5[^>]*>", options))
    {
        decoded = Encoding.GetEncoding("Big5").GetString(responseBytes);
    }

    return decoded;
}
|
||||
|
||||
private byte[] ParseHttpArgs(HttpArgs args)
|
||||
{
|
||||
StringBuilder bulider = new StringBuilder();
|
||||
if (method.Equals(HttpMethod.POST))
|
||||
if (args.Method == HttpArgs.HttpMethod.POST)
|
||||
{
|
||||
bulider.AppendLine(string.Format("POST {0} HTTP/1.1", args.Url));
|
||||
bulider.AppendLine("Content-Type: application/x-www-form-urlencoded");
|
||||
@ -281,13 +317,15 @@ namespace EnAliKeywordSearch
|
||||
|
||||
//bulider.AppendLine("Connection: close");
|
||||
|
||||
bulider.AppendLine("Connection: Close");
|
||||
|
||||
if (!string.IsNullOrEmpty(args.Accept))
|
||||
bulider.AppendLine(string.Format("Accept: {0}", args.Accept));
|
||||
|
||||
if (!string.IsNullOrEmpty(args.Cookie))
|
||||
bulider.AppendLine(string.Format("Cookie: {0}", args.Cookie));
|
||||
|
||||
if (method.Equals(HttpMethod.POST))
|
||||
if (args.Method == HttpArgs.HttpMethod.POST)
|
||||
{
|
||||
bulider.AppendLine(string.Format("Content-Length: {0}\r\n", Encoding.Default.GetBytes(args.Data).Length));
|
||||
bulider.Append(args.Data);
|
||||
|
@ -1,6 +1,5 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace EnAliKeywordSearch
|
||||
|
Loading…
Reference in New Issue
Block a user