优化Socket获取流程 界面交互...

master
j502647092 2015-07-27 14:36:42 +08:00
parent d4012764f7
commit 24f77e9e79
7 changed files with 225 additions and 198 deletions

View File

@ -9,7 +9,7 @@
<AppDesignerFolder>Properties</AppDesignerFolder> <AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>AliKeywordSearch</RootNamespace> <RootNamespace>AliKeywordSearch</RootNamespace>
<AssemblyName>AliKeywordSearch</AssemblyName> <AssemblyName>AliKeywordSearch</AssemblyName>
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion> <TargetFrameworkVersion>v2.0</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment> <FileAlignment>512</FileAlignment>
<TargetFrameworkProfile /> <TargetFrameworkProfile />
</PropertyGroup> </PropertyGroup>
@ -40,10 +40,7 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<Reference Include="System" /> <Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Web" /> <Reference Include="System.Web" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Data" /> <Reference Include="System.Data" />
<Reference Include="System.Deployment" /> <Reference Include="System.Deployment" />
<Reference Include="System.Drawing" /> <Reference Include="System.Drawing" />

View File

@ -1,7 +1,6 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.Linq;
using System.Text; using System.Text;
namespace AliKeywordSearch namespace AliKeywordSearch

66
Frm_Main.Designer.cs generated
View File

@ -47,9 +47,9 @@
this.readin = new System.Windows.Forms.Button(); this.readin = new System.Windows.Forms.Button();
this.openFile = new System.Windows.Forms.OpenFileDialog(); this.openFile = new System.Windows.Forms.OpenFileDialog();
this.saveFile = new System.Windows.Forms.SaveFileDialog(); this.saveFile = new System.Windows.Forms.SaveFileDialog();
this.backgroundWorker = new System.ComponentModel.BackgroundWorker();
this.clearresult = new System.Windows.Forms.Button(); this.clearresult = new System.Windows.Forms.Button();
this.clearsearch = new System.Windows.Forms.Button(); this.clearsearch = new System.Windows.Forms.Button();
this.label2 = new System.Windows.Forms.Label();
((System.ComponentModel.ISupportInitialize)(this.SearchView)).BeginInit(); ((System.ComponentModel.ISupportInitialize)(this.SearchView)).BeginInit();
((System.ComponentModel.ISupportInitialize)(this.ResultView)).BeginInit(); ((System.ComponentModel.ISupportInitialize)(this.ResultView)).BeginInit();
this.SuspendLayout(); this.SuspendLayout();
@ -60,10 +60,10 @@
this.SearchView.Columns.AddRange(new System.Windows.Forms.DataGridViewColumn[] { this.SearchView.Columns.AddRange(new System.Windows.Forms.DataGridViewColumn[] {
this.company, this.company,
this.keyword}); this.keyword});
this.SearchView.Location = new System.Drawing.Point(12, 54); this.SearchView.Location = new System.Drawing.Point(11, 58);
this.SearchView.Name = "SearchView"; this.SearchView.Name = "SearchView";
this.SearchView.RowTemplate.Height = 23; this.SearchView.RowTemplate.Height = 23;
this.SearchView.Size = new System.Drawing.Size(843, 241); this.SearchView.Size = new System.Drawing.Size(850, 200);
this.SearchView.TabIndex = 0; this.SearchView.TabIndex = 0;
// //
// company // company
@ -86,10 +86,10 @@
this., this.,
this., this.,
this.}); this.});
this.ResultView.Location = new System.Drawing.Point(12, 322); this.ResultView.Location = new System.Drawing.Point(11, 293);
this.ResultView.Name = "ResultView"; this.ResultView.Name = "ResultView";
this.ResultView.RowTemplate.Height = 23; this.ResultView.RowTemplate.Height = 23;
this.ResultView.Size = new System.Drawing.Size(843, 460); this.ResultView.Size = new System.Drawing.Size(850, 300);
this.ResultView.TabIndex = 1; this.ResultView.TabIndex = 1;
// //
// 公司名称 // 公司名称
@ -118,7 +118,7 @@
// //
// search // search
// //
this.search.Location = new System.Drawing.Point(399, 9); this.search.Location = new System.Drawing.Point(360, 10);
this.search.Name = "search"; this.search.Name = "search";
this.search.Size = new System.Drawing.Size(152, 36); this.search.Size = new System.Drawing.Size(152, 36);
this.search.TabIndex = 2; this.search.TabIndex = 2;
@ -128,7 +128,7 @@
// //
// max // max
// //
this.max.Location = new System.Drawing.Point(112, 12); this.max.Location = new System.Drawing.Point(663, 10);
this.max.Name = "max"; this.max.Name = "max";
this.max.Size = new System.Drawing.Size(83, 21); this.max.Size = new System.Drawing.Size(83, 21);
this.max.TabIndex = 3; this.max.TabIndex = 3;
@ -137,8 +137,7 @@
// cnAli // cnAli
// //
this.cnAli.AutoSize = true; this.cnAli.AutoSize = true;
this.cnAli.Enabled = false; this.cnAli.Location = new System.Drawing.Point(740, 35);
this.cnAli.Location = new System.Drawing.Point(303, 19);
this.cnAli.Name = "cnAli"; this.cnAli.Name = "cnAli";
this.cnAli.Size = new System.Drawing.Size(71, 16); this.cnAli.Size = new System.Drawing.Size(71, 16);
this.cnAli.TabIndex = 5; this.cnAli.TabIndex = 5;
@ -150,7 +149,7 @@
// //
this.enAli.AutoSize = true; this.enAli.AutoSize = true;
this.enAli.Checked = true; this.enAli.Checked = true;
this.enAli.Location = new System.Drawing.Point(226, 19); this.enAli.Location = new System.Drawing.Point(663, 35);
this.enAli.Name = "enAli"; this.enAli.Name = "enAli";
this.enAli.Size = new System.Drawing.Size(71, 16); this.enAli.Size = new System.Drawing.Size(71, 16);
this.enAli.TabIndex = 6; this.enAli.TabIndex = 6;
@ -161,17 +160,18 @@
// label1 // label1
// //
this.label1.AutoSize = true; this.label1.AutoSize = true;
this.label1.Location = new System.Drawing.Point(16, 16); this.label1.Location = new System.Drawing.Point(592, 13);
this.label1.Name = "label1"; this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(89, 12); this.label1.Size = new System.Drawing.Size(65, 12);
this.label1.TabIndex = 7; this.label1.TabIndex = 7;
this.label1.Text = "最大查询页数: "; this.label1.Text = "查询页数: ";
// //
// state // state
// //
this.state.AutoSize = true; this.state.AutoSize = true;
this.state.Font = new System.Drawing.Font("宋体", 10.5F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(134))); this.state.Font = new System.Drawing.Font("宋体", 10.5F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(134)));
this.state.Location = new System.Drawing.Point(96, 302); this.state.ForeColor = System.Drawing.SystemColors.ControlText;
this.state.Location = new System.Drawing.Point(98, 267);
this.state.Name = "state"; this.state.Name = "state";
this.state.Size = new System.Drawing.Size(37, 14); this.state.Size = new System.Drawing.Size(37, 14);
this.state.TabIndex = 8; this.state.TabIndex = 8;
@ -180,21 +180,21 @@
// //
// saveout // saveout
// //
this.saveout.Location = new System.Drawing.Point(777, 16); this.saveout.Location = new System.Drawing.Point(181, 29);
this.saveout.Name = "saveout"; this.saveout.Name = "saveout";
this.saveout.Size = new System.Drawing.Size(75, 23); this.saveout.Size = new System.Drawing.Size(73, 23);
this.saveout.TabIndex = 9; this.saveout.TabIndex = 9;
this.saveout.Text = "导出数据"; this.saveout.Text = "导出关键词";
this.saveout.UseVisualStyleBackColor = true; this.saveout.UseVisualStyleBackColor = true;
this.saveout.Click += new System.EventHandler(this.saveout_Click); this.saveout.Click += new System.EventHandler(this.saveout_Click);
// //
// readin // readin
// //
this.readin.Location = new System.Drawing.Point(615, 16); this.readin.Location = new System.Drawing.Point(19, 29);
this.readin.Name = "readin"; this.readin.Name = "readin";
this.readin.Size = new System.Drawing.Size(75, 23); this.readin.Size = new System.Drawing.Size(73, 23);
this.readin.TabIndex = 10; this.readin.TabIndex = 10;
this.readin.Text = "导入数据"; this.readin.Text = "导入关键词";
this.readin.UseVisualStyleBackColor = true; this.readin.UseVisualStyleBackColor = true;
this.readin.Click += new System.EventHandler(this.readin_Click); this.readin.Click += new System.EventHandler(this.readin_Click);
// //
@ -204,9 +204,9 @@
// //
// clearresult // clearresult
// //
this.clearresult.Location = new System.Drawing.Point(14, 299); this.clearresult.Location = new System.Drawing.Point(19, 264);
this.clearresult.Name = "clearresult"; this.clearresult.Name = "clearresult";
this.clearresult.Size = new System.Drawing.Size(68, 20); this.clearresult.Size = new System.Drawing.Size(73, 23);
this.clearresult.TabIndex = 11; this.clearresult.TabIndex = 11;
this.clearresult.Text = "清除结果"; this.clearresult.Text = "清除结果";
this.clearresult.UseVisualStyleBackColor = true; this.clearresult.UseVisualStyleBackColor = true;
@ -214,18 +214,28 @@
// //
// clearsearch // clearsearch
// //
this.clearsearch.Location = new System.Drawing.Point(696, 16); this.clearsearch.Location = new System.Drawing.Point(100, 29);
this.clearsearch.Name = "clearsearch"; this.clearsearch.Name = "clearsearch";
this.clearsearch.Size = new System.Drawing.Size(75, 23); this.clearsearch.Size = new System.Drawing.Size(73, 23);
this.clearsearch.TabIndex = 12; this.clearsearch.TabIndex = 12;
this.clearsearch.Text = "清除数据"; this.clearsearch.Text = "清除关键词";
this.clearsearch.UseVisualStyleBackColor = true; this.clearsearch.UseVisualStyleBackColor = true;
this.clearsearch.Click += new System.EventHandler(this.clearsearch_Click); this.clearsearch.Click += new System.EventHandler(this.clearsearch_Click);
// //
// label2
//
this.label2.AutoSize = true;
this.label2.Location = new System.Drawing.Point(592, 37);
this.label2.Name = "label2";
this.label2.Size = new System.Drawing.Size(65, 12);
this.label2.TabIndex = 13;
this.label2.Text = "查询类型: ";
//
// Frm_Main // Frm_Main
// //
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.None; this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.None;
this.ClientSize = new System.Drawing.Size(864, 794); this.ClientSize = new System.Drawing.Size(872, 603);
this.Controls.Add(this.label2);
this.Controls.Add(this.clearsearch); this.Controls.Add(this.clearsearch);
this.Controls.Add(this.clearresult); this.Controls.Add(this.clearresult);
this.Controls.Add(this.readin); this.Controls.Add(this.readin);
@ -239,6 +249,8 @@
this.Controls.Add(this.ResultView); this.Controls.Add(this.ResultView);
this.Controls.Add(this.SearchView); this.Controls.Add(this.SearchView);
this.Icon = ((System.Drawing.Icon)(resources.GetObject("$this.Icon"))); this.Icon = ((System.Drawing.Icon)(resources.GetObject("$this.Icon")));
this.MaximizeBox = false;
this.MinimizeBox = false;
this.Name = "Frm_Main"; this.Name = "Frm_Main";
this.Text = "阿里排名查询软件 2015-7-25"; this.Text = "阿里排名查询软件 2015-7-25";
this.Load += new System.EventHandler(this.Frm_Main_Load); this.Load += new System.EventHandler(this.Frm_Main_Load);
@ -269,9 +281,9 @@
private System.Windows.Forms.Button readin; private System.Windows.Forms.Button readin;
private System.Windows.Forms.OpenFileDialog openFile; private System.Windows.Forms.OpenFileDialog openFile;
private System.Windows.Forms.SaveFileDialog saveFile; private System.Windows.Forms.SaveFileDialog saveFile;
private System.ComponentModel.BackgroundWorker backgroundWorker;
private System.Windows.Forms.Button clearresult; private System.Windows.Forms.Button clearresult;
private System.Windows.Forms.Button clearsearch; private System.Windows.Forms.Button clearsearch;
private System.Windows.Forms.Label label2;
} }
} }

View File

@ -3,7 +3,6 @@ using System.Collections.Generic;
using System.ComponentModel; using System.ComponentModel;
using System.Data; using System.Data;
using System.Drawing; using System.Drawing;
using System.Linq;
using System.Text; using System.Text;
using System.Windows.Forms; using System.Windows.Forms;
using AliKeywordSearch; using AliKeywordSearch;
@ -15,37 +14,14 @@ namespace EnAliKeywordSearch
public Frm_Main() public Frm_Main()
{ {
InitializeComponent(); InitializeComponent();
backgroundWorker.ProgressChanged += backgroundWorker_ProgressChanged;
backgroundWorker.DoWork += backgroundWorker_DoWork;
backgroundWorker.RunWorkerCompleted += backgroundWorker_RunWorkerCompleted;
} }
private void backgroundWorker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
}
void backgroundWorker_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
}
private void backgroundWorker_DoWork(object sender, DoWorkEventArgs e)
{
}
void DoWork(object sender, DoWorkEventArgs e)
{
BackgroundWorker bw = sender as BackgroundWorker;
}
private void search_Click(object sender, EventArgs e) private void search_Click(object sender, EventArgs e)
{ {
search.Enabled = false; search.Enabled = false;
state.ForeColor = Color.Black;
state.ForeColor = Color.Red;
HttpHelper httpHelper = new HttpHelper();
foreach (DataGridViewRow item in this.SearchView.Rows) foreach (DataGridViewRow item in this.SearchView.Rows)
{ {
string cpy = item.Cells["company"].Value == null ? "" : item.Cells["company"].Value.ToString(); string cpy = item.Cells["company"].Value == null ? "" : item.Cells["company"].Value.ToString();
@ -54,12 +30,16 @@ namespace EnAliKeywordSearch
{ {
foreach (string key in keys.Split(',')) foreach (string key in keys.Split(','))
{ {
bool maybe = false;
int maxpage = this.ToInt(max.Text); int maxpage = this.ToInt(max.Text);
int index = this.ResultView.Rows.Add(); int index = this.ResultView.Rows.Add();
this.ResultView.FirstDisplayedScrollingRowIndex = index;
this.ResultView.Rows[index].Cells["公司名称"].Value = cpy; this.ResultView.Rows[index].Cells["公司名称"].Value = cpy;
this.ResultView.Rows[index].Cells["关键词"].Value = key; this.ResultView.Rows[index].Cells["关键词"].Value = key;
state.ForeColor = Color.Black;
state.Text = "正在查询 " + cpy + " 的关键词 " + key; state.Text = "正在查询 " + cpy + " 的关键词 " + key;
Application.DoEvents(); Application.DoEvents();
string pageinfo = string.Empty;
for (int i = 1; i <= maxpage; i++) for (int i = 1; i <= maxpage; i++)
{ {
string url = string.Empty; string url = string.Empty;
@ -69,30 +49,35 @@ namespace EnAliKeywordSearch
url = String.Format("http://www.alibaba.com/products/F0/{0}/{1}.html", key, i); url = String.Format("http://www.alibaba.com/products/F0/{0}/{1}.html", key, i);
else else
url = String.Format("http://s.1688.com/selloffer/offer_search.htm?keywords={0}&beginPage={1}", key, i); url = String.Format("http://s.1688.com/selloffer/offer_search.htm?keywords={0}&beginPage={1}", key, i);
string htmldoc = HttpHelper.Get(url); string htmldoc = httpHelper.Get(url);
if (string.IsNullOrEmpty(htmldoc)) if (string.IsNullOrEmpty(htmldoc))
{ {
this.ResultView.Rows[index].Cells["排名"].Value = "网页抓取失败"; state.ForeColor = Color.Red;
break; state.Text = "关键词 " + key + " 第 " + i + " 页 网页抓取失败 错误:" + HttpHelper.ErrMsg;
maybe = true;
continue;
} }
if (htmldoc.Contains(cpy)) if (htmldoc.Contains(cpy))
{ {
this.ResultView.Rows[index].Cells["排名"].Value = "第" + i + "页"; pageinfo = "第" + i + "页";
break; break;
} }
if (i == maxpage) if (i == maxpage)
{ {
this.ResultView.Rows[index].Cells["排名"].Value = maxpage + "页以后"; pageinfo = maxpage + "页以后";
break; break;
} }
Application.DoEvents(); Application.DoEvents();
} }
this.ResultView.Rows[index].Cells["排名"].Value = pageinfo + (maybe ? "(可能不准确)" : "");
this.ResultView.Rows[index].Cells["排名"].Style.ForeColor = maybe ? Color.Red : Color.Black;
this.ResultView.Rows[index].Cells["查询时间"].Value = DateTime.Now.ToString(); this.ResultView.Rows[index].Cells["查询时间"].Value = DateTime.Now.ToString();
Application.DoEvents(); Application.DoEvents();
} }
} }
Application.DoEvents(); Application.DoEvents();
} }
state.ForeColor = Color.Green;
state.Text = "所有关键词查询完成!"; state.Text = "所有关键词查询完成!";
search.Enabled = true; search.Enabled = true;
} }

View File

@ -141,9 +141,6 @@
<metadata name="saveFile.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a"> <metadata name="saveFile.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<value>161, 17</value> <value>161, 17</value>
</metadata> </metadata>
<metadata name="backgroundWorker.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<value>262, 17</value>
</metadata>
<assembly alias="System.Drawing" name="System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a" /> <assembly alias="System.Drawing" name="System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a" />
<data name="$this.Icon" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64"> <data name="$this.Icon" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value> <value>

View File

@ -1,7 +1,9 @@
using System; using System;
using System.Collections; using System.Collections;
using System.Collections.Generic;
using System.Diagnostics; using System.Diagnostics;
using System.IO; using System.IO;
using System.IO.Compression;
using System.Net; using System.Net;
using System.Net.Security; using System.Net.Security;
using System.Net.Sockets; using System.Net.Sockets;
@ -10,17 +12,16 @@ using System.Security.Cryptography.X509Certificates;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading; using System.Threading;
/************************************************************************/
/* Author:huliang
* Email:huliang@yahoo.cn
* QQ:12658501
*
/************************************************************************/
namespace EnAliKeywordSearch namespace EnAliKeywordSearch
{ {
public class HttpArgs public class HttpArgs
{ {
public enum HttpMethod
{
GET,
POST
}
public string Url { get; set; } public string Url { get; set; }
public string Host { get; set; } public string Host { get; set; }
public int Port { get; set; } public int Port { get; set; }
@ -29,20 +30,16 @@ namespace EnAliKeywordSearch
public string Cookie { get; set; } public string Cookie { get; set; }
public string Data { get; set; } public string Data { get; set; }
public string UA { get; set; } public string UA { get; set; }
public HttpMethod Method { get; set; }
} }
public static class HttpHelper public class HttpHelper
{ {
public static int State = 0;
public static string ErrMsg = string.Empty;
/// <summary> /// <summary>
/// 提交方法 /// 提交方法
/// </summary> /// </summary>
enum HttpMethod
{
GET,
POST
}
#region HttpWebRequest & HttpWebResponse #region HttpWebRequest & HttpWebResponse
/// <summary> /// <summary>
@ -51,10 +48,11 @@ namespace EnAliKeywordSearch
/// <param name="geturl">请求地址</param> /// <param name="geturl">请求地址</param>
/// <param name="cookieser">Cookies存储器</param> /// <param name="cookieser">Cookies存储器</param>
/// <returns>请求返回的Stream</returns> /// <returns>请求返回的Stream</returns>
public static string Get(string url) public string Get(string url)
{ {
HttpArgs args = ParseURL(url); HttpArgs args = ParseURL(url);
string strhtml = InternalSocketHttp(args, HttpMethod.GET); args.Method = HttpArgs.HttpMethod.GET;
string strhtml = InternalSocketHttp(args);
return strhtml; return strhtml;
} }
@ -65,7 +63,7 @@ namespace EnAliKeywordSearch
/// <param name="bytes">Post数据</param> /// <param name="bytes">Post数据</param>
/// <param name="cookieser">Cllkies存储器</param> /// <param name="cookieser">Cllkies存储器</param>
/// <returns>请求返回的流</returns> /// <returns>请求返回的流</returns>
public static string Post(string url, public string Post(string url,
byte[] bytes, byte[] bytes,
CookieContainer cookies, CookieContainer cookies,
Encoding encoding) Encoding encoding)
@ -78,7 +76,7 @@ namespace EnAliKeywordSearch
/// </summary> /// </summary>
/// <param name="strUrl">url字符串</param> /// <param name="strUrl">url字符串</param>
/// <returns>host字符串</returns> /// <returns>host字符串</returns>
private static HttpArgs ParseURL(string strUrl) private HttpArgs ParseURL(string strUrl)
{ {
HttpArgs args = new HttpArgs(); HttpArgs args = new HttpArgs();
@ -123,146 +121,184 @@ namespace EnAliKeywordSearch
#region Socket #region Socket
public static string Get(HttpArgs args) string InternalSocketHttp(HttpArgs args)
{ {
return InternalSocketHttp(args, HttpMethod.GET); using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
}
public static string Post(IPEndPoint endpoint,
HttpArgs args)
{
return InternalSocketHttp(args, HttpMethod.POST);
}
static string InternalSocketHttp(HttpArgs args, HttpMethod method)
{
using (Socket sK = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
{ {
try try
{ {
sK.Connect(args.Host, args.Port); socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.SendTimeout, 1000);
if (sK.Connected) socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReceiveTimeout, 5000);
socket.Connect(args.Host, args.Port);
if (socket.Connected)
{ {
byte[] buff = ParseHttpArgs(method, args); byte[] buff = ParseHttpArgs(args);
if (sK.Send(buff) > 0) if (socket.Send(buff) > 0)
{ {
string html = ParseResponse(sK, args); List<byte> responseBytes = new List<byte>();
return html; byte[] buffer = new byte[1024];
int iNumber = socket.Receive(buffer, buffer.Length, SocketFlags.None);
while (iNumber > 0)//使用了Connection: Close 所以判断长度为0 时停止接受
{
responseBytes.AddRange(new List<byte>(buffer));//添加数据到List
iNumber = socket.Receive(buffer, buffer.Length, SocketFlags.None);//继续接收数据
}
return ParseResponse(responseBytes.ToArray()/*转换List为数组*/, args);
} }
} }
} }
catch (Exception ex) catch (Exception e)
{ {
Console.WriteLine(ex.Message); ErrMsg = e.Message;
} }
}
return null;
}
private static string ParseResponse(Socket sK, HttpArgs args)
{
string header = ReadHeaderProcess(sK);
if (header.StartsWith("HTTP/1.1 400 Bad Request"))
{
return string.Empty; return string.Empty;
} }
if (header.StartsWith("HTTP/1.1 302"))
{
int start = header
.ToUpper().IndexOf("LOCATION");
if (start > 0)
{
string temp = header.Substring(start, header.Length - start);
string[] sArry = Regex.Split(temp, "\r\n");
args.Url = sArry[0].Remove(0, 10);
return Get(args); //注意302协议需要重定向
}
}
else if (header.StartsWith("HTTP/1.1 200")) //继续读取内容
{
int start = header.ToUpper().IndexOf("CONTENT-LENGTH");
int content_length = 0;
if (start > 0)
{
string temp = header.Substring(start, header.Length - start);
string[] sArry = Regex.Split(temp, "\r\n");
content_length = Convert.ToInt32(sArry[0].Split(':')[1]);
if (content_length > 0)
{
byte[] bytes = new byte[content_length];
if (sK.Receive(bytes) > 0)
{
return Encoding.Default.GetString(bytes);
}
}
}
else
{
//不存在Content-Length协议头
return ParseResponse(sK);
}
}
return string.Empty;
} }
/// <summary> private string ParseResponse(byte[] responseBytes, HttpArgs args)
/// 读取协议头
/// </summary>
/// <param name="args"></param>
/// <returns></returns>
static string ReadHeaderProcess(Socket sK)
{ {
StringBuilder bulider = new StringBuilder(); string responseStr = Encoding.UTF8.GetString(responseBytes);
while (true) string[] splitStr = responseStr.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2);
if (splitStr.Length == 2)
{ {
byte[] buff = new byte[1]; string responseHeader = splitStr[0];
int read = sK.Receive(buff, SocketFlags.None); string responseBody = splitStr[1];
if (read > 0)
if (responseHeader.StartsWith("HTTP/1.1 400 Bad Request"))
{ {
bulider.Append((char)buff[0]); State = 400;
return string.Empty;
} }
string temp = bulider.ToString(); else if (responseHeader.StartsWith("HTTP/1.1 404"))
if (temp.Contains("\r\n\r\n") || temp.Contains("</html>"))
{ {
break; State = 404;
return string.Empty;
} }
else if (responseHeader.StartsWith("HTTP/1.1 302") || responseHeader.StartsWith("HTTP/1.1 301"))
{
State = 302;
int start = responseHeader.ToUpper().IndexOf("LOCATION");
if (start > 0)
{
string temp = responseHeader.Substring(start, responseHeader.Length - start);
string[] sArry = Regex.Split(temp, "\r\n");
args.Url = sArry[0].Remove(0, 10);
if (args.Url == "")
return string.Empty;
return InternalSocketHttp(args); //注意302协议需要重定向
}
}
else if (responseHeader.StartsWith("HTTP/1.1 200")) //读取内容
{
State = 200;
DecompressWebPage(ref responseBytes, responseHeader);
//转码
responseBody = DecodeWebStringByHttpHeader(responseBytes, responseHeader);
responseBody = DecodeWebStringByHtmlPageInfo(responseBytes, responseBody);
}
string[] responseBodys = responseBody.Split(new char[4] { '\r', '\n', '\r', '\n' }, 2);
if (responseBodys.Length == 2)
responseBody = responseBodys[1];
else
responseBody = string.Empty;
return responseBody;
} }
return bulider.ToString(); return string.Empty;
}
/// <summary>
/// 注意:此函数可能产生死循环
/// </summary>
/// <param name="ssl"></param>
/// <returns></returns>
static string ParseResponse(Socket sK)
{
StringBuilder bulider = new StringBuilder();
byte[] buff = new byte[1024];
int len = sK.Receive(buff);
string temp = string.Empty;
while (len > 0)
{
byte[] reads = new byte[len];
Array.Copy(buff, 0, reads, 0, len);
bulider.Append(Encoding.Default.GetString(reads));
temp = bulider.ToString();
if (temp.ToUpper().Contains("</HTML>") || temp.Contains("\0"))
{
break;
}
len = sK.Receive(buff);
}
return temp;
} }
#endregion #endregion
#region Helper #region Helper
static byte[] ParseHttpArgs(HttpMethod method, HttpArgs args) /// <summary>
/// Gunzips the response buffer in place when the HTTP header declares gzip content encoding.
/// </summary>
/// <param name="responseBytes">
/// Response bytes, passed by reference; replaced with the output of <c>Decompress</c>
/// when the header matches, otherwise left untouched.
/// NOTE(review): the whole array is handed to <c>Decompress</c> as-is; if it still
/// carries the HTTP header bytes, gzip decoding will fail - confirm against the caller.
/// </param>
/// <param name="strHeader">
/// HTTP header text. It is searched, ignoring case, for a <c>Content-Encoding: gzip</c>
/// line that is terminated by CRLF.
/// </param>
private void DecompressWebPage(ref byte[] responseBytes, string strHeader)
{
    const string gzipHeaderPattern = @"Content-Encoding:\s+gzip[^\n]*\r\n";

    // Nothing to do unless the server announced a gzip-compressed payload.
    bool declaresGzip = Regex.IsMatch(strHeader, gzipHeaderPattern, RegexOptions.IgnoreCase);
    if (!declaresGzip)
    {
        return;
    }

    responseBytes = Decompress(responseBytes);
}
/// <summary>
/// Decompresses a gzip-encoded byte array.
/// </summary>
/// <param name="szSource">The complete gzip stream (gzip header, deflate data, trailer).</param>
/// <returns>The decompressed bytes; an empty array when the stream carries no data.</returns>
/// <exception cref="ArgumentNullException"><paramref name="szSource"/> is null.</exception>
/// <exception cref="InvalidDataException"><paramref name="szSource"/> is not valid gzip data.</exception>
private byte[] Decompress(byte[] szSource)
{
    // All three streams are disposed deterministically, even when decoding throws.
    using (MemoryStream msSource = new MemoryStream(szSource))
    using (GZipStream stream = new GZipStream(msSource, CompressionMode.Decompress))
    using (MemoryStream msDest = new MemoryStream())
    {
        byte[] buffer = new byte[4096];
        int iCount;

        // Read returns 0 at end of stream. Only the bytes actually read are written,
        // so a short final read can neither overrun the destination nor leak stale
        // bytes from a previous iteration into the result.
        while ((iCount = stream.Read(buffer, 0, buffer.Length)) > 0)
        {
            msDest.Write(buffer, 0, iCount);
        }

        return msDest.ToArray();
    }
}
/// <summary>
/// Decodes response bytes into text using the charset named in the HTTP header.
/// </summary>
/// <param name="responseBytes">The bytes to decode.</param>
/// <param name="strHeader">HTTP header text that is inspected for a <c>charset=</c> token.</param>
/// <returns>
/// The bytes decoded as GBK when the header names GBK or GB2312 (in any letter case);
/// otherwise the bytes decoded as UTF-8.
/// </returns>
private string DecodeWebStringByHttpHeader(byte[] responseBytes, string strHeader)
{
    // Charset names are case-insensitive, so "charset=gbk" and "charset=GB2312"
    // must select the same decoder as the spellings that were matched before.
    bool isGbFamily =
        strHeader.IndexOf("charset=GBK", StringComparison.OrdinalIgnoreCase) >= 0 ||
        strHeader.IndexOf("charset=gb2312", StringComparison.OrdinalIgnoreCase) >= 0;

    // GBK is a superset of GB2312, hence one decoder serves both labels.
    Encoding encoding = isGbFamily ? Encoding.GetEncoding("GBK") : Encoding.UTF8;
    return encoding.GetString(responseBytes);
}
/// <summary>
/// Re-decodes the page when an HTML <c>meta</c> Content-Type tag names a GB2312, GBK or Big5 charset.
/// </summary>
/// <param name="responseBytes">The raw bytes the page text was decoded from.</param>
/// <param name="strResponse">The page text as decoded so far; scanned for charset meta tags.</param>
/// <returns>
/// The text re-decoded from <paramref name="responseBytes"/> with the charset found in the
/// markup, or <paramref name="strResponse"/> unchanged when no such meta tag is present.
/// </returns>
private string DecodeWebStringByHtmlPageInfo(byte[] responseBytes, string strResponse)
{
    const string gb2312MetaPattern = @"<meta[^>]+Content-Type[^>]+gb2312[^>]*>";
    const string gbkMetaPattern = @"<meta[^>]+Content-Type[^>]+gbk[^>]*>";
    const string big5MetaPattern = @"<meta[^>]+Content-Type[^>]+Big5[^>]*>";

    string decoded = strResponse;

    bool declaresGb = Regex.IsMatch(decoded, gb2312MetaPattern, RegexOptions.IgnoreCase)
        || Regex.IsMatch(decoded, gbkMetaPattern, RegexOptions.IgnoreCase);
    if (declaresGb)
    {
        decoded = Encoding.GetEncoding("GBK").GetString(responseBytes);
    }

    // The Big5 probe deliberately runs on the current text, i.e. after any GBK re-decode above.
    if (Regex.IsMatch(decoded, big5MetaPattern, RegexOptions.IgnoreCase))
    {
        decoded = Encoding.GetEncoding("Big5").GetString(responseBytes);
    }

    return decoded;
}
private byte[] ParseHttpArgs(HttpArgs args)
{ {
StringBuilder bulider = new StringBuilder(); StringBuilder bulider = new StringBuilder();
if (method.Equals(HttpMethod.POST)) if (args.Method == HttpArgs.HttpMethod.POST)
{ {
bulider.AppendLine(string.Format("POST {0} HTTP/1.1", args.Url)); bulider.AppendLine(string.Format("POST {0} HTTP/1.1", args.Url));
bulider.AppendLine("Content-Type: application/x-www-form-urlencoded"); bulider.AppendLine("Content-Type: application/x-www-form-urlencoded");
@ -281,13 +317,15 @@ namespace EnAliKeywordSearch
//bulider.AppendLine("Connection: close"); //bulider.AppendLine("Connection: close");
bulider.AppendLine("Connection: Close");
if (!string.IsNullOrEmpty(args.Accept)) if (!string.IsNullOrEmpty(args.Accept))
bulider.AppendLine(string.Format("Accept: {0}", args.Accept)); bulider.AppendLine(string.Format("Accept: {0}", args.Accept));
if (!string.IsNullOrEmpty(args.Cookie)) if (!string.IsNullOrEmpty(args.Cookie))
bulider.AppendLine(string.Format("Cookie: {0}", args.Cookie)); bulider.AppendLine(string.Format("Cookie: {0}", args.Cookie));
if (method.Equals(HttpMethod.POST)) if (args.Method == HttpArgs.HttpMethod.POST)
{ {
bulider.AppendLine(string.Format("Content-Length: {0}\r\n", Encoding.Default.GetBytes(args.Data).Length)); bulider.AppendLine(string.Format("Content-Length: {0}\r\n", Encoding.Default.GetBytes(args.Data).Length));
bulider.Append(args.Data); bulider.Append(args.Data);

View File

@ -1,6 +1,5 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms; using System.Windows.Forms;
namespace EnAliKeywordSearch namespace EnAliKeywordSearch