1
0
mirror of https://e.coding.net/circlecloud/AliKeywordSearch.git synced 2024-11-16 00:48:59 +00:00
AliKeywordSearch/HttpHelper.cs

326 lines
11 KiB
C#
Raw Normal View History

2015-07-25 10:07:54 +00:00
using System;
using System.Collections;
using System.Diagnostics;
using System.IO;
using System.Net;
using System.Net.Security;
using System.Net.Sockets;
using System.Security.Authentication;
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
/************************************************************************/
/* Author:huliang
* Email:huliang@yahoo.cn
* QQ:12658501
*
/************************************************************************/
namespace EnAliKeywordSearch
{
/// <summary>
/// Plain data holder describing one HTTP request sent by <c>HttpHelper</c> over a raw socket.
/// </summary>
public class HttpArgs
{
/// <summary>Request target written on the request line (<c>GET {Url} HTTP/1.1</c>).</summary>
public string Url { get; set; }
/// <summary>Host name to connect to; also written into the <c>Host</c> header.</summary>
public string Host { get; set; }
/// <summary>TCP port to connect to; also written into the <c>Host</c> header.</summary>
public int Port { get; set; }
/// <summary>Value of the <c>Accept</c> header; the header is omitted when null or empty.</summary>
public string Accept { get; set; }
/// <summary>Value of the <c>Referer</c> header; the header is omitted when null or empty.</summary>
public string Referer { get; set; }
/// <summary>Value of the <c>Cookie</c> header; the header is omitted when null or empty.</summary>
public string Cookie { get; set; }
/// <summary>Request body appended after the headers for POST requests.</summary>
public string Data { get; set; }
/// <summary>Value of the <c>User-Agent</c> header.</summary>
public string UA { get; set; }
}
public static class HttpHelper
{
/// <summary>
/// HTTP request method used when building the request line.
/// </summary>
enum HttpMethod
{
// Request without a body.
GET,
// Request whose body is taken from HttpArgs.Data.
POST
}
#region HttpWebRequest & HttpWebResponse
/// <summary>
/// Performs an HTTP GET for an absolute URL over a raw socket.
/// </summary>
/// <param name="url">Absolute URL containing the <c>//</c> scheme separator, e.g. <c>http://host[:port]/path</c>.</param>
/// <returns>
/// The response text produced by the socket pipeline (possibly empty), or <c>null</c>
/// when the URL is null, empty or cannot be parsed, or when the request fails.
/// </returns>
public static string Get(string url)
{
    if (string.IsNullOrEmpty(url))
    {
        return null;
    }

    HttpArgs args = ParseURL(url);
    if (args == null)
    {
        // ParseURL reports an unparseable URL with null. Stop here instead of
        // opening a socket and relying on a swallowed NullReferenceException.
        return null;
    }

    return InternalSocketHttp(args, HttpMethod.GET);
}
/// <summary>
/// POST entry point taking a URL and raw body bytes.
/// NOTE: not implemented - the body ignores every argument and always returns <c>null</c>.
/// </summary>
/// <param name="url">Request URL (currently unused).</param>
/// <param name="bytes">POST payload (currently unused).</param>
/// <param name="cookies">Cookie container (currently unused).</param>
/// <param name="encoding">Text encoding (currently unused).</param>
/// <returns>Always <c>null</c>.</returns>
public static string Post(string url,
byte[] bytes,
CookieContainer cookies,
Encoding encoding)
{
return null;
}
/// <summary>
/// Splits an absolute URL into host, port and request path and returns them
/// together with the default request headers (Accept and User-Agent).
/// </summary>
/// <param name="strUrl">URL such as <c>http://www.alibaba.com:80/products/1.html</c>.</param>
/// <returns>
/// The populated <see cref="HttpArgs"/>, or <c>null</c> when the URL is null or empty,
/// has no <c>//</c> separator after a scheme, or carries an invalid port.
/// </returns>
private static HttpArgs ParseURL(string strUrl)
{
    if (string.IsNullOrEmpty(strUrl))
    {
        return null;
    }

    // http://www.alibaba.com/products/Egg_Laying_Block_Machine/1.html
    int schemeEnd = strUrl.IndexOf("//", StringComparison.Ordinal);
    if (schemeEnd <= 0)
    {
        return null;
    }

    HttpArgs args = new HttpArgs();
    args.Host = "";
    args.Port = 80;
    args.Referer = "";
    args.Cookie = "";
    // Default to the root path so a URL without a path still yields a valid request line.
    args.Url = "/";
    args.Accept = "text/html";//,application/xhtml+xml,application/xml,application/json;";
    args.UA = "Mozilla/5.0+(Compatible;+Baiduspider/2.0;++http://www.baidu.com/search/spider.html)";

    // www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html
    string remainder = strUrl.Substring(schemeEnd + 2).Trim();
    string address = remainder;
    int pathStart = remainder.IndexOf('/');
    if (pathStart > 0)
    {
        // www.alibaba.com:80
        address = remainder.Substring(0, pathStart);
        args.Url = remainder.Substring(pathStart);
    }

    // Look for the port inside the authority only. Splitting the whole URL would
    // drag the path into the port text, and a ':' in the path or query would be
    // mistaken for a port separator.
    int colon = address.IndexOf(':');
    if (colon <= 0)
    {
        args.Host = address;
        return args;
    }

    args.Host = address.Substring(0, colon);
    string portText = address.Substring(colon + 1);
    if (portText.Length > 0)
    {
        int port;
        if (!int.TryParse(portText, out port) || port <= 0 || port > IPEndPoint.MaxPort)
        {
            return null;
        }
        args.Port = port;
    }
    // An empty port ("host:") keeps the default of 80.
    return args;
}
#endregion
#region Socket
/// <summary>
/// Sends the request described by <paramref name="args"/> as an HTTP GET over a raw socket.
/// </summary>
/// <param name="args">Host, port, request target and header values for the request.</param>
/// <returns>Whatever the socket pipeline returns for the request; <c>null</c> when it fails.</returns>
public static string Get(HttpArgs args)
{
    string response = InternalSocketHttp(args, HttpMethod.GET);
    return response;
}
/// <summary>
/// Sends the request described by <paramref name="args"/> as an HTTP POST over a raw socket.
/// </summary>
/// <param name="endpoint">Not read by this method; the connection target comes from <paramref name="args"/>.</param>
/// <param name="args">Host, port, request target, header values and body for the request.</param>
/// <returns>Whatever the socket pipeline returns for the request; <c>null</c> when it fails.</returns>
public static string Post(IPEndPoint endpoint,
    HttpArgs args)
{
    string response = InternalSocketHttp(args, HttpMethod.POST);
    return response;
}
/// <summary>
/// Opens a TCP connection to the host and port in <paramref name="args"/>, sends the
/// serialized request and returns the parsed response.
/// </summary>
/// <param name="args">Connection target and request values.</param>
/// <param name="method">HTTP method used to build the request.</param>
/// <returns>
/// The parsed response text, or <c>null</c> when the connection is not established,
/// nothing is sent, or an exception occurs (its message is written to the console).
/// </returns>
static string InternalSocketHttp(HttpArgs args, HttpMethod method)
{
    using (Socket connection = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
    {
        try
        {
            connection.Connect(args.Host, args.Port);
            if (!connection.Connected)
            {
                return null;
            }

            byte[] request = ParseHttpArgs(method, args);
            int sent = connection.Send(request);
            if (sent <= 0)
            {
                return null;
            }

            return ParseResponse(connection, args);
        }
        catch (Exception error)
        {
            // Best effort: report the failure and fall through to the null result.
            Console.WriteLine(error.Message);
        }
    }
    return null;
}
/// <summary>
/// Reads the response that follows a request already sent on <paramref name="sK"/>
/// and returns its body text.
/// </summary>
/// <param name="sK">Connected socket positioned at the start of the response.</param>
/// <param name="args">
/// Request arguments. When a 302 response is followed, <c>Url</c> is overwritten with the
/// value of the Location header and the request is re-issued through <c>Get(HttpArgs)</c>.
/// </param>
/// <returns>
/// The body text for a 200 response, the result of the follow-up request for a 302
/// response with a Location header, and an empty string otherwise.
/// </returns>
private static string ParseResponse(Socket sK, HttpArgs args)
{
    string header = ReadHeaderProcess(sK);
    if (header.StartsWith("HTTP/1.1 400 Bad Request", StringComparison.Ordinal))
    {
        return string.Empty;
    }

    if (header.StartsWith("HTTP/1.1 302", StringComparison.Ordinal))
    {
        string location = FindHeaderValue(header, "Location");
        if (!string.IsNullOrEmpty(location))
        {
            // A 302 must be followed with a new request.
            // NOTE(review): Host and Port are left unchanged, so a Location that points
            // at a different host is still requested from the original one, and there is
            // no limit on the number of consecutive redirects.
            args.Url = location;
            return Get(args);
        }
    }
    else if (header.StartsWith("HTTP/1.1 200", StringComparison.Ordinal))
    {
        string lengthText = FindHeaderValue(header, "Content-Length");
        if (lengthText == null)
        {
            // No Content-Length header: read until the fallback terminator is seen.
            return ParseResponse(sK);
        }

        int contentLength = Convert.ToInt32(lengthText);
        if (contentLength > 0)
        {
            byte[] body = new byte[contentLength];
            int total = 0;
            // A single Receive may deliver only part of the body, so keep reading
            // until the announced length has arrived or the peer closes the connection.
            while (total < contentLength)
            {
                int read = sK.Receive(body, total, contentLength - total, SocketFlags.None);
                if (read <= 0)
                {
                    break;
                }
                total += read;
            }

            if (total > 0)
            {
                // Decode only the bytes actually received (no trailing NUL padding).
                return Encoding.Default.GetString(body, 0, total);
            }
        }
    }
    return string.Empty;
}

/// <summary>
/// Returns the trimmed value of the first header line whose field name equals
/// <paramref name="name"/> (ordinal, case-insensitive), or <c>null</c> when absent.
/// </summary>
private static string FindHeaderValue(string header, string name)
{
    string[] lines = header.Split(new[] { "\r\n" }, StringSplitOptions.None);
    // Index 0 is the status line, so header fields start at index 1.
    for (int i = 1; i < lines.Length; i++)
    {
        int colon = lines[i].IndexOf(':');
        if (colon <= 0)
        {
            continue;
        }

        string field = lines[i].Substring(0, colon).Trim();
        if (string.Equals(field, name, StringComparison.OrdinalIgnoreCase))
        {
            return lines[i].Substring(colon + 1).Trim();
        }
    }
    return null;
}
/// <summary>
/// Reads the response header from the socket one byte at a time.
/// </summary>
/// <param name="sK">Connected socket positioned at the start of the response.</param>
/// <returns>
/// Everything read up to and including the blank line that ends the header
/// (or a closing html tag, whichever comes first). If the peer closes the
/// connection earlier, the text read so far is returned.
/// </returns>
static string ReadHeaderProcess(Socket sK)
{
    StringBuilder builder = new StringBuilder();
    byte[] single = new byte[1];
    while (true)
    {
        int read = sK.Receive(single, SocketFlags.None);
        if (read <= 0)
        {
            // Receive returns 0 once the peer has shut the connection down;
            // without this exit the loop would spin forever.
            break;
        }

        builder.Append((char)single[0]);
        // Only one character is added per iteration, so a terminator can first
        // appear only at the very end of the text. Checking the tail avoids
        // rebuilding and rescanning the whole string for every byte.
        if (BuilderEndsWith(builder, "\r\n\r\n") || BuilderEndsWith(builder, "</html>"))
        {
            break;
        }
    }
    return builder.ToString();
}

/// <summary>
/// Returns true when <paramref name="text"/> ends with <paramref name="suffix"/> (ordinal comparison).
/// </summary>
private static bool BuilderEndsWith(StringBuilder text, string suffix)
{
    int offset = text.Length - suffix.Length;
    if (offset < 0)
    {
        return false;
    }

    for (int i = 0; i < suffix.Length; i++)
    {
        if (text[offset + i] != suffix[i])
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Reads a response body that has no Content-Length header.
/// Reading stops when a closing html tag (any letter case) or a NUL character has been
/// received, or when the peer closes the connection.
/// Note: if the peer keeps the connection open and never sends a terminator,
/// this call blocks indefinitely.
/// </summary>
/// <param name="sK">Connected socket positioned at the start of the body.</param>
/// <returns>All text received so far; an empty string when nothing was received.</returns>
static string ParseResponse(Socket sK)
{
    StringBuilder builder = new StringBuilder();
    byte[] buff = new byte[1024];
    // A stateful decoder keeps the trailing bytes of a multi-byte character that is
    // split across two chunks, instead of corrupting it at every chunk boundary.
    Decoder decoder = Encoding.Default.GetDecoder();
    char[] chars = new char[Encoding.Default.GetMaxCharCount(buff.Length)];
    string text = string.Empty;

    int len = sK.Receive(buff);
    while (len > 0)
    {
        int charCount = decoder.GetChars(buff, 0, len, chars, 0);
        builder.Append(chars, 0, charCount);
        text = builder.ToString();

        bool sawClosingTag = text.IndexOf("</html>", StringComparison.OrdinalIgnoreCase) >= 0;
        if (sawClosingTag || text.IndexOf('\0') >= 0)
        {
            break;
        }
        len = sK.Receive(buff);
    }
    return text;
}
#endregion
#region Helper
/// <summary>
/// Serializes the request described by <paramref name="args"/> into the bytes sent on the wire.
/// </summary>
/// <param name="method">GET or POST; POST adds Content-Type, Content-Length and the body.</param>
/// <param name="args">Request target, host, port, header values and (for POST) the body.</param>
/// <returns>The request head, plus the body for POST, encoded with <c>Encoding.Default</c>.</returns>
static byte[] ParseHttpArgs(HttpMethod method, HttpArgs args)
{
    // HTTP requires CRLF line endings. AppendLine would emit Environment.NewLine,
    // which is a bare LF on non-Windows runtimes.
    const string LineEnd = "\r\n";
    bool isPost = method.Equals(HttpMethod.POST);
    StringBuilder builder = new StringBuilder();

    if (isPost)
    {
        builder.Append(string.Format("POST {0} HTTP/1.1", args.Url)).Append(LineEnd);
        builder.Append("Content-Type: application/x-www-form-urlencoded").Append(LineEnd);
    }
    else
    {
        builder.Append(string.Format("GET {0} HTTP/1.1", args.Url)).Append(LineEnd);
    }

    builder.Append(string.Format("Host: {0}:{1}", args.Host, args.Port)).Append(LineEnd);
    builder.Append("User-Agent: " + args.UA).Append(LineEnd);

    // Optional headers are written only when a value is present.
    if (!string.IsNullOrEmpty(args.Referer))
    {
        builder.Append(string.Format("Referer: {0}", args.Referer)).Append(LineEnd);
    }
    if (!string.IsNullOrEmpty(args.Accept))
    {
        builder.Append(string.Format("Accept: {0}", args.Accept)).Append(LineEnd);
    }
    if (!string.IsNullOrEmpty(args.Cookie))
    {
        builder.Append(string.Format("Cookie: {0}", args.Cookie)).Append(LineEnd);
    }

    if (isPost)
    {
        // A missing body is sent as an empty one instead of throwing on null.
        string body = args.Data ?? string.Empty;
        builder.Append(string.Format("Content-Length: {0}", Encoding.Default.GetByteCount(body))).Append(LineEnd);
        // Blank line separating the head from the body.
        builder.Append(LineEnd);
        builder.Append(body);
    }
    else
    {
        // Blank line terminating the head.
        builder.Append(LineEnd);
    }

    return Encoding.Default.GetBytes(builder.ToString());
}
#endregion
}
/// <summary>
/// Minimal static timer: <see cref="start"/> records the current time and
/// <see cref="getTimes"/> returns the milliseconds elapsed since then.
/// The state is a single static value shared by all callers.
/// </summary>
public class MilliTimer
{
    // Reference date for getTotalMilliseconds. Built once from numeric parts so it
    // does not depend on the current culture or calendar the way parsing a date
    // string on every call does.
    private static readonly DateTime Epoch = new DateTime(1970, 1, 1);

    // Value of getTotalMilliseconds() captured by the last call to start().
    private static double times { get; set; }

    /// <summary>Records the current time as the start of the measured interval.</summary>
    public static void start()
    {
        times = getTotalMilliseconds();
    }

    /// <summary>Returns the milliseconds elapsed since the last call to <see cref="start"/>.</summary>
    public static double getTimes()
    {
        return getTotalMilliseconds() - times;
    }

    /// <summary>
    /// Returns the milliseconds between 1970-01-01 00:00 and the current local time.
    /// Local time (<c>DateTime.Now</c>) is kept so that returned values stay the same as before.
    /// </summary>
    public static double getTotalMilliseconds()
    {
        return DateTime.Now.Subtract(Epoch).TotalMilliseconds;
    }
}
}