AliKeywordSearch/HttpHelper.cs

326 lines
11 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

using System;
using System.Collections;
using System.Diagnostics;
using System.IO;
using System.Net;
using System.Net.Security;
using System.Net.Sockets;
using System.Security.Authentication;
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
/************************************************************************/
/* Author:huliang
* Email:huliang@yahoo.cn
* QQ:12658501
* Note: please credit the source when reposting
/************************************************************************/
namespace EnAliKeywordSearch
{
/// <summary>
/// Mutable bag of values describing one HTTP request sent by <see cref="HttpHelper"/>.
/// </summary>
public class HttpArgs
{
    // ---- Connection target ----

    /// <summary>Host name used for the socket connection and the Host header.</summary>
    public string Host { get; set; }

    /// <summary>TCP port used for the socket connection and the Host header.</summary>
    public int Port { get; set; }

    /// <summary>Request target written into the request line.</summary>
    public string Url { get; set; }

    // ---- Optional request headers ----

    /// <summary>Value of the Accept header; the header is omitted when null or empty.</summary>
    public string Accept { get; set; }

    /// <summary>Value of the Referer header; the header is omitted when null or empty.</summary>
    public string Referer { get; set; }

    /// <summary>Value of the Cookie header; the header is omitted when null or empty.</summary>
    public string Cookie { get; set; }

    /// <summary>Value of the User-Agent header.</summary>
    public string UA { get; set; }

    // ---- Payload ----

    /// <summary>Request body appended after the headers for POST requests.</summary>
    public string Data { get; set; }
}
public static class HttpHelper
{
/// <summary>
/// HTTP request method; selects the request line (and, for POST, the body headers)
/// when the request bytes are built.
/// </summary>
enum HttpMethod
{
GET,
POST
}
#region HttpWebRequest & HttpWebResponse
/// <summary>
/// Issues an HTTP GET for the given URL over a raw socket.
/// </summary>
/// <param name="url">Absolute URL containing the <c>//</c> scheme separator, e.g. <c>http://host[:port]/path</c>.</param>
/// <returns>
/// The text produced by the socket request, or <c>null</c> when the URL is missing,
/// cannot be parsed, or the request fails.
/// </returns>
public static string Get(string url)
{
    // ParseURL dereferences its argument, so reject a missing URL here instead of crashing.
    if (string.IsNullOrEmpty(url))
    {
        return null;
    }

    HttpArgs args = ParseURL(url);

    // ParseURL reports an unparseable URL with null; surface that as "no result"
    // explicitly rather than relying on the socket code tripping over a null reference.
    if (args == null)
    {
        return null;
    }

    return InternalSocketHttp(args, HttpMethod.GET);
}
/// <summary>
/// POST overload taking a URL. Not implemented: the body ignores every argument
/// and always returns <c>null</c>.
/// </summary>
/// <param name="url">Request address (currently unused).</param>
/// <param name="bytes">POST data (currently unused).</param>
/// <param name="cookies">Cookie store (currently unused).</param>
/// <param name="encoding">Currently unused; presumably the response/request text encoding — TODO confirm when implementing.</param>
/// <returns>Always <c>null</c>.</returns>
public static string Post(string url,
byte[] bytes,
CookieContainer cookies,
Encoding encoding)
{
return null;
}
/// <summary>
/// Splits an absolute URL into the host, port and request path needed for a raw socket request,
/// and fills in the default Accept and User-Agent header values.
/// </summary>
/// <param name="strUrl">URL of the form <c>scheme://host[:port][/path]</c>.</param>
/// <returns>
/// A populated <see cref="HttpArgs"/>, or <c>null</c> when the URL is null, has no <c>//</c>
/// separator after a scheme, has an empty host, or carries a port that is not a valid TCP port.
/// </returns>
private static HttpArgs ParseURL(string strUrl)
{
    if (strUrl == null)
    {
        return null;
    }

    string url = strUrl.Trim();

    // http://www.alibaba.com/products/Egg_Laying_Block_Machine/1.html
    int schemeEnd = url.IndexOf("//", StringComparison.Ordinal);
    if (schemeEnd <= 0)
    {
        return null;
    }

    // www.alibaba.com:80/products/Egg_Laying_Block_Machine/1.html
    string remainder = url.Substring(schemeEnd + 2);

    // Everything before the first '/' is the authority (host[:port]); the rest is the path.
    // With no path at all the request target must still be "/", otherwise the request line is invalid.
    string address = remainder;
    string path = "/";
    int pathStart = remainder.IndexOf('/');
    if (pathStart >= 0)
    {
        address = remainder.Substring(0, pathStart);
        path = remainder.Substring(pathStart);
    }

    // Look for the port inside the authority only. Searching the whole URL would pick up
    // the path ("80/products/...") or any ':' in the query string and fail to parse.
    string host = address;
    int port = 80;
    int colon = address.IndexOf(':');
    if (colon >= 0)
    {
        host = address.Substring(0, colon);
        string portText = address.Substring(colon + 1);

        // An empty port ("host:") keeps the default; anything else must be a valid TCP port.
        if (portText.Length > 0
            && (!int.TryParse(portText, out port) || port < 1 || port > 65535))
        {
            return null;
        }
    }

    if (host.Length == 0)
    {
        return null;
    }

    // NOTE(review): the scheme is not inspected, so https URLs also default to port 80
    // and are sent without TLS.
    HttpArgs args = new HttpArgs();
    args.Host = host;
    args.Port = port;
    args.Url = path;
    args.Referer = "";
    args.Cookie = "";
    args.Accept = "text/html";
    args.UA = "Mozilla/5.0+(Compatible;+Baiduspider/2.0;++http://www.baidu.com/search/spider.html)";
    return args;
}
#endregion
#region Socket
/// <summary>
/// Sends a GET request described by <paramref name="args"/> over a raw socket.
/// </summary>
/// <param name="args">Host, port, request target and header values of the request.</param>
/// <returns>The text produced by the socket request, or <c>null</c> if it failed.</returns>
public static string Get(HttpArgs args)
{
    string response = InternalSocketHttp(args, HttpMethod.GET);
    return response;
}
/// <summary>
/// Sends a POST request described by <paramref name="args"/> over a raw socket.
/// </summary>
/// <param name="endpoint">Not used by this method; the connection target is taken from <paramref name="args"/>.</param>
/// <param name="args">Host, port, request target, header values and body of the request.</param>
/// <returns>The text produced by the socket request, or <c>null</c> if it failed.</returns>
public static string Post(IPEndPoint endpoint, HttpArgs args)
{
    string response = InternalSocketHttp(args, HttpMethod.POST);
    return response;
}
/// <summary>
/// Opens a TCP connection to the host and port in <paramref name="args"/>, sends the request
/// built for <paramref name="method"/>, and returns the parsed response text.
/// </summary>
/// <param name="args">Connection target and request values.</param>
/// <param name="method">Request method used to build the request bytes.</param>
/// <returns>
/// The response text, or <c>null</c> when the socket is not connected, nothing was sent,
/// or any exception occurred (its message is written to the console).
/// </returns>
static string InternalSocketHttp(HttpArgs args, HttpMethod method)
{
    using (Socket connection = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
    {
        try
        {
            connection.Connect(args.Host, args.Port);
            if (!connection.Connected)
            {
                return null;
            }

            byte[] request = ParseHttpArgs(method, args);
            int sent = connection.Send(request);
            if (sent <= 0)
            {
                return null;
            }

            return ParseResponse(connection, args);
        }
        catch (Exception error)
        {
            // Best effort: report the failure and fall through to the null result.
            Console.WriteLine(error.Message);
        }
    }

    return null;
}
/// <summary>
/// Reads the response to a request already sent on <paramref name="sK"/>: follows a 302
/// redirect, reads the body of a 200 response, and yields an empty string for anything else.
/// </summary>
/// <param name="sK">Connected socket positioned at the start of the response.</param>
/// <param name="args">
/// Arguments of the request. On a redirect they are updated in place with the new target
/// and the request is re-issued with <see cref="Get(HttpArgs)"/>.
/// </param>
/// <returns>
/// The response body; <see cref="string.Empty"/> for a 400 response, an unsupported status,
/// a redirect without a Location value, or a missing/invalid/non-positive body length.
/// </returns>
private static string ParseResponse(Socket sK, HttpArgs args)
{
    string header = ReadHeaderProcess(sK);
    if (header.StartsWith("HTTP/1.1 400 Bad Request", StringComparison.Ordinal))
    {
        return string.Empty;
    }

    if (header.StartsWith("HTTP/1.1 302", StringComparison.Ordinal))
    {
        string location = FindHeaderValue(header, "Location");
        if (string.IsNullOrEmpty(location))
        {
            return string.Empty;
        }

        // A 302 must be followed by a new request to the redirect target.
        // NOTE(review): there is no limit on the number of redirects followed.
        ApplyRedirectTarget(args, location);
        return Get(args);
    }

    if (!header.StartsWith("HTTP/1.1 200", StringComparison.Ordinal))
    {
        return string.Empty;
    }

    string lengthText = FindHeaderValue(header, "Content-Length");
    if (lengthText == null)
    {
        // No Content-Length header: read until a terminator or the connection closes.
        return ParseResponse(sK);
    }

    int contentLength;
    if (!int.TryParse(lengthText, out contentLength) || contentLength <= 0)
    {
        return string.Empty;
    }

    // A single Receive may deliver only part of the body, so keep reading until the
    // announced length has arrived or the peer closes the connection.
    byte[] body = new byte[contentLength];
    int received = 0;
    while (received < contentLength)
    {
        int read = sK.Receive(body, received, contentLength - received, SocketFlags.None);
        if (read <= 0)
        {
            break;
        }
        received += read;
    }

    // Decode only what was actually received; the unused tail of the buffer is all zero bytes.
    return received > 0 ? Encoding.Default.GetString(body, 0, received) : string.Empty;
}

/// <summary>
/// Returns the trimmed value of the first header line named <paramref name="name"/>
/// (matched case-insensitively, independent of culture), or <c>null</c> when it is absent.
/// </summary>
private static string FindHeaderValue(string header, string name)
{
    // Anchor on the preceding line break so "Location" does not match "Content-Location".
    string marker = "\r\n" + name + ":";
    int start = header.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
    if (start < 0)
    {
        return null;
    }

    int valueStart = start + marker.Length;
    int valueEnd = header.IndexOf("\r\n", valueStart, StringComparison.Ordinal);
    if (valueEnd < 0)
    {
        valueEnd = header.Length;
    }

    return header.Substring(valueStart, valueEnd - valueStart).Trim();
}

/// <summary>
/// Points <paramref name="args"/> at a redirect target. An absolute http URL replaces host,
/// port and path; any other value is used as the request target unchanged.
/// </summary>
private static void ApplyRedirectTarget(HttpArgs args, string location)
{
    Uri target;
    if (Uri.TryCreate(location, UriKind.Absolute, out target) && target.Scheme == Uri.UriSchemeHttp)
    {
        // Redirects may point at a different server; connecting to the old host would miss it.
        args.Host = target.Host;
        args.Port = target.Port;
        args.Url = target.PathAndQuery;
        return;
    }

    args.Url = location;
}
/// <summary>
/// Reads the response header from the socket one byte at a time.
/// </summary>
/// <param name="sK">Connected socket positioned at the start of the response.</param>
/// <returns>
/// Everything read up to and including the first blank line (<c>\r\n\r\n</c>) or the first
/// <c>&lt;/html&gt;</c>, or whatever was received before the peer closed the connection.
/// </returns>
static string ReadHeaderProcess(Socket sK)
{
    StringBuilder bulider = new StringBuilder();
    byte[] buff = new byte[1];
    while (true)
    {
        int read = sK.Receive(buff, SocketFlags.None);
        if (read <= 0)
        {
            // Zero bytes means the peer closed the connection; without this exit the
            // loop would spin forever waiting for a terminator that can never arrive.
            break;
        }

        bulider.Append((char)buff[0]);

        // Bytes arrive one at a time, so a terminator can only ever complete at the tail;
        // checking the tail avoids re-scanning the whole header after every byte.
        if (BuilderEndsWith(bulider, "\r\n\r\n") || BuilderEndsWith(bulider, "</html>"))
        {
            break;
        }
    }
    return bulider.ToString();
}

/// <summary>
/// Ordinal check that <paramref name="text"/> currently ends with <paramref name="suffix"/>.
/// </summary>
private static bool BuilderEndsWith(StringBuilder text, string suffix)
{
    int offset = text.Length - suffix.Length;
    if (offset < 0)
    {
        return false;
    }

    for (int i = 0; i < suffix.Length; i++)
    {
        if (text[offset + i] != suffix[i])
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Reads a response body of unknown length from the socket.
/// Reading stops at the first <c>&lt;/html&gt;</c> (any letter case), at the first NUL character,
/// or when the peer closes the connection.
/// </summary>
/// <remarks>
/// Warning: if none of those conditions occurs (for example a kept-alive connection serving
/// non-HTML content) the receive call keeps waiting and this method may never return.
/// </remarks>
/// <param name="sK">Connected socket positioned at the start of the body.</param>
/// <returns>The text received so far, including the terminator when one was seen.</returns>
static string ParseResponse(Socket sK)
{
    const string closingTag = "</html>";
    StringBuilder bulider = new StringBuilder();

    // A stateful decoder keeps multi-byte characters intact when they straddle two reads;
    // decoding each chunk on its own would corrupt them.
    Decoder decoder = Encoding.Default.GetDecoder();
    byte[] buff = new byte[1024];
    char[] chars = new char[Encoding.Default.GetMaxCharCount(buff.Length)];

    int len = sK.Receive(buff);
    while (len > 0)
    {
        // Only the new text, plus enough of the old tail to catch a tag split across
        // two reads, needs scanning; earlier text was already checked.
        int scanStart = Math.Max(0, bulider.Length - (closingTag.Length - 1));

        int charCount = decoder.GetChars(buff, 0, len, chars, 0);
        bulider.Append(chars, 0, charCount);

        string tail = bulider.ToString(scanStart, bulider.Length - scanStart);
        if (tail.IndexOf(closingTag, StringComparison.OrdinalIgnoreCase) >= 0 || tail.IndexOf('\0') >= 0)
        {
            break;
        }

        len = sK.Receive(buff);
    }
    return bulider.ToString();
}
#endregion
#region Helper
/// <summary>
/// Serializes a request into the bytes sent on the wire: request line, headers,
/// blank line and, for POST, the body.
/// </summary>
/// <param name="method">GET or POST; POST adds Content-Type, Content-Length and the body.</param>
/// <param name="args">Request target, host, port, header values and body.</param>
/// <returns>The request encoded with <see cref="Encoding.Default"/>.</returns>
static byte[] ParseHttpArgs(HttpMethod method, HttpArgs args)
{
    // HTTP requires CRLF line endings. AppendLine would emit Environment.NewLine,
    // which is a bare LF on non-Windows hosts, so the terminator is written explicitly.
    const string crlf = "\r\n";

    bool isPost = method == HttpMethod.POST;

    // An empty request target makes the request line invalid; fall back to the root path.
    string target = string.IsNullOrEmpty(args.Url) ? "/" : args.Url;

    StringBuilder bulider = new StringBuilder();
    bulider.Append(isPost ? "POST " : "GET ").Append(target).Append(" HTTP/1.1").Append(crlf);
    if (isPost)
    {
        bulider.Append("Content-Type: application/x-www-form-urlencoded").Append(crlf);
    }

    bulider.Append("Host: ").Append(args.Host).Append(':').Append(args.Port).Append(crlf);
    bulider.Append("User-Agent: ").Append(args.UA).Append(crlf);

    // Optional headers are written only when a value is present.
    if (!string.IsNullOrEmpty(args.Referer))
    {
        bulider.Append("Referer: ").Append(args.Referer).Append(crlf);
    }
    if (!string.IsNullOrEmpty(args.Accept))
    {
        bulider.Append("Accept: ").Append(args.Accept).Append(crlf);
    }
    if (!string.IsNullOrEmpty(args.Cookie))
    {
        bulider.Append("Cookie: ").Append(args.Cookie).Append(crlf);
    }

    if (isPost)
    {
        // A POST without data is sent with an empty body instead of failing on null.
        string body = args.Data ?? string.Empty;
        bulider.Append("Content-Length: ").Append(Encoding.Default.GetByteCount(body)).Append(crlf);
        bulider.Append(crlf);
        bulider.Append(body);
    }
    else
    {
        // Blank line terminating the header section.
        bulider.Append(crlf);
    }

    return Encoding.Default.GetBytes(bulider.ToString());
}
#endregion
}
/// <summary>
/// Process-wide millisecond timer: call <see cref="start"/>, then read the elapsed
/// time with <see cref="getTimes"/>.
/// </summary>
public class MilliTimer
{
    // Fixed reference date. Built from numbers rather than parsed from text so the
    // result does not depend on the current culture's date format or calendar.
    private static readonly DateTime Epoch = new DateTime(1970, 1, 1);

    // Running stopwatch for the current measurement; null until start() is called.
    private static Stopwatch watch;

    /// <summary>
    /// Starts (or restarts) the measurement.
    /// </summary>
    public static void start()
    {
        // Stopwatch is unaffected by wall-clock adjustments such as DST or clock sync.
        watch = Stopwatch.StartNew();
    }

    /// <summary>
    /// Milliseconds elapsed since the last call to <see cref="start"/>.
    /// </summary>
    /// <returns>
    /// Elapsed milliseconds; when <see cref="start"/> was never called, the value of
    /// <see cref="getTotalMilliseconds"/> (the start point then counts as 1970-01-01).
    /// </returns>
    public static double getTimes()
    {
        Stopwatch current = watch;
        if (current == null)
        {
            return getTotalMilliseconds();
        }
        return current.Elapsed.TotalMilliseconds;
    }

    /// <summary>
    /// Milliseconds between 1970-01-01 00:00 and the current local time.
    /// </summary>
    /// <returns>The difference in milliseconds, measured on the local clock.</returns>
    public static double getTotalMilliseconds()
    {
        return DateTime.Now.Subtract(Epoch).TotalMilliseconds;
    }
}
}