SEOKeywordSearch/Frm_Main.cs

280 lines
13 KiB
C#

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using AliKeywordSearch;
using System.Web;
using System.Reflection;
using System.Text.RegularExpressions;
using CityCraft;
namespace SEOKeywordSearch
{
public partial class Frm_Main : Form
{
/// <summary>
/// Names of the search engines that are currently enabled.
/// Starts out containing "百度"; check_CheckedChanged adds and removes entries.
/// </summary>
private List<string> searchen = new List<string>(new string[] { "百度" });

/// <summary>
/// Creates the form and runs InitializeComponent (defined outside this part of the class).
/// </summary>
public Frm_Main()
{
    this.InitializeComponent();
}
// Search engines that can be queried. Each member carries its result-page URL template in a
// UrlValue attribute; search_Click fills {0} with the URL-encoded keyword and {1} with a page number.
// NOTE(review): the member identifiers are not visible in this copy of the file (every attribute
// is followed directly by "," or "}"). search_Click compares a member name with "搜狗" and the
// enabled-engine list starts with "百度", so the members are presumably named after the
// engines — restore the original identifiers before compiling.
enum SearchType
{
// Baidu: {1} is followed by a literal 0 in the template (search_Click passes page - 1).
[UrlValue("https://www.baidu.com/s?wd={0}&pn={1}0&ie=utf-8")]
,
// Bing: {1} is followed by a literal 1 in the template (search_Click passes page - 1).
[UrlValue("http://cn.bing.com/search?q={0}&first={1}1&FORM=PERE1")]
,
// Sogou: {1} is used as-is (search_Click passes the page number itself).
[UrlValue("http://www.sogou.com/web?query={0}&page={1}&ie=utf8")]
}
// Reference request URLs per search engine:
// 360 (cannot be used): url = String.Format("http://www.haosou.com/s?q={0}&pn={1}&client=aff-360daohang&ie=utf-8", HttpUtility.UrlEncode(key), i);
// Bing: url = String.Format("http://cn.bing.com/search?q={0}&first={1}1&FORM=PERE1", HttpUtility.UrlEncode(key), i - 1);
// Sogou: url = String.Format("http://www.sogou.com/web?query={0}&page={1}&ie=utf8", HttpUtility.UrlEncode(key), i);
// Baidu: url = String.Format("https://www.baidu.com/s?wd={0}&pn={1}0&ie=utf-8", HttpUtility.UrlEncode(key), i - 1);
/// <summary>
/// Attribute that attaches a piece of text (used in this file as a URL template)
/// to the element it decorates.
/// </summary>
public class UrlValue : System.Attribute
{
    /// <summary>
    /// Creates the attribute with the given text.
    /// </summary>
    /// <param name="value">Text later exposed through <see cref="Value"/>.</param>
    public UrlValue(string value)
    {
        this.Value = value;
    }

    /// <summary>
    /// Gets the text that was passed to the constructor.
    /// </summary>
    public string Value { get; private set; }
}
/// <summary>
/// Reads the text attached to an enum member through a UrlValue attribute.
/// </summary>
public static class UrlEnum
{
    /// <summary>
    /// Returns the UrlValue text declared on the given enum member.
    /// </summary>
    /// <param name="value">Enum value whose member is inspected.</param>
    /// <returns>
    /// The Value of the first UrlValue attribute on the member, or null when the member
    /// has no such attribute or the value does not correspond to a named member.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    public static string GetUrl(Enum value)
    {
        if (value == null)
        {
            throw new ArgumentNullException("value");
        }

        // GetField returns null when ToString() is not the name of a declared member
        // (for example a number cast to the enum type), so guard before dereferencing.
        FieldInfo field = value.GetType().GetField(value.ToString());
        if (field == null)
        {
            return null;
        }

        object[] attributes = field.GetCustomAttributes(typeof(UrlValue), false);
        if (attributes == null || attributes.Length == 0)
        {
            return null;
        }

        return ((UrlValue)attributes[0]).Value;
    }
}
/// <summary>
/// Click handler that runs the ranking query.
/// For every SearchView row with a non-empty "company" and "keyword" cell, the keyword cell is
/// split on ',' and each keyword gets a new ResultView row. For each enabled search engine the
/// result pages 1..max.Text are fetched and searched for the HTML-encoded company name; the
/// outcome is written to the "&lt;engine&gt;r" result cell, first-page hits increment the row's
/// "&lt;engine&gt;s" counter, and the counters of the enabled engines are added to the "sum" cell.
/// The search button is disabled while the query runs and re-enabled afterwards.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void search_Click(object sender, EventArgs e)
{
    if (searchen.Count == 0)
    {
        MessageBox.Show("请选择启用的搜索引擎!");
        return;
    }
    search.Enabled = false;
    state.ForeColor = Color.Red;
    try
    {
        HttpHelper httpHelper = new HttpHelper();
        foreach (DataGridViewRow item in this.SearchView.Rows)
        {
            string cpy = item.Cells["company"].Value == null ? "" : item.Cells["company"].Value.ToString();
            string keys = item.Cells["keyword"].Value == null ? "" : item.Cells["keyword"].Value.ToString();
            if (cpy != "" && keys != "")
            {
                foreach (string key in keys.Split(','))
                {
                    if (String.IsNullOrEmpty(key))
                        continue;
                    int maxpage = this.ToInt(max.Text);
                    int index = this.ResultView.Rows.Add();
                    this.ResultView.FirstDisplayedScrollingRowIndex = index;
                    this.ResultView.Rows[index].Cells["公司名称"].Value = cpy;
                    this.ResultView.Rows[index].Cells["关键词"].Value = key;
                    state.ForeColor = Color.Black;
                    Application.DoEvents();
                    foreach (SearchType s in Enum.GetValues(typeof(SearchType)))
                    {
                        string engine = s.ToString();
                        if (!searchen.Contains(engine))
                            continue;
                        string countColumn = engine + "s";
                        string resultColumn = engine + "r";
                        // Both values describe one engine's result cell, so they start fresh for
                        // every engine; otherwise a previous engine's page text or "inaccurate"
                        // marker would be shown for an engine that never produced a result.
                        bool maybe = false;
                        string pageinfo = string.Empty;
                        if (item.Cells[countColumn].Value == null)
                            item.Cells[countColumn].Value = 0;
                        state.ForeColor = Color.Black;
                        state.Text = "正在 " + engine + " 查询 " + cpy + " 的关键词 " + key;
                        for (int i = 1; i <= maxpage; i++)
                        {
                            // The engine named "搜狗" takes the page number itself; the others take page - 1.
                            string url = String.Format(UrlEnum.GetUrl(s), HttpUtility.UrlEncode(key), (engine == "搜狗" ? i : i - 1));
                            this.ResultView.Rows[index].Cells[resultColumn].Value = "查询第" + i + "页...";
                            Application.DoEvents();
                            // Same number of attempts as before for retry values of 2 or more
                            // (value - 1), but always at least one request, and a non-numeric
                            // retry text no longer throws.
                            int attempts = Math.Max(1, this.ToInt(retry.Text) - 1);
                            for (int k = 1; k <= attempts; k++)
                            {
                                httpHelper.Send(HttpMethod.GET, url);
                                // NOTE(review): the HttpReadyState member name after the dot is missing
                                // in this copy of the file; restore the original identifier.
                                while (httpHelper.readyState != HttpReadyState.)
                                {
                                    // Keep the UI responsive while waiting for the request.
                                    Application.DoEvents();
                                }
                                if (!(string.IsNullOrEmpty(httpHelper.responseBody) || httpHelper.responseBody.StartsWith("0")))
                                    break;
                                state.ForeColor = Color.Red;
                                state.Text = "关键词 " + key + " 在 " + engine + " 第 " + i + " 页 网页抓取失败 重试第 " + k + " 次";
                            }
                            string htmldoc = httpHelper.responseBody;
                            if (string.IsNullOrEmpty(htmldoc))
                            {
                                // Page could not be fetched: flag the result as possibly inaccurate
                                // and move on to the next page.
                                state.ForeColor = Color.Red;
                                state.Text = "关键词 " + key + " 在 " + engine + " 第 " + i + " 页 网页抓取失败 错误:" + httpHelper.ErrMsg;
                                this.ResultView.Rows[index].Cells[resultColumn].Style.ForeColor = Color.Red;
                                maybe = true;
                                continue;
                            }
                            if (htmldoc.StartsWith("0"))
                            {
                                // A body starting with "0" is reported as "possibly blocked"; stop paging this engine.
                                pageinfo = "可能被屏蔽";
                                this.ResultView.Rows[index].Cells[resultColumn].Style.ForeColor = Color.Red;
                                break;
                            }
                            string cpyencode = HttpUtility.HtmlEncode(cpy);
                            int findindex = htmldoc.IndexOf(cpyencode);
                            if (findindex > 0)
                            {
                                string check = string.Empty;
                                while (findindex > 0)
                                {
                                    // Inspect up to 60 characters from the match; the length is clamped
                                    // so a match near the end of the page cannot throw.
                                    check = htmldoc.Substring(findindex, Math.Min(60, htmldoc.Length - findindex));
                                    Console.WriteLine(engine + " " + i + " " + check);
                                    // Skip occurrences whose window contains an escaped slash ("\/").
                                    if (!check.Contains("\\/"))
                                    {
                                        break;
                                    }
                                    findindex = htmldoc.IndexOf(cpyencode, findindex + 1);
                                }
                                // NOTE(review): when every occurrence contains "\/", the last window is
                                // still tested below — confirm whether that should count as a hit.
                                if ((check.Contains("span") || check.Contains("><") || check.Contains(".html") || check.Contains("href") || check.Contains("%")))
                                {
                                    pageinfo = "第" + i + "页";
                                    if (i == 1)
                                        item.Cells[countColumn].Value = int.Parse(item.Cells[countColumn].Value.ToString()) + 1;
                                    this.ResultView.Rows[index].Cells[resultColumn].Style.ForeColor = (i == 1 ? Color.Magenta : Color.Black);
                                    break;
                                }
                            }
                            if (i == maxpage)
                            {
                                pageinfo = maxpage + "页以后";
                                break;
                            }
                            Application.DoEvents();
                        }
                        this.ResultView.Rows[index].Cells[resultColumn].Value = pageinfo + (maybe ? "(可能不准确)" : "");
                        this.ResultView.Rows[index].Cells["查询时间"].Value = DateTime.Now.ToString();
                        Application.DoEvents();
                    }
                    Console.WriteLine("==================" + key + "==================");
                }
            }
            if (item.Cells["keyword"].Value != null)
            {
                if (item.Cells["sum"].Value == null)
                    item.Cells["sum"].Value = 0;
                foreach (SearchType s in Enum.GetValues(typeof(SearchType)))
                {
                    if (searchen.Contains(s.ToString()))
                    {
                        int all = int.Parse(item.Cells["sum"].Value.ToString());
                        // The counter cell is only initialised when the row was actually queried
                        // (non-empty company and at least one keyword), so treat a missing value as 0.
                        object hits = item.Cells[s.ToString() + "s"].Value;
                        int add = hits == null ? 0 : int.Parse(hits.ToString());
                        item.Cells["sum"].Value = all + add;
                    }
                }
            }
            Application.DoEvents();
        }
        state.ForeColor = Color.Green;
        state.Text = "所有关键词查询完成!";
    }
    finally
    {
        // Re-enable the button even when the query aborts with an exception.
        search.Enabled = true;
    }
}
/// <summary>
/// Converts text to an integer, yielding 0 when the text is not a valid integer.
/// </summary>
/// <param name="str">Text to parse; may be null.</param>
/// <returns>The parsed number, or 0 when parsing fails.</returns>
private int ToInt(String str)
{
    int parsed;
    if (!int.TryParse(str, out parsed))
    {
        return 0;
    }
    return parsed;
}
// Handler with an empty body; presumably wired to the form's Load event in the
// designer part of this class (not visible here) — currently does nothing.
private void Frm_Main_Load(object sender, EventArgs e)
{
}
#region 窗体事件
/// <summary>
/// Click handler that exports the search list: asks for a *.cpm file and writes one
/// "company|keywords" line for every SearchView row whose company and keyword cells are both non-empty.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void saveout_Click(object sender, EventArgs e)
{
    // The dialog is a disposable component, so release it as soon as the handler is done.
    using (SaveFileDialog cpm = new SaveFileDialog())
    {
        cpm.Filter = "查排名数据|*.cpm";
        if (cpm.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        List<string> data = new List<string>();
        foreach (DataGridViewRow item in this.SearchView.Rows)
        {
            string cpy = item.Cells["company"].Value == null ? "" : item.Cells["company"].Value.ToString();
            string keys = item.Cells["keyword"].Value == null ? "" : item.Cells["keyword"].Value.ToString();
            if (cpy != "" && keys != "")
            {
                data.Add(cpy + "|" + keys);
            }
        }

        Config cfg = new Config(cpm.FileName);
        cfg.WriteListToTextFile(data);
    }
}
/// <summary>
/// Click handler that imports a search list: asks for a *.cpm file and appends one SearchView
/// row for every line that splits on '|' into exactly two parts (company, keywords).
/// Lines with any other shape are skipped.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void readin_Click(object sender, EventArgs e)
{
    // The dialog is a disposable component, so release it as soon as the handler is done.
    using (OpenFileDialog cpm = new OpenFileDialog())
    {
        cpm.Filter = "查排名数据|*.cpm";
        if (cpm.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        Config cfg = new Config(cpm.FileName);
        List<string> data = cfg.ReadTextFileToList();
        foreach (string item in data)
        {
            string[] str = item.Split('|');
            if (str.Length != 2)
            {
                continue;
            }

            int index = this.SearchView.Rows.Add();
            this.SearchView.Rows[index].Cells["company"].Value = str[0];
            this.SearchView.Rows[index].Cells["keyword"].Value = str[1];
        }
    }
}
/// <summary>
/// Click handler that removes every row from the result grid.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void clearresult_Click(object sender, EventArgs e)
{
    DataGridViewRowCollection resultRows = this.ResultView.Rows;
    resultRows.Clear();
}
/// <summary>
/// Click handler that removes every row from the search (input) grid.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void clearsearch_Click(object sender, EventArgs e)
{
    DataGridViewRowCollection searchRows = this.SearchView.Rows;
    searchRows.Clear();
}
#endregion
/// <summary>
/// CheckedChanged handler for the engine check boxes: keeps the enabled-engine list in sync
/// with the box, using the box's Text as the engine name.
/// </summary>
/// <param name="sender">The CheckBox whose state changed.</param>
/// <param name="e">Event data (not used).</param>
private void check_CheckedChanged(object sender, EventArgs e)
{
    CheckBox check = (CheckBox)sender;
    string engine = check.Text;
    if (check.Checked)
    {
        // Never store a name twice; a duplicate would survive a later single Remove
        // and leave the engine enabled although its box is unchecked.
        if (!searchen.Contains(engine))
        {
            searchen.Add(engine);
        }
    }
    else
    {
        // Remove every occurrence in case the list already holds duplicates.
        while (searchen.Remove(engine))
        {
        }
    }
}
}
}