高分求助,搜索中的多线程问题。顶者有分。
我做了个搜索网页中的 email地址。首先搜索这个网页中所以的链接,然后再搜索这些链接
页面中的 email地址。我先用单线陈做反应很慢啊。我想用多线程,但是我弄出来查找的
email地址就少了很多了,有时候根本就找不到了。。
各位老大给我看看。这些是原代码。
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Collections;
using System.Text.RegularExpressions;
using System.Net;
using System.Threading;
namespace WinSearchEmail
{
public partial class fSearchEamil : Form
{
public fSearchEamil()
{
InitializeComponent();
}
private ArrayList alEmail;
private void button1_Click(object sender, EventArgs e)
{
alEmail = new ArrayList();
Thread[] threads = new Thread[10];
for (int i = 0; i < 10; i++)
{
Thread t = new Thread(new ThreadStart(Thread));
threads[i] = t;
}
for (int i = 0; i < 10; i++)
{
threads[i].Start();
}
//在listbox显示
lbEmail.DataSource = alEmail;
}
private void Thread()
{
ArrayList alHttp = GetHttp(txtHttp.Text);
if (alHttp != null)
{
foreach (string strHttp in alHttp)
{
SearchEmail(strHttp);
}
}
}
/// <summary>
/// 获取网页中的所有网址
/// </summary>
/// <param name="Path">网页地址</param>
/// <returns>所含网址</returns>
public ArrayList GetHttp(string Path)
{
System.Net.WebClient client = new WebClient();
byte[] page;
try
{
page = client.DownloadData(Path);
}
catch
{
return null;
}
string content1 = System.Text.Encoding.GetEncoding("GB2312").GetString(page);
string content2 = System.Text.Encoding.UTF8.GetString(page);//GetEncoding("GB2312").GetString(page);
string content = content1 + content2;
string regex = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
Regex re = new Regex(regex);
MatchCollection matches = re.Matches(content);
System.Collections.IEnumerator enu = matches.GetEnumerator();
ArrayList _myArryList = new ArrayList();
while (enu.MoveNext() && enu.Current != null)
{
try
{
Match match = (Match)(enu.Current);
string strHttp = match.Value;
if (!_myArryList.Contains(strHttp))
{
_myArryList.Add(strHttp);
}
}
catch
{
}
}
return _myArryList;
}
/// <summary>
/// 获取所有网页中的email
/// </summary>
/// <param name="Path">网页地址</param>
/// <returns>所含email</returns>
public void SearchEmail(string Path)
{
System.Net.WebClient client = new WebClient();
byte[] page;
try
{
page = client.DownloadData(Path);
}
catch
{
return;
}
string content1 = System.Text.Encoding.GetEncoding("GB2312").GetString(page);
string content2 = System.Text.Encoding.UTF8.GetString(page);//GetEncoding("GB2312").GetString(page);
string content = content1 + content2;
string regex = @"\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*";
Regex re = new Regex(regex);
MatchCollection matches = re.Matches(content);
System.Collections.IEnumerator enu = matches.GetEnumerator();
while (enu.MoveNext() && enu.Current != null)
{
try
{
Match match = (Match)(enu.Current);
string strEmail = match.Value;
if (!alEmail.Contains(strEmail))
{
alEmail.Add(strEmail);
}
}
catch
{
}
}
}
}
}