110,536
社区成员
发帖
与我相关
我的任务
分享
// Lets the user pick an HTML file, strips its markup, and writes the remaining
// plain text to D:\data.txt. Nothing is read or written if the dialog is cancelled.
string filename = "";
using (OpenFileDialog dlg = new OpenFileDialog())
{
    dlg.InitialDirectory = System.Windows.Forms.Application.StartupPath;
    dlg.Filter = "html文件 (*.html)|*.html";
    dlg.FilterIndex = 0;
    dlg.RestoreDirectory = true;
    if (dlg.ShowDialog() == DialogResult.OK)
    {
        filename = dlg.FileName;
    }
}

// An empty path means the user cancelled; opening it would throw ArgumentException.
if (!string.IsNullOrEmpty(filename))
{
    string content;
    using (StreamReader fileStream = new StreamReader(filename, Encoding.Default))
    {
        content = fileStream.ReadToEnd();
    }

    // Lazy quantifiers remove each <style>/<script> element on its own; a greedy
    // match would also delete all page text between the first and the last element.
    content = Regex.Replace(content, @"<style[^>]*>[\s\S]*?</style>", string.Empty, RegexOptions.IgnoreCase);
    content = Regex.Replace(content, @"<script[^>]*>[\s\S]*?</script>", string.Empty, RegexOptions.IgnoreCase);
    // Remaining tags: "<" up to the first ">" (same matches as a lazy any-char
    // pattern, without the backtracking cost of an overlapping alternation).
    content = Regex.Replace(content, @"<[^>]*>", string.Empty);
    // Character entities such as &nbsp; or &#160; become a single space.
    content = Regex.Replace(content, @"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);", " ");

    using (StreamWriter sw = File.CreateText("D:\\data.txt"))
    {
        sw.Write(content);
    }
}
// Reads D:\index.html as GB2312 text, echoes it to the console, strips the
// markup, and writes the remaining plain text to D:\data.txt.
string content = System.IO.File.ReadAllText("D:\\index.html", Encoding.GetEncoding("gb2312"));
Console.WriteLine(content);

// Lazy quantifiers remove each <style>/<script> element on its own; a greedy
// match would also delete all page text between the first and the last element.
content = Regex.Replace(content, @"<style[^>]*>[\s\S]*?</style>", string.Empty, RegexOptions.IgnoreCase);
content = Regex.Replace(content, @"<script[^>]*>[\s\S]*?</script>", string.Empty, RegexOptions.IgnoreCase);
// Remaining tags: "<" up to the first ">".
content = Regex.Replace(content, @"<[^>]*>", string.Empty);
// Character entities such as &nbsp; or &#160; become a single space.
content = Regex.Replace(content, @"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);", " ");

// The using block closes the file even if Write throws.
using (StreamWriter sw = File.CreateText("D:\\data.txt"))
{
    sw.Write(content);
}
// Lets the user pick an HTML file, strips its markup, and writes the remaining
// plain text to D:\data.txt. Nothing is read or written if the dialog is cancelled.
string filename = "";
using (OpenFileDialog dlg = new OpenFileDialog())
{
    dlg.InitialDirectory = System.Windows.Forms.Application.StartupPath;
    dlg.Filter = "html文件 (*.html)|*.html";
    dlg.FilterIndex = 0;
    dlg.RestoreDirectory = true;
    if (dlg.ShowDialog() == DialogResult.OK)
    {
        filename = dlg.FileName;
    }
}

// An empty path means the user cancelled; opening it would throw ArgumentException.
if (!string.IsNullOrEmpty(filename))
{
    string content;
    using (StreamReader fileStream = new StreamReader(filename, Encoding.Default))
    {
        content = fileStream.ReadToEnd();
    }

    // The static Regex.Replace needs the input string as its first argument;
    // calling it with only (pattern, replacement) does not compile.
    // Lazy quantifiers remove each <style>/<script> element on its own; a greedy
    // match would also delete all page text between the first and the last element.
    content = Regex.Replace(content, @"<style[^>]*>[\s\S]*?</style>", string.Empty, RegexOptions.IgnoreCase);
    content = Regex.Replace(content, @"<script[^>]*>[\s\S]*?</script>", string.Empty, RegexOptions.IgnoreCase);
    // Remaining tags: "<" up to the first ">".
    content = Regex.Replace(content, @"<[^>]*>", string.Empty);
    // Character entities such as &nbsp; or &#160; become a single space.
    content = Regex.Replace(content, @"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);", " ");

    using (StreamWriter sw = File.CreateText("D:\\data.txt"))
    {
        sw.Write(content);
    }
}
// Downloads the page at http://www.baidu.com, echoes the raw HTML to the
// console, strips the markup, and writes the remaining plain text to D:\data.txt.
string content;
// WebClient is IDisposable; release it as soon as the download finishes.
using (WebClient wc = new WebClient())
{
    content = wc.DownloadString("http://www.baidu.com");
}
Console.WriteLine(content);

// Lazy quantifiers remove each <style>/<script> element on its own; a greedy
// match would also delete all page text between the first and the last element.
content = Regex.Replace(content, @"<style[^>]*>[\s\S]*?</style>", string.Empty, RegexOptions.IgnoreCase);
content = Regex.Replace(content, @"<script[^>]*>[\s\S]*?</script>", string.Empty, RegexOptions.IgnoreCase);
// Remaining tags: "<" up to the first ">".
content = Regex.Replace(content, @"<[^>]*>", string.Empty);
// Character entities such as &nbsp; or &#160; become a single space.
content = Regex.Replace(content, @"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);", " ");

// The using block closes the file even if Write throws.
using (StreamWriter sw = File.CreateText("D:\\data.txt"))
{
    sw.Write(content);
}
/*
Contents of data.txt:
百度一下,你就知道
登录新 闻网 页贴 吧知 道MP3图 片视 频
帮助高级
空间 hao123 | 更多>>
把百度设为主页企业推广 | 搜索风云榜 | 关于百度 | About Baidu 2008 Baidu 使用百度前必读 京ICP证030173号
*/