16,717
社区成员
发帖
与我相关
我的任务
分享
// Downloads the response body as raw bytes and then chooses a text encoding to
// decode it with. Candidates are tried in this order:
//   1. the charset declared in an HTML <meta http-equiv="Content-Type"> tag,
//   2. the HTTP Content-Encoding header value,
//   3. the charset from the HTTP Content-Type header (CharacterSet),
//   4. Encoding.Default.
// NOTE(review): webReq, encode and htmlString are declared outside this snippet.
// The header fallbacks only run while encode is null — confirm it starts out null.
string encodingName, charset;
string Meta_Content_Encoding = @"<meta\s+http-equiv\s*=\s*[\""'\s]?Content-Type\b.*?charset\s*=\s*(?<encodingName>[^\""'\s>]*)";
RegexOptions options = RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture;
Regex CharsetRegex = new Regex(Meta_Content_Encoding, options);

// Direct cast instead of "as": a non-HTTP response fails here with a clear
// InvalidCastException rather than a NullReferenceException on the next line.
HttpWebResponse response = (HttpWebResponse)webReq.GetResponse();
byte[] htmlData = null;

// Dispose the response (and its stream) even if reading the body throws.
using (response)
{
    encodingName = response.ContentEncoding;
    charset = response.CharacterSet;

    using (Stream responseStream = response.GetResponseStream())
    using (MemoryStream ms = new MemoryStream())
    {
        byte[] buffer = new byte[4096];
        int count;
        while ((count = responseStream.Read(buffer, 0, buffer.Length)) > 0)
        {
            ms.Write(buffer, 0, count);
        }
        htmlData = ms.ToArray();
    }
}

// First check whether the HTML document itself declares an encoding.
// The bytes are decoded as ASCII only so the regex can scan the markup.
string testHtml = Encoding.ASCII.GetString(htmlData);
Match m = CharsetRegex.Match(testHtml);

// Remember the header value: encodingName is overwritten below when a <meta>
// charset is found, and the header fallback must still see the original value.
string headerContentEncoding = encodingName;

if (m.Success) // The document contains a <meta> tag that declares a charset.
{
    encodingName = m.Groups["encodingName"].Value;
    if (!string.IsNullOrEmpty(encodingName))
    {
        try
        {
            encode = Encoding.GetEncoding(encodingName);
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported encoding name; fall through to the HTTP headers.
        }
    }
}

// No usable <meta> charset: try the HTTP Content-Encoding header value first,
// then the charset from the Content-Type header.
// NOTE(review): Content-Encoding usually names a compression scheme (e.g. gzip),
// not a charset, so the first candidate will rarely resolve — kept for compatibility.
string[] headerCandidates = { headerContentEncoding, charset };
foreach (string candidate in headerCandidates)
{
    if (encode != null)
    {
        break;
    }
    if (string.IsNullOrEmpty(candidate))
    {
        continue;
    }
    try
    {
        encode = Encoding.GetEncoding(candidate);
    }
    catch (ArgumentException)
    {
        // Not a recognised encoding name; try the next candidate.
    }
}

// If no matching Encoding could be found, fall back to the system default.
if (encode == null)
{
    encode = Encoding.Default;
}
htmlString = encode.GetString(htmlData);
用下面这个函数设置一下编码应该就可以了
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body decoded as GB2312 text.
/// </summary>
/// <param name="url">The address to request.</param>
/// <returns>
/// The response body as a string, or <c>null</c> when the status is not 200 OK
/// or when any error occurs while creating the request or reading the response.
/// </returns>
public static string GetResponseText(string url)
{
    try
    {
        WebRequest request = WebRequest.Create(url);
        request.Credentials = CredentialCache.DefaultCredentials;

        // "using" guarantees the response, stream and reader are released on every
        // path, and avoids calling Close() on a reference that was never assigned.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // Check the numeric status rather than the server-supplied reason phrase,
            // which is free text and not guaranteed to be "OK" for a 200 response.
            if (response.StatusCode != HttpStatusCode.OK)
            {
                return null;
            }

            using (Stream dataStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("GB2312"))) // the text encoding is set here
            {
                return reader.ReadToEnd();
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: callers receive null for any failure
        // (malformed URL, network error, unsupported encoding, ...).
        return null;
    }
}