获取网页的源文件出现乱码!

huntwolf 2004-11-21 08:56:44

我用如下方法获取某网页的源文件，并保存到本地。我发现获取到的源文件中会出现一些乱码，
如 "瓠?瓠?瓠韩" 之类。这是什么原因造成的，该怎么解决啊？
代码如下：

const UINT MAXBUF=1024*200;
CInternetSession session;
CHttpConnection* pConnection=NULL;
CHttpFile* pFile=NULL;
CHAR* buffer=new TCHAR[MAXBUF];
CHAR* tempbuffer=new TCHAR[1024];
strcpy(buffer,"\0");
strcpy(tempbuffer,"\0");
UINT nBytesRead=0;
UINT nByte=0;
try{
pConnection=session.GetHttpConnection(strServer,1,80,NULL,NULL);
pFile=pConnection->OpenRequest(1,strFile);
pFile->SendRequest();

}
catch(CInternetException* e)
{
e->Delete();
}

while((nByte=pFile->Read(tempbuffer,1024))!=0) //读文件到缓冲区
{
strcat(buffer,tempbuffer);
strcpy(tempbuffer,"\0");
nBytesRead+=nByte;
}
buffer[nBytesRead]='\0';
FILE* fp=fopen("C:\\source.txt","wb");//保存到本地
fwrite(buffer,nBytesRead,1,fp);
fclose(fp);

...全文

111 3 打赏收藏转发到动态举报

写回复

用AI写文章

3 条回复

切换为时间正序

请发表友善的回复…

发表回复

蒋晟 2004-11-22

打赏
举报

……没判断网页是ANSI还是UNICODE编码？

kingzai 2004-11-22

打赏
举报

//use my example to download a file.
TCHAR sz[1024];
CInternetSession session (_T("Update 1.0"), 1,
INTERNET_OPEN_TYPE_DIRECT);
CStdioFile* pFile = NULL;
CHAR szHead[] = "Accept: */*\r\n\r\n";
DWORD nRead;
CFile myFile;
if ( !myFile.Open (m_CurrUpdatePath, CFile::modeCreate | CFile::modeReadWrite,
NULL) )
{
return FALSE;
}
CString str=m_sWebURL;

{
pFile = session.OpenURL (str, 1, INTERNET_FLAG_RELOAD
|INTERNET_FLAG_TRANSFER_BINARY,
szHead, -1L);

}
do
{
nRead = pFile->Read(sz, 1023);
if (nRead != 0)
myFile.Write (sz, nRead);
}
while (nRead != 0);
myFile.Close();
pFile->Close();
if (pFile != NULL)
delete pFile;
session.Close();

3m2u 2004-11-21

打赏
举报

while((nByte=pFile->Read(tempbuffer,1024))!=0) // read the response into the buffer
{
    // Read() returns raw bytes with no terminating NUL, so appending with
    // strcat() runs past the received data and stops early at any NUL byte.
    // Writing tempbuffer[nByte]=0 first does not help either: a full read
    // returns 1024, which is one past the end of the 1024-byte tempbuffer.
    // Copy exactly nByte bytes to the current end of the data instead.
    // NOTE(review): `buffer` must have room for nBytesRead+nByte+1 bytes; its
    // capacity is not visible in this fragment - confirm against the allocation.
    memcpy(buffer+nBytesRead,tempbuffer,nByte);
    nBytesRead+=nByte;
}
buffer[nBytesRead]='\0';