用 CHttpConnection 取得的网页数据是乱码(超难,高手请进来测试)

wxq4100798 2007-03-19 04:02:48
已知源网站是用gzip压缩过的

(最奇怪的是 用VC++抓取其它网页,就正常)
我用下面一段代码取数据。因为要设定 HTTP 的 Referer,所以用
SendRequest 方法打开数据,而不是用 OpenURL 来打开数据。

又用PHP 抓取同一网站的内容
PHP抓取出来是正确的

附两个程序的抓取代码:(请注意 VC 抓的那个内容连HTTP头都不见了,PHP有http头, 而且 两个程序写入文件,文件的内容都不一样)

VC++的抓取代码
// VC++ (MFC / WinINet) fetch: sends a GET for sub_url to host with a custom
// Referer, then dumps the raw response body to old.txt / new.txt and shows a
// text preview of it in the IDC_EDIT1 / IDC_EDIT2 controls.
CString ref_url = _T("http://www.s1122.com/app/member/FT_browse/index.php?uid=2c520623m877213l3284&langx=zh-cn&mtype=3");
// Full URL of the request; not used below (host + sub_url are used instead).
CString web_url = _T("http://www.s1122.com/app/member/FT_browse/body_var.php?uid=2c520623m877213l3284&rtype=re&langx=zh-cn&mtype=3");
CString host = _T("www.s1122.com");
//host = "exshop.oicp.net";

CString sub_url = _T("/app/member/FT_browse/body_var.php?uid=2c520623m877213l3284&rtype=re&langx=zh-cn&mtype=3");
//sub_url = "/";

try
{
    CInternetSession Session;
    CHttpConnection* pHttpConnect = NULL;
    CHttpFile* pFile = NULL;

    // Inner try: any WinINet failure while talking to the server still falls
    // through to the cleanup below, so the request/connection never leak.
    try
    {
        pHttpConnect = Session.GetHttpConnection(host);

        if (pHttpConnect)
        {
            // sub_url is passed directly: _T() is only meaningful on string
            // literals and must not be applied to a variable.
            pFile = pHttpConnect->OpenRequest(CHttpConnection::HTTP_VERB_GET,
                                              sub_url,
                                              NULL,
                                              1,
                                              NULL,
                                              NULL,
                                              INTERNET_FLAG_NO_COOKIES);
        }

        if (pFile)
        {
            //pFile->AddRequestHeaders("Accept: image/png,*/*;q=0.5");
            //pFile->AddRequestHeaders("Accept-Language: Big5");
            //pFile->AddRequestHeaders("Accept-Encoding: gzip,deflate");
            //pFile->AddRequestHeaders("Accept-Charset: big5");
            //pFile->AddRequestHeaders("Keep-Alive: 300");
            //pFile->AddRequestHeaders("Connection: keep-alive");
            //pFile->AddRequestHeaders("Cookie: Key=somevalue;domain=abc.com");

            CString refererHeader = _T("Referer: ") + ref_url;

            pFile->AddRequestHeaders(_T("Content-Type: text/html;charset=gb2312"));
            pFile->AddRequestHeaders(_T("Accept: text/html"));
            pFile->AddRequestHeaders(refererHeader);
            pFile->AddRequestHeaders(_T("User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"));
            //pFile->AddRequestHeaders("Accept-Encoding: gzip, deflate");
            pFile->SendRequest();

            // Read the returned body as raw bytes. The accompanying post says
            // the server gzip-compresses this response, so the body is binary:
            // line-based ReadString() plus strlen() would stop at the first
            // NUL byte and produce the truncated "garbage" being reported.
            CByteArray body;
            BYTE chunk[4096];
            UINT got = 0;
            while ((got = pFile->Read(chunk, sizeof(chunk))) > 0)
            {
                const int oldSize = static_cast<int>(body.GetSize());
                body.SetSize(oldSize + static_cast<int>(got));
                memcpy(body.GetData() + oldSize, chunk, got);
            }
            const int byteCount = static_cast<int>(body.GetSize());

            // Dump the exact bytes to both files. "wb" avoids newline
            // translation, and the real byte count is written instead of
            // strlen(), so embedded NUL bytes are preserved.
            // NOTE(review): if the body is gzip data it still has to be
            // inflated (the PHP version of this fetch uses gzinflate) before
            // it is readable text.
            const char* const dumpNames[] = { "old.txt", "new.txt" };
            for (int i = 0; i < 2; ++i)
            {
                FILE* fp = fopen(dumpNames[i], "wb");
                if (fp)
                {
                    if (byteCount > 0)
                    {
                        fwrite(body.GetData(), 1, byteCount, fp);
                    }
                    fclose(fp);
                }
            }

            // Text preview for the dialog: NUL-terminate the buffer so it can
            // be treated as a C string (the preview ends at the first NUL).
            body.Add(0);
            CString preview(reinterpret_cast<LPCSTR>(body.GetData()));
            SetDlgItemText(IDC_EDIT1, preview);
            //BIG52GBK(str.GetBuffer(strlen(str)));
            //GBK2GB(str.GetBuffer(strlen(str)));
            SetDlgItemText(IDC_EDIT2, preview);
            //AfxMessageBox(preview);

            // Fetch the returned cookie
            //CString strInfo;
            //DWORD dw = 0;
            //pFile->QueryInfo(HTTP_QUERY_SET_COOKIE, strInfo, &dw);

            //if (strInfo.IsEmpty()==FALSE)
            //    AfxMessageBox(strInfo);
        }
    }
    catch (CInternetException* e)
    {
        // Same best-effort policy as before: discard the error.
        e->Delete();
    }

    // Release the request first, then the connection, on every path.
    if (pFile)
    {
        pFile->Close();
        delete pFile;
    }
    if (pHttpConnect)
    {
        pHttpConnect->Close();
        delete pHttpConnect;
    }
}
catch (CInternetException* e)
{
    e->Delete();
}



PHP 抓取代码

<?php
// Fetch script for www.s1122.com. The original header comment was mis-encoded;
// the parts that survived were: UID 2c520623m877213l3284, time 2007-03-17 15:04:05.
//
// Sends a raw HTTP/1.0 GET with a Referer header, saves the complete response
// (headers + body) to data.txt, echoes the body, and saves the decompressed
// body to data1.txt.
set_time_limit(0);

$uid  = "2c520623m877213l3284";
$host = "www.s1122.com";
//$host = "www.baidu.com";
$port = 80;

$refer_url = 'http://' . $host . '/app/member/FT_browse/index.php?uid=' . $uid . '&langx=zh-cn&mtype=3';
$request   = "/app/member/FT_browse/body_var.php?uid=$uid&rtype=re&langx=zh-cn&mtype=3";
//echo $request;

/*
echo $refer_url;
echo "<br>";
*/

// Build the request. The header section ends with exactly one empty line.
$httpHeader  = "GET $request HTTP/1.0\r\n";
$httpHeader .= "Referer: " . $refer_url . "\r\n";
$httpHeader .= "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)\r\n";
$httpHeader .= "Host: $host\r\n";
$httpHeader .= "\r\n";

$fp = @fsockopen($host, $port);
if (!$fp) {
    // Nothing to read or close when the connection could not be opened.
    exit("Could not connect to $host:$port\n");
}

// Read the whole response until the server closes the connection.
$retStr = "";
fwrite($fp, $httpHeader);
while (!feof($fp)) {
    $chunk = fread($fp, 1024);
    if ($chunk === false) {
        break;
    }
    $retStr .= $chunk;
}
fclose($fp);

// Raw response, headers included. "wb" keeps the bytes untouched on
// platforms that distinguish text and binary mode.
$out = fopen("data.txt", "wb");
if ($out) {
    fwrite($out, $retStr);
    fclose($out);
}

// Split headers from body at the FIRST blank line only: the body is
// compressed binary data and may itself contain "\r\n\r\n".
$parts   = explode("\r\n\r\n", $retStr, 2);
$content = isset($parts[1]) ? $parts[1] : "";
echo $content;

// Decompress the gzip body. gzdecode() parses the gzip header itself; on PHP
// versions without it, fall back to skipping the fixed 10-byte gzip header
// and inflating the rest (this assumes the header has no optional fields).
if (function_exists('gzdecode')) {
    $decoded = gzdecode($content);
} else {
    $decoded = gzinflate(substr($content, 10));
}
if ($decoded === false) {
    $decoded = "";
}

$out = fopen("data1.txt", "wb");
if ($out) {
    fwrite($out, $decoded);
    fclose($out);
}

?>
...全文
924 8 打赏 收藏 转发到动态 举报
写回复
用AI写文章
8 条回复
切换为时间正序
请发表友善的回复…
发表回复
ohfox 2007-05-18
  • 打赏
  • 举报
回复
我也在弄相似的东西,支持楼主
wxq4100798 2007-03-21
  • 打赏
  • 举报
回复
如果成功,可以多加分 500分也可以,或者RMB 具体打我手机再谈或加qq 4100798
wxq4100798 2007-03-21
  • 打赏
  • 举报
回复
你有没有设置 HTTP Referer?程序一定要设定 HTTP Referer,如不设定,程序没用。这样,你可以加我qq 或者联系手机13505143802,谢谢。
尘雨 2007-03-21
  • 打赏
  • 举报
回复
文件生成了,两个都是这段脚本
wxq4100798 2007-03-21
  • 打赏
  • 举报
回复
有意向的请加qq 4100798
尘雨 2007-03-20
  • 打赏
  • 举报
回复
<script>window.open('http://www.s1122.com/app/member/index.php','_top')</script>
你的代码测试后只得到这个
wxq4100798 2007-03-20
  • 打赏
  • 举报
回复
汗,没人知道?
wxq4100798 2007-03-20
  • 打赏
  • 举报
回复
汗,写到文件里啊,里面不是生成文件了?
你看文件里的内容

19,468

社区成员

发帖
与我相关
我的任务
社区描述
VC/MFC 图形处理/算法
社区管理员
  • 图形处理/算法社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧