求php远程抓取代码

epni59 2014-12-10 10:03:08


http://www.taodaxiang.com/credit/index/init

这个页面需要先用 POST 提交查询,然后再抓取查询结果的数据。这个用 PHP 怎么写?求代码
...全文
199 11 打赏 收藏 转发到动态 举报
写回复
用AI写文章
11 条回复
切换为时间正序
请发表友善的回复…
发表回复
epni59 2014-12-13
  • 打赏
  • 举报
回复
9楼是神人。万分感谢,可以了
indexroot 2014-12-11
  • 打赏
  • 举报
回复
json结果出来解析一下就好了
indexroot 2014-12-11
  • 打赏
  • 举报
回复

<?php
/**
 * Small wrapper around the PHP cURL extension for issuing a single HTTP
 * request (GET by default, POST once setPost() has been called).
 *
 * Usage: construct with a URL, optionally adjust referer / user agent /
 * cookie file / POST fields / basic-auth credentials, call createCurl() to
 * perform the request, then read the body by casting the object to string
 * and the status code through getHttpStatus().
 */
class mycurl {
    // Default User-Agent header value sent with every request.
    protected $_useragent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)';
    // Target URL of the request.
    protected $_url;
    // Whether redirects are followed (passed to CURLOPT_FOLLOWLOCATION).
    protected $_followlocation;
    // Maximum request duration in seconds (passed to CURLOPT_TIMEOUT).
    protected $_timeout;
    // Maximum number of redirects (passed to CURLOPT_MAXREDIRS).
    protected $_maxRedirects;
    // File used both to load and to store cookies; overwritten in the constructor.
    protected $_cookieFileLocation = './cookie.txt';
    // True once setPost() has been called; switches the request to POST.
    protected $_post;
    // Payload handed to CURLOPT_POSTFIELDS.
    protected $_postFields;
    // Default Referer header value.
    protected $_referer = "http://www.taodaxiang.com/credit/index/init";

    // Declared but not used anywhere in this class.
    protected $_session;
    // Raw result of curl_exec(): response string, false on failure, null before any request.
    protected $_webpage;
    // Whether response headers are included in the output (CURLOPT_HEADER).
    protected $_includeHeader;
    // Whether the body is skipped (CURLOPT_NOBODY).
    protected $_noBody;
    // HTTP status code of the last request; null before any request.
    protected $_status;
    // Stored from the constructor; not applied to the cURL handle.
    protected $_binaryTransfer;
    // 1 when credentials should be sent via CURLOPT_USERPWD, otherwise 0.
    public    $authentication = 0;
    public    $auth_name      = '';
    public    $auth_pass      = '';

    /**
     * Enable or disable sending of the configured credentials.
     *
     * @param mixed $use Loosely compared with true; any truthy value enables auth.
     */
    public function useAuth($use)
    {
        $this->authentication = ($use == true) ? 1 : 0;
    }

    /**
     * @param string $name User name used for CURLOPT_USERPWD.
     */
    public function setName($name)
    {
        $this->auth_name = $name;
    }

    /**
     * @param string $pass Password used for CURLOPT_USERPWD.
     */
    public function setPass($pass)
    {
        $this->auth_pass = $pass;
    }

    /**
     * @param string $url            URL to request.
     * @param bool   $followlocation Follow HTTP redirects.
     * @param int    $timeOut        Timeout in seconds.
     * @param int    $maxRedirecs    Maximum number of redirects to follow.
     * @param bool   $binaryTransfer Stored only; currently has no effect on the request.
     * @param bool   $includeHeader  Include response headers in the returned output.
     * @param bool   $noBody         Request without a response body.
     */
    public function __construct($url, $followlocation = true, $timeOut = 30, $maxRedirecs = 4, $binaryTransfer = false, $includeHeader = false, $noBody = false)
    {
        $this->_url = $url;
        $this->_followlocation = $followlocation;
        $this->_timeout = $timeOut;
        $this->_maxRedirects = $maxRedirecs;
        $this->_noBody = $noBody;
        $this->_includeHeader = $includeHeader;
        $this->_binaryTransfer = $binaryTransfer;

        // Keep the cookie jar next to this source file rather than in the
        // current working directory.
        $this->_cookieFileLocation = dirname(__FILE__) . '/cookie.txt';
    }

    /**
     * @param string $referer Value for the Referer request header.
     */
    public function setReferer($referer)
    {
        $this->_referer = $referer;
    }

    /**
     * @param string $path Path of the file used to read and persist cookies.
     */
    public function setCookiFileLocation($path)
    {
        $this->_cookieFileLocation = $path;
    }

    /**
     * Turn the request into a POST with the given payload.
     *
     * @param array|string $postFields Value passed unchanged to CURLOPT_POSTFIELDS.
     */
    public function setPost($postFields)
    {
        $this->_post = true;
        $this->_postFields = $postFields;
    }

    /**
     * @param string $userAgent Value for the User-Agent request header.
     */
    public function setUserAgent($userAgent)
    {
        $this->_useragent = $userAgent;
    }

    /**
     * Perform the request and record the response body and HTTP status.
     *
     * @param string $url Optional URL overriding the one given to the
     *                    constructor; the literal 'nul' means "keep current URL".
     */
    public function createCurl($url = 'nul')
    {
        if ($url !== 'nul') {
            $this->_url = $url;
        }

        $s = curl_init();
        if ($s === false) {
            // No handle could be created: record a failed request instead of
            // passing false to every curl_* call below.
            $this->_webpage = false;
            $this->_status = 0;
            return;
        }

        curl_setopt($s, CURLOPT_URL, $this->_url);
        // An empty "Expect:" header suppresses the "Expect: 100-continue"
        // header that libcurl may otherwise add to POST requests.
        curl_setopt($s, CURLOPT_HTTPHEADER, array('Expect:'));
        curl_setopt($s, CURLOPT_TIMEOUT, $this->_timeout);
        curl_setopt($s, CURLOPT_MAXREDIRS, $this->_maxRedirects);
        curl_setopt($s, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($s, CURLOPT_FOLLOWLOCATION, $this->_followlocation);
        // The same file is used to load cookies and to save them afterwards.
        curl_setopt($s, CURLOPT_COOKIEJAR, $this->_cookieFileLocation);
        curl_setopt($s, CURLOPT_COOKIEFILE, $this->_cookieFileLocation);

        if ($this->authentication == 1) {
            curl_setopt($s, CURLOPT_USERPWD, $this->auth_name . ':' . $this->auth_pass);
        }

        if ($this->_post) {
            curl_setopt($s, CURLOPT_POST, true);
            curl_setopt($s, CURLOPT_POSTFIELDS, $this->_postFields);
        }

        if ($this->_includeHeader) {
            curl_setopt($s, CURLOPT_HEADER, true);
        }

        if ($this->_noBody) {
            curl_setopt($s, CURLOPT_NOBODY, true);
        }

        // $_binaryTransfer is intentionally not applied to the handle; the
        // previous (disabled) code for it referenced an undeclared property.

        curl_setopt($s, CURLOPT_USERAGENT, $this->_useragent);
        curl_setopt($s, CURLOPT_REFERER, $this->_referer);

        // With CURLOPT_RETURNTRANSFER set, curl_exec() yields the response
        // string on success and false on failure.
        $this->_webpage = curl_exec($s);
        $this->_status = curl_getinfo($s, CURLINFO_HTTP_CODE);
        curl_close($s);
    }

    /**
     * @return int|null HTTP status code of the last request, or null if
     *                  createCurl() has not been called yet.
     */
    public function getHttpStatus()
    {
        return $this->_status;
    }

    /**
     * Response body of the last request.
     *
     * __toString() must return a string, so a missing (null) or failed
     * (false) response is reported as an empty string instead of triggering
     * a fatal error when the object is echoed.
     *
     * @return string
     */
    public function __toString()
    {
        return is_string($this->_webpage) ? $this->_webpage : '';
    }
}

// Example run: POST an account name to the query endpoint, sending the
// search page as the Referer, and print whatever the server returns.
$endpoint    = "http://www.taodaxiang.com/credit/index/get";
$refererPage = "http://www.taodaxiang.com/credit/index/init";
$formFields  = array("account" => "cupjava");

$client = new mycurl($endpoint);
$client->setReferer($refererPage);
$client->setPost($formFields);

// Executes the HTTP request; the response body is then available by
// converting the object to a string.
$client->createCurl();
print $client;
?>
xuzuning 2014-12-10
  • 打赏
  • 举报
回复
比如取回的页面中有
<td>注册时间:<p id='info_regtime'></p></td>
<td>实名认证:<p id='info_auth'></p></td>
app_init.js 中有
$("#info_regtime").html("<span style='color:green'><b>"+data.regtime+"</b></span>");
$("#info_auth").html("<span style='color:"+color+"'><b>"+data.auth+"</b></span>");
xuzuning 2014-12-10
  • 打赏
  • 举报
回复
curl 就可以,不过查询到的信息是通过 app_init.js 动态写入页面的,所以你还需要抓取 app_init.js 后自行解析。
epni59 2014-12-10
  • 打赏
  • 举报
回复
没有人吗?
epni59 2014-12-10
  • 打赏
  • 举报
回复
不行。百度的都抓取失败。
zktz1 2014-12-10
  • 打赏
  • 举报
回复
建议你参考一下CURL 百度一下有很多
  • 打赏
  • 举报
回复
我试了下只能抓取到一个空页面(就一个[] 内容),应该是做了只能本站点提交表单的限制
傲雪星枫 2014-12-10
  • 打赏
  • 举报
回复
js寫入dom的,需要curl後再分析,再curl獲取的。
epni59 2014-12-10
  • 打赏
  • 举报
回复
curl 这个好像不用你说哟。php抓取三种方式,我都试过了。就是抓取失败。所以才来发帖求助

21,873

社区成员

发帖
与我相关
我的任务
社区描述
从PHP安装配置,PHP入门,PHP基础到PHP应用
社区管理员
  • 基础编程社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧