python xpath无法解析网站？？

qq_39700924 2017-08-26 09:20:22

刚学习爬虫，遇到问题，请高手帮忙！
在下面的 for 循环中，当 html 变量通过自定义函数 geturl 获取页面内容时，xpath 解析不出任何内容，links 的结果为空列表。
但如果 html 改用注释中的语句（即直接调用 urllib.request.urlopen(url).read()）来获取，links 就能得到结果。为什么会这样？这两种方式的基本过程不是一样的吗？

# -*- coding:utf-8 -*-

import urllib

import urllib.request

import urllib.error

from lxml import etree





# Connect to the target site, retrying several times on failure.
def geturl(url, maxtry=3, timeout=5):
    """Download *url* and return the raw response body.

    The request is sent with a fixed browser-like ``User-Agent`` header and
    is attempted up to *maxtry* times.

    Args:
        url: Address to fetch.
        maxtry: Maximum number of attempts before giving up.
        timeout: Per-attempt timeout in seconds passed to ``urlopen``.

    Returns:
        The response body as ``bytes``, or ``None`` when every attempt
        failed (a message is printed in that case).
    """
    user_agent = 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.3373.400 QQBrowser/9.6.11866.400'
    headers = {'User-Agent': user_agent}

    for _ in range(maxtry):
        try:
            request = urllib.request.Request(url, headers=headers)
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(request, timeout=timeout) as response:
                return response.read()
        except Exception:
            # Best-effort retry on any ordinary error (network, HTTP, bad
            # URL) while still letting KeyboardInterrupt/SystemExit through.
            continue

    print("Has tried %d times to access url %s,all failed!" % (maxtry, url))
    # Explicit failure value; previously this path hit an unassigned local.
    return None



# Fetch two listing pages and print the thread links extracted from each.
for j in range(2):
    # NOTE(review): presumably ``pn`` is a record offset rather than a page
    # index, in which case 0 and 1 request almost the same page -- confirm.
    url = 'https://tieba.baidu.com/f?kw=%E7%BE%8E%E5%A5%B3&ie=utf-8&pn=' + str(j)

    html = geturl(url)
    # Alternative fetch that sends urllib's default headers instead of the
    # browser-like User-Agent used by geturl():
    # html = urllib.request.urlopen(url).read()
    #
    # NOTE(review): the two fetches differ only in the request headers, so a
    # different XPath result suggests the server returns different markup per
    # User-Agent (possibly with the thread list wrapped in HTML comments,
    # which the parser skips). Compare the raw bytes of both to confirm.

    if not html:
        # Nothing was downloaded (failed or empty response); skip parsing.
        continue

    selector = etree.HTML(html)
    # The class value is matched exactly, including its trailing space.
    links = selector.xpath('//div/a[@class ="j_th_tit "]/@href')
    print(links)