37,743
社区成员




#coding:utf-8
import urllib.request
from lxml import etree
# Fetch a Baidu Tieba listing page and print the absolute URL of every
# thread link found on it.
url = "https://tieba.baidu.com/f?ie=utf-8&kw=%E7%BE%8E%E5%A5%B3&fr=search"
# Identify as a desktop Chrome browser rather than urllib's default client.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"
}
request = urllib.request.Request(url, headers=headers)
# Use a context manager so the HTTP response is closed once the body is read.
with urllib.request.urlopen(request) as response:
    html = response.read().decode('UTF-8')

# Parse the HTML document into an element tree that can be queried with XPath.
content = etree.HTML(html)
print(content)
# Select the <a> elements nested under each thread list item.
# NOTE(review): "cleafix" looks like a typo for "clearfix", but it is left
# unchanged because the value has to match the page's markup -- confirm
# against the live page before "fixing" it.
link_list = content.xpath('//li[@class="j_thread_list clearfix"]/div[@class="t_con cleafix"]/div[@class="col2_right j_threadlist_li_right"]/div/div/a')
print(link_list)
for link in link_list:
    # The XPath yields Element objects, not strings, so the target path has
    # to be read from the href attribute before it can be concatenated.
    href = link.get("href")
    if href is None:
        # An anchor without an href has nothing to link to; skip it.
        continue
    fulllink = "http://tieba.baidu.com" + href
    print(fulllink)