# 37,743
# 社区成员
# 发帖
# 与我相关
# 我的任务
# 分享
import requests
from lxml import etree
import base64
from PIL import Image
# One Session is used for every request to gushiwen so that any cookies set
# by the login page are sent along with the captcha request and the later
# login POST (presumably the server ties the captcha to that cookie --
# confirm against the site if login still fails).
session = requests.Session()
url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

# Fetch the login page and build the absolute URL of its captcha image.
page_text = session.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
img_src = 'https://so.gushiwen.org' + tree.xpath('//*[@id="imgCode"]/@src')[0]

# Download the captcha through the same session (not a bare requests.get),
# otherwise the image is served to a different, cookie-less visitor.
res = session.get(url=img_src, headers=headers).content
with open('./code.gif', 'wb') as f:
    f.write(res)

# Re-save the downloaded image as code.png, the file the OCR step reads.
with Image.open('code.gif') as captcha_image:
    captcha_image.save('code.png')
# General-purpose text recognition: obtain a Baidu AI access token, then send
# the captcha image to the "general_basic" OCR endpoint and keep the first
# recognised line as the captcha code.
# NOTE(review): the client_id / client_secret are embedded in this URL;
# consider moving them out of the source before sharing the script.
host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=L9BNPAgFmP7RRB4D0cFlhGUi&client_secret=eqE1iCj0kcMk53lpjMv3KbBoItX5MrkG'
response = requests.get(host).json()
access_token = response['access_token']
request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic"

# Open the image in binary mode; the context manager closes the handle.
with open('code.png', 'rb') as f:
    img = base64.b64encode(f.read())
params = {"image": img}
request_url = request_url + "?access_token=" + access_token

# Keep the OCR request headers under their own name: the module-level
# `headers` (browser user-agent) is reused by the login POST later in the
# script and must not be overwritten here.
ocr_headers = {'content-type': 'application/x-www-form-urlencoded'}
response = requests.post(request_url, data=params, headers=ocr_headers).json()

# Fail with a readable message when the service returns an error payload or
# recognises no text, instead of an opaque KeyError/IndexError.
words_result = response.get('words_result')
if not words_result:
    raise RuntimeError('OCR returned no text for the captcha: {!r}'.format(response))
code = words_result[0]['words']
print(code)
# Submit the login form with the recognised captcha and save the response.
Login_url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'

# The hidden __VIEWSTATE / __VIEWSTATEGENERATOR form fields are read from the
# login page parsed earlier in the script (`tree`) so the values match the
# page that was actually served; the previously captured values are kept as
# a fallback in case the inputs are not found.
viewstate = tree.xpath('//input[@name="__VIEWSTATE"]/@value')
viewstate_generator = tree.xpath('//input[@name="__VIEWSTATEGENERATOR"]/@value')

# NOTE(review): the account e-mail and password are hard-coded below;
# consider moving them out of the source before sharing the script.
data = {
    '__VIEWSTATE': viewstate[0] if viewstate else 'Em4iCpnL3M3DrcgCu77COw33yUvSBkBcNV9Alv2+79sgFsGVhqyvPSI7LNRUv6yr3Zl1ZhvOHez60jaQmRF2sSBVKPNxXmrUeZgX1XGyQF1If1pVWgucP8Ivg+k=',
    '__VIEWSTATEGENERATOR': viewstate_generator[0] if viewstate_generator else 'C93BE1AE',
    'from': 'http://so.gushiwen.org/user/collect.aspx',
    'email': '1303541051@qq.com',
    'pwd': 'zdy0519',
    'code': code,
    'denglu': '登录',
}

# POST through the shared session so the cookies from the earlier requests
# accompany the credentials and captcha code.
page_text = session.post(url=Login_url, headers=headers, data=data).text

# Persist the returned page so the login result can be inspected in a browser.
with open('gushiwen.html', 'w', encoding='utf-8') as f:
    f.write(page_text)