37,721
社区成员
发帖
与我相关
我的任务
分享
# -*- coding: cp936 -*-
import re
import time
import urllib2
def getInfoFromPage(pattern, page):
    """Return every match of ``pattern`` found in ``page``.

    The pattern is compiled with ``re.M`` so that ``^`` and ``$`` match at
    line boundaries inside ``page``. The matches are also echoed to stdout.

    Args:
        pattern: Regular expression source string.
        page: Text to search.

    Returns:
        The list produced by ``findall``; empty when nothing matches.
    """
    result = re.compile(pattern, re.M).findall(page)
    # Keep the original diagnostic echo. The parenthesised form prints the
    # same text on Python 2 and is also valid syntax on Python 3.
    print(result)
    return result
def getCompanyName():
    """Scrape one product page and append its contact details to info.txt.

    Downloads the hard-coded product page, extracts the company name, the
    contact person and the phone number with look-around regexes, and
    appends them as ``company;person;phone`` to ``info.txt`` in the current
    working directory. Only the first match of each field is used.

    Raises:
        IndexError: If any of the three fields is not found in the page.
        urllib2.URLError: If the page cannot be fetched.
    """
    url = 'http://china.machine365.com/Product/SDetails/9535604.html'
    page = urllib2.urlopen(url)
    try:
        data = page.read()
    finally:
        # Release the HTTP connection even if the read fails.
        page.close()

    p1 = """(?<=<font style='color:White;font-size:14px'>).*(?=</font>)"""
    companyName = getInfoFromPage(p1, data)
    # The lookahead requires a newline right after </div>, exactly as in the
    # original multi-line pattern literal.
    p2 = """(?<=<font>联系人: </font>).*(?=</div>\n)"""
    person = getInfoFromPage(p2, data)
    p3 = """(?<=<li><span>电 话:</span>).*(?=</li>)"""
    phone = getInfoFromPage(p3, data)

    # Fail with a message naming the missing field instead of a bare
    # "list index out of range"; the exception type is unchanged.
    for label, matches in (('company name', companyName),
                           ('contact person', person),
                           ('phone', phone)):
        if not matches:
            raise IndexError('%s not found in %s' % (label, url))

    line = companyName[0] + ';' + person[0] + ';' + phone[0]
    # NOTE(review): no record separator is written, so repeated runs append
    # onto the same line of info.txt - confirm whether a newline is wanted.
    with open('info.txt', 'a') as fd:
        fd.write(line)
# Script entry point: run the scrape once. There is no __main__ guard, so
# this call also executes whenever the module is imported.
getCompanyName()