# Download one news article page and print every <div> that findAll()
# matches for the "newsbody" filter.
url = 'http://news.cnad.com/html/Article/2009/0520/2009052011244679.shtml'

# Fetch the raw page bytes; always release the connection, even if read()
# raises.
response = urllib.urlopen(url)
try:
    data = response.read()
finally:
    response.close()

# NOTE(review): the traceback recorded below shows this constructor raising
# HTMLParser.HTMLParseError ("malformed start tag") for this page when run
# with BeautifulSoup 3.1.0.1 on Python 2.5.
soup = BeautifulSoup(data)

# A bare string as the second argument is presumably treated by
# BeautifulSoup 3 as a CSS class filter -- confirm against the library docs.
contents = soup.findAll('div', "newsbody")

# Single-argument parenthesized print produces the same output as the
# Python 2 print statement.
print(len(contents))
for c in contents:
    print(c)
运行后提示下列错误:
Traceback (most recent call last):
File "ipiao.py", line 36, in <module>
soup = BeautifulSoup(data)
File "/home/lixueshi/BeautifulSoup-3.1.0.1/BeautifulSoup.py", line 1499, in __init__
BeautifulStoneSoup.__init__(self, *args, **kwargs)
File "/home/lixueshi/BeautifulSoup-3.1.0.1/BeautifulSoup.py", line 1230, in __init__
self._feed(isHTML=isHTML)
File "/home/lixueshi/BeautifulSoup-3.1.0.1/BeautifulSoup.py", line 1263, in _feed
self.builder.feed(markup)
File "/usr/lib/python2.5/HTMLParser.py", line 108, in feed
self.goahead(0)
File "/usr/lib/python2.5/HTMLParser.py", line 148, in goahead
k = self.parse_starttag(i)
File "/usr/lib/python2.5/HTMLParser.py", line 226, in parse_starttag
endpos = self.check_for_whole_start_tag(i)
File "/usr/lib/python2.5/HTMLParser.py", line 301, in check_for_whole_start_tag
self.error("malformed start tag")
File "/usr/lib/python2.5/HTMLParser.py", line 115, in error
raise HTMLParseError(message, self.getpos())
HTMLParser.HTMLParseError: malformed start tag, at line 319, column 198
但是别的 URL 就可以正常解析,请问是这个页面的 HTML 不规范吗?