python 插入数据到sqlite3，有时会插入几条数据后卡着，也不报错

zd96998 2014-09-10 11:21:33

#! /usr/bin/env python

#coding=utf-8

#从雅虎问答抓取一些问答数据

import sys

import re

import urllib

import sqlite3

import time



cx = sqlite3.connect("Q&A.db")

cx.text_factory = str

cu = cx.cursor()



query  = 'insert into yahoo(number,link,question,qContent,answer) values (?,?,?,?,?)'

#插入之前一定要记得修改i的初值

i = 0

#query  = 'insert into qalist(link,) values (?,)'



#插入之前一定要记得修改i的初值

def getHtml(url):

	page = urllib.urlopen(url)

	html = page.read()

	return html



#插入之前一定要记得修改i的初值

def trans_r_p(alllist):

	qlist = []

	for u in alllist:

		rurl = 'https://answers.yahoo.com' + u + '&tab=recent&filter=intl&sort=new'

		qlist.append(rurl)

		purl = 'https://answers.yahoo.com' + u + '&tab=popular&filter=intl&sort=popular'

		qlist.append(purl)

	return qlist



#插入之前一定要记得修改i的初值

def getQurl(html):

	reg  = r'<h3 class="question-title"><a data-ylk="sec:(recent|popular);slk:q;ql:\d{14}[\d\w]{7}" href="([\s\S]*?)">'

	qurlre = re.compile(reg)

	qurllist  = re.findall(qurlre,html)

	qulist = []

	for item in qurllist:

		qulist.append(item[1])

	return qulist



#插入之前一定要记得修改i的初值

def getQA(html):

	qreg = r'<title>(.+)</title>'

	creg = r'<div class="qDetail mt14 none">([\s\S]*?)</div>'

	bareg = r'<div class="content" itemprop="text">\n([\s\S]*?)</div>'

	areg = r'<div class="content mb14" data-full="">\n([\s\S]*?)</div>'

	qre = re.compile(qreg)

	cre = re.compile(creg)

	bare = re.compile(bareg)

	are = re.compile(areg)

	q = re.findall(qre,html)

	c = re.findall(cre,html)

	ba = re.findall(bare,html)

	a = re.findall(are,html)

	return q,c,ba,a



#插入之前一定要记得修改i的初值

def filter(str):

	ps = str.split('<br>')

	s = ps[0]

	skipsteps = 1

	for item in ps:

		if skipsteps > 0:

			skipsteps -= 1

			continue

		s = s + ' ' + item

	return s



#插入之前一定要记得修改i的初值

def writedb(url):

	#qaUrl = url

	#page = 0

	#fq = open('questions.txt','a')

	#fc = open('contens.txt','a')

	#fa = open('answers.txt','a')

	#i = 0

	global i

	urls = getQurl(getHtml(url))

	#print urls

	for ur in urls:

		u = 'https://answers.yahoo.com' + ur

		q,c,ba,a = getQA(getHtml(u))

		#print len(a)

		time.sleep(15)

		if q == []:

			continue

		else:

			question = q[0]

		if a == []:

			continue

		if c == []:

			content = ''

		else:

			content = filter(c[0])

		if ba != []:

			bestanswer = filter(ba[0])

			bestcontent  = content + ' (Best)'

			i += 1

			cu.execute(query,(i,u,question,bestcontent,bestanswer))

			cx.commit()

		for ans in a:

			i += 1

			answer = filter(ans)

			cu.execute(query,(i,u,question,content,answer))			

			cx.commit()

			#插入之前一定要记得修改i的初值



 writedb('https://answers.yahoo.com/dir/index?sid=396545012&tab=recent&filter=intl&sort=new')

 print 1

# writedb('https://answers.yahoo.com/dir/index?sid=396545012&tab=popular&filter=intl&sort=popular')#1-180 finished

# print 2

# writedb('https://answers.yahoo.com/dir/index?sid=396545144&tab=recent&filter=intl&sort=new')#181-201

# print 3





cu.close()

cx.close()

#http://zhidao.baidu.com/browse/80?lm=4&pn=0#list

...全文

398 3 打赏收藏转发到动态举报

写回复

用AI写文章

3 条回复

切换为时间正序

请发表友善的回复…

发表回复

thomashtq 2014-09-12

打赏
举报

采用非阻塞模式读，或者利用超时重试机制（超过时间直接放弃，放回列表中），或者干脆超时直接放弃。

zd96998 2014-09-11

打赏
举报

引用 1 楼 thomashtq 的回复:

估计是下面这个代码段在读取某个URL是阻塞了，应该跟插入数据库卡住没关系。不信你可以注掉插入数据库的代码段，应该也会卡住。
def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html

那应该怎么读取URL啊？该怎么写呢？谢谢啊

thomashtq 2014-09-11

打赏
举报

估计是下面这个代码段在读取某个URL是阻塞了，应该跟插入数据库卡住没关系。不信你可以注掉插入数据库的代码段，应该也会卡住。

def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html

2. **智能选择注入技术**：根据目标网站的特点，SQLMap会选择最合适的SQL注入技术来进行攻击，包括但不限于基于布尔的盲注、基于时间的盲注、基于报错注入等。 3. **数据库识别与利用**：SQLMap能够识别多种数据库...

Python内置的sqlite3模块就是一个轻量级的数据库，不需要独立的服务器进程。它的数据库就是一个.db后缀的文件，可以跨平台直接访问，非常便捷。简直就是轻量级数据的首选数据库啊！★今天我们就来用Python实战操作...

中间多了一句 sqlite3.connect() 删除即可

一、Sqlite3、SQLAlchemy安装Sqlite3是Python3标准库不需要另外安装，只需要安装SQLAlchemy即可。本文sqlalchemy版本为1.2.12pip install sqlalchemy二、ORM操作除了第一步创建引擎时连接URL不一样，其他操作其他...

众所周知SQLite3是一个非常小巧、精巧的数据库，在用户量或者信息量不大的时候使用起来嘎嘎棒，本篇博客会对SQLite3或者说对数据库基本概念进行介绍，然后教大家如何在Python中使用SQLite3数据库。

脚本语言

37,743

社区成员

34,212

社区内容

发帖

与我相关

我的任务

社区管理员

加入社区

近7日
近30日
至今

加载中

查看更多榜单

试试用AI创作助手写篇文章吧

+ 用AI写文章