# This pipeline uses MySQLdb (MySQL-python), which works well enough. The code follows.
from scrapy.contrib.exporter import XmlItemExporter,JsonItemExporter,JsonLinesItemExporter,CsvItemExporter
from scrapy import signals,log
from scrapy.exceptions import DropItem
import datetime,pymongo
import MySQLdb
class MysqlDBPipline(object):
    """Scrapy item pipeline that inserts scraped rows into the ``itunes`` table.

    Connection parameters are read from the crawler settings
    (``MYSQL_HOST``, ``MYSQL_USER``, ``MYSQL_PASSWD``, ``MYSQL_DB``) by
    ``from_crawler``. The connection is opened in ``open_spider`` and
    released in ``close_spider``.
    """

    def __init__(self, mysql_host, mysql_db, mysql_user, mysql_passwd):
        """Store the connection parameters; no connection is opened here."""
        self.mysql_host = mysql_host
        self.mysql_db = mysql_db
        self.mysql_user = mysql_user
        self.mysql_passwd = mysql_passwd
        # Defined up front so close_spider() is safe even when open_spider()
        # never ran or failed before a connection was established.
        self.conn = None
        self.cursor = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``MYSQL_*`` keys of ``crawler.settings``."""
        settings = crawler.settings
        return cls(
            mysql_host=settings.get('MYSQL_HOST'),
            mysql_user=settings.get('MYSQL_USER'),
            mysql_passwd=settings.get('MYSQL_PASSWD'),
            mysql_db=settings.get('MYSQL_DB'),
        )

    def open_spider(self, spider):
        """Open the MySQL connection and cursor used for the whole crawl.

        Raises:
            MySQLdb.Error: if connecting or creating the cursor fails. The
                error is logged and re-raised so the failure surfaces here
                instead of as an AttributeError on the first item.
        """
        try:
            self.conn = MySQLdb.connect(
                user=self.mysql_user,
                passwd=self.mysql_passwd,
                db=self.mysql_db,
                host=self.mysql_host,
                charset="utf8",
                use_unicode=True,
            )
            self.cursor = self.conn.cursor()
        except MySQLdb.Error as e:
            log.msg("Mysql Error %d: %s" % (e.args[0], e.args[1]),
                    level=log.ERROR, spider=spider)
            raise

    def close_spider(self, spider):
        """Close the cursor and the connection if they were opened."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def process_item(self, item, spider):
        """Validate ``item`` and insert one row per (title, url) pair.

        ``item['title']`` and ``item['url']`` are read as parallel sequences;
        ``item['create_time']`` is written to every row.

        Returns:
            The unchanged ``item``.

        Raises:
            DropItem: if any field of the item holds a falsy value.
        """
        # Iterating the item yields field names, so the value has to be
        # looked up explicitly; testing the name itself is always truthy.
        for field in item:
            if not item[field]:
                raise DropItem("Missing {0}!".format(field))

        # zip() pairs titles with urls and stops at the shorter sequence.
        for title, url in zip(item['title'], item['url']):
            try:
                self.cursor.execute(
                    'INSERT INTO itunes (title,url,create_time) VALUES (%s,%s,%s)',
                    [title, url, item['create_time']],
                )
                # Commit per row so one failing row does not discard the others.
                self.conn.commit()
            except MySQLdb.Error as e:
                log.msg("Mysql Error %d: %s" % (e.args[0], e.args[1]),
                        level=log.ERROR, spider=spider)
        log.msg("Question added to MySQLdb database!", level=log.DEBUG, spider=spider)
        return item


# Add the following entry to the project's settings file to enable the pipeline:
ITEM_PIPELINES = {
    'apple.pipelines.MysqlDBPipline': 100
}