Reputation: 584
I'm developing spider for scraping some pages and i have issues with scrapy-pipeline.. I don't know why.. What's wrong in the script?
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
from scrapy.exceptions import DropItem
import sqlite3
con = None
class RealtybasePipeline(object):
def __init__(self):
self.setupDBCon()
self.createTables()
def process_item(self, item, spider):
self.storeInDb(item)
return item
def storeInDb(self, item):
dealerId = self.cur.lastrowid
self.storeSrealityInfoInDb(item)
self.storeDealerInfoInDb(item)
def storeSrealityInfoInDb(self, item):
self.cur.execute("INSERT INTO sreality(\
name, \
price, \
url, \
estateType, \
adress, \
createDate, \
source, \
dealerName, \
dealerMail, \
dealerPhoto, \
) \
VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ? )", \
( \
item.get('name'),
item.get('price'),
item.get('url'),
item.get('estateType'),
item.get('adress'),
item.get('createDate'),
item.get('source'),
item.get('dealerName'),
item.get('dealerMail'),
item.get('dealerPhoto'),
))
self.con.commit()
def storeDealerInfoInDb(self, item):
self.cur.execute("INSERT INTO Actors(\
dealerName, \
dealerMail, \
dealerPhoto \
) \
VALUES(?,?,?)",
(
item.get('dealerName'),
item.get('dealerMail'),
item.get('dealerPhoto'),
))
self.con.commit()
def setupDBCon(self):
self.con = sqlite3.connect('test.db')
self.cur = self.con.cursor()
# this is the class destructor. It will get called automaticly by python's garbage collecter once this class is no longer used.
def __del__(self):
self.closeDB()
# I'm currently droping the tables if they exist before I run the script each time, so that
# I don't get duplicate info.
def createTables(self):
#self.dropSrealityTable()
#self.dropDealersTable()
self.createSrealityTable()
self.createDealersTable()
def createSrealityTable(self):
self.cur.execute("CREATE TABLE IF NOT EXISTS sreality(name TEXT PRIMARY KEY NOT NULL, \
price TEXT, \
url TEXT, \
photo TEXT, \
estateType TEXT, \
adress TEXT, \
createDate Text, \
source TEXT, \
dealerName TEXT, \
dealerMail TEXT, \
dealerPhoto TEXT, \
)")
def createDealersTable(self):
self.cur.execute("CREATE TABLE IF NOT EXISTS Actors(dealerName TEXT PRIMARY KEY NOT NULL, \
dealerMail TEXT, \
dealerPhoto TEXT )")
def dropSrealityTable(self):
self.cur.execute("DROP TABLE IF EXISTS sreality")
def dropDealersTable(self):
self.cur.execute("DROP TABLE IF EXISTS dealers")
def closeDB(self):
self.con.close()
After I run the spider, it throws me some errors..
File "/home/pr0n/Dropbox/realtyBase/realtyBase/pipelines.py", line 16, in __init__
self.createTables()
File "/home/pr0n/Dropbox/realtyBase/realtyBase/pipelines.py", line 83, in createTables
self.createSrealityTable()
File "/home/pr0n/Dropbox/realtyBase/realtyBase/pipelines.py", line 99, in createSrealityTable
)")
sqlite3.OperationalError: near ")": syntax error
Upvotes: 1
Views: 850
Reputation: 474021
There is an extra comma before the closing parenthesis:
self.cur.execute("CREATE TABLE IF NOT EXISTS sreality(name TEXT PRIMARY KEY NOT NULL, \
price TEXT, \
url TEXT, \
photo TEXT, \
estateType TEXT, \
adress TEXT, \
createDate Text, \
source TEXT, \
dealerName TEXT, \
dealerMail TEXT, \
dealerPhoto TEXT, \ # < HERE
)")
As a side point, to improve readability, you can write your multi-line query in triple quotes:
self.cur.execute("""
CREATE TABLE IF NOT EXISTS
sreality
(name TEXT PRIMARY KEY NOT NULL,
price TEXT,
url TEXT,
photo TEXT,
estateType TEXT,
adress TEXT,
createDate Text,
source TEXT,
dealerName TEXT,
dealerMail TEXT,
dealerPhoto TEXT)
""")
You may also consider abstracting it out with an ORM, like sqlalchemy
.
Upvotes: 2