Python scrapy pipeline error

Question

I'm developing spider for scraping some pages and i have issues with scrapy-pipeline.. I don't know why.. What's wrong in the script?

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
from scrapy.exceptions import DropItem
import sqlite3

con = None

class RealtybasePipeline(object):

    def __init__(self):
        self.setupDBCon()
        self.createTables()

    def process_item(self, item, spider):
        self.storeInDb(item)
        return item

    def storeInDb(self, item):
        dealerId = self.cur.lastrowid
        self.storeSrealityInfoInDb(item)
        self.storeDealerInfoInDb(item)

    def storeSrealityInfoInDb(self, item):
        self.cur.execute("INSERT INTO sreality(\
            name, \
            price, \
            url, \
            estateType, \
            adress, \
            createDate, \
            source, \
            dealerName, \
            dealerMail, \
            dealerPhoto, \
            ) \
        VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ? )", \
        ( \
            item.get('name'), 
            item.get('price'), 
            item.get('url'), 
            item.get('estateType'), 
            item.get('adress'), 
            item.get('createDate'), 
            item.get('source'), 
            item.get('dealerName'), 
            item.get('dealerMail'), 
            item.get('dealerPhoto'), 
        ))
        self.con.commit()  

    def storeDealerInfoInDb(self, item):
        self.cur.execute("INSERT INTO Actors(\
            dealerName, \
            dealerMail, \
            dealerPhoto \
            ) \
        VALUES(?,?,?)", 
        (
            item.get('dealerName'), 
            item.get('dealerMail'), 
            item.get('dealerPhoto'), 
            ))
        self.con.commit()  

    def setupDBCon(self):
        self.con = sqlite3.connect('test.db')
        self.cur = self.con.cursor() 


    # this is the class destructor. It will get called automaticly by python's garbage collecter once this class is no longer used. 
    def __del__(self):
        self.closeDB()

    # I'm currently droping the tables if they exist before I run the script each time, so that
    # I don't get duplicate info. 
    def createTables(self):
        #self.dropSrealityTable()
        #self.dropDealersTable()
        self.createSrealityTable()
        self.createDealersTable()


    def createSrealityTable(self):
        self.cur.execute("CREATE TABLE IF NOT EXISTS sreality(name TEXT PRIMARY KEY NOT NULL, \
            price TEXT, \
            url TEXT, \
            photo TEXT, \
            estateType TEXT, \
            adress TEXT, \
            createDate Text, \
            source TEXT, \
            dealerName TEXT, \
            dealerMail TEXT, \
            dealerPhoto TEXT, \
            )")

    def createDealersTable(self):
        self.cur.execute("CREATE TABLE IF NOT EXISTS Actors(dealerName TEXT PRIMARY KEY NOT NULL, \
            dealerMail TEXT, \
            dealerPhoto TEXT )")

    def dropSrealityTable(self):
        self.cur.execute("DROP TABLE IF EXISTS sreality")

    def dropDealersTable(self):
        self.cur.execute("DROP TABLE IF EXISTS dealers")

    def closeDB(self):
        self.con.close()

After I run the spider, it throws me some errors..

  File "/home/pr0n/Dropbox/realtyBase/realtyBase/pipelines.py", line 16, in __init__
    self.createTables()

  File "/home/pr0n/Dropbox/realtyBase/realtyBase/pipelines.py", line 83, in createTables
    self.createSrealityTable()

  File "/home/pr0n/Dropbox/realtyBase/realtyBase/pipelines.py", line 99, in createSrealityTable
    )")
sqlite3.OperationalError: near ")": syntax error

alecxe · Accepted Answer

There is an extra comma before the closing parenthesis:

self.cur.execute("CREATE TABLE IF NOT EXISTS sreality(name TEXT PRIMARY KEY NOT NULL, \
        price TEXT, \
        url TEXT, \
        photo TEXT, \
        estateType TEXT, \
        adress TEXT, \
        createDate Text, \
        source TEXT, \
        dealerName TEXT, \
        dealerMail TEXT, \
        dealerPhoto TEXT, \  # < HERE
        )")

As a side point, to improve readability, you can write your multi-line query in triple quotes:

self.cur.execute("""
    CREATE TABLE IF NOT EXISTS 
        sreality 
        (name TEXT PRIMARY KEY NOT NULL, 
         price TEXT, 
         url TEXT, 
         photo TEXT, 
         estateType TEXT, 
         adress TEXT, 
         createDate Text, 
         source TEXT, 
         dealerName TEXT, 
         dealerMail TEXT, 
         dealerPhoto TEXT)
""")

You may also consider abstracting it out with an ORM, like sqlalchemy.

Python scrapy pipeline error

Answers (1)

Related Questions