user10029552
user10029552

Reputation:

Scrapy insert into PostgreSQL error

I'm currently new to Python, and I'm trying to insert into my PostgreSQL with Scrapy.

I managed to scrape information from this website, but when I try to insert into PostgreSQL, I get the following error.

"self.cur.execute("""insert into real_estate(estate_title,estate_address,estate_area,estate_description,estate_price,estate_type,estate_tag,estate_seller_name,estate_seller_address,estate_seller_phone,estate_seller_mobile,estate_seller_email) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",(item['estate_title'],item['estate_address'],item['estate_area'],item['estate_description'],item['estate_price'],item['estate_type'],item['estate_tag'],item['estate_seller_name'],item['estate_seller_address'],item['estate_seller_phone'],item['estate_seller_mobile'],item['estate_seller_email']))
psycopg2.InternalError: current transaction is aborted, commands ignored until end of transaction block" error

I know that my query is wrong somewhere, but I can't find where it went wrong. I have been trying to fix this for 2 days and still can't figure out the error. I hope someone can help me.

My spider:

import scrapy
from ..items import real_estateItem
class RealEstateSpider(scrapy.Spider):
   name = "estate"
   start_urls = [
    'https://batdongsan.com.vn/nha-dat-ban/p1',
  ]

def parse(self, response):
    # follow links to estate page
    for href in response.css('div.p-title h3 a::attr(href)'):
        yield response.follow(href, self.parse_estate)

    # follow pagination links
    for href in response.css('div.background-pager-right-controls a:nth-child(2)::attr(href)'):
        yield response.follow(href, self.parse)

def parse_estate(self, response):
        for extract_with_css in response.css('div#product-detail'):
            estate_title = extract_with_css.css('h1[itemprop="name"]::text').extract_first()
            estate_address = extract_with_css.css('div.table-detail div.row:nth-child(2) div.right::text').extract_first()
            estate_price =extract_with_css.css('span.gia-title.mar-right-15 strong::text').extract_first()
            estate_area =extract_with_css.css('span.gia-title strong::text').extract_first()
            estate_description =extract_with_css.css('div.pm-desc::text').extract_first()
            estate_tag =response.css('div.tagpanel a::attr(href)').extract()
            estate_type =extract_with_css.css('div.table-detail div.row:nth-child(1) div.right::text').extract_first()
            estate_seller_name =extract_with_css.css('div#LeftMainContent__productDetail_contactName div.right::text').extract_first()
            estate_seller_address =extract_with_css.css('div#LeftMainContent__productDetail_contactAddress div.right').extract_first()
            estate_seller_phone = extract_with_css.css('div#LeftMainContent__productDetail_contactPhone div.right').extract_first()
            estate_seller_mobile = extract_with_css.css('div#LeftMainContent__productDetail_contactMobile div.right::text').extract_first()
            estate_seller_email = extract_with_css.css('div#contactEmail a::text').extract_first()

        estateItem = real_estateItem(estate_title= estate_title,estate_address=estate_address,estate_area=estate_area,estate_description=estate_description,estate_price=estate_price,estate_type=estate_type,estate_tag=estate_tag,estate_seller_name=estate_seller_name,estate_seller_address=estate_seller_address,estate_seller_phone=estate_seller_phone,estate_seller_mobile=estate_seller_mobile,estate_seller_email=estate_seller_email)
        yield estateItem

My pipeline.py:

import psycopg2
class TutorialPipeline(object):
def open_spider(self, spider):
    hostname = 'localhost'
    username = 'postgres'
    password = '123' # your password
    database = 'postgres'
    self.connection = psycopg2.connect(host=hostname, user=username, password=password, dbname=database)
    self.cur = self.connection.cursor()

def close_spider(self, spider):
    self.cur.close()
    self.connection.close()

def process_item(self, item, spider):
    self.cur.execute("""insert into real_estate(estate_title,estate_address,estate_area,estate_description,estate_price,estate_type,estate_tag,estate_seller_name,estate_seller_address,estate_seller_phone,estate_seller_mobile,estate_seller_email) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",(item['estate_title'],item['estate_address'],item['estate_area'],item['estate_description'],item['estate_price'],item['estate_type'],item['estate_tag'],item['estate_seller_name'],item['estate_seller_address'],item['estate_seller_phone'],item['estate_seller_mobile'],item['estate_seller_email']))
    self.connection.commit()
    return item

My items.py:

class real_estateItem(scrapy.Item):
 estate_title = scrapy.Field()
 estate_address= scrapy.Field()
 estate_area = scrapy.Field()
 estate_description = scrapy.Field()
 estate_price = scrapy.Field()
 estate_type = scrapy.Field()
 estate_tag = scrapy.Field()
 estate_seller_name = scrapy.Field()
 estate_seller_address = scrapy.Field()
 estate_seller_phone = scrapy.Field()
 estate_seller_mobile = scrapy.Field()
 estate_seller_email = scrapy.Field()

And here are my columns:

database

EDIT: I found out my mistake, my code got inserted into the DB but they are white spaces to next line so i didn't see the data unless i click on a data field.I used strip() function on all my parse and now they showing on the first line instead of hiding in the second line

pgadmin4

Upvotes: 2

Views: 1023

Answers (1)

user10029552
user10029552

Reputation:

I found out my mistake, my code got inserted into the DB but they are white spaces to next line so i didn't see the data unless i click on a data field.I used strip() function on all my parse and now they showing on the first line instead of hiding in the second line

Upvotes: 2

Related Questions