Samuel Marks
Samuel Marks

Reputation: 1884

Escape Python to `json[]` for `COPY FROM` PostgreSQL insertion?

How do I correctly escape for json[] column insertion?

Currently I get this error, [run script with more debugging]:

COPY "my_table" ("json_arr_col", "id") FROM STDIN WITH null as 'null' DELIMITER '|'
{'json_arr_col': '{"jj":null,"text":"bop"}\n'}
Traceback (most recent call last):
  File "postgres_copy_from_array.py", line 98, in <module>
    psql_insert_copy(
  File "postgres_copy_from_array.py", line 38, in psql_insert_copy
    cur.copy_expert(sql=sql, file=s_buf)
psycopg2.errors.InvalidTextRepresentation: malformed array literal: "{"jj":null,"text":"bop"}"
DETAIL:  Unexpected array element.
CONTEXT:  COPY my_table, line 1, column json_arr_col: "{"jj":null,"text":"bop"}"

How do I escape the value so that the COPY FROM succeeds? My attempts:

     elif isinstance(col, dict):
        return dumps(dumps(col, separators=(",", ":")))

Gives:

COPY "my_table" ("json_arr_col", "id") FROM STDIN WITH null as 'null' DELIMITER '|'
{'json_arr_col': '"{\\"jj\\":null,\\"text\\":\\"bop\\"}"\n'}
Traceback (most recent call last):
  File "postgres_copy_from_array.py", line 98, in <module>
    psql_insert_copy(
  File "postgres_copy_from_array.py", line 38, in psql_insert_copy
    cur.copy_expert(sql=sql, file=s_buf)
psycopg2.errors.InvalidTextRepresentation: malformed array literal: ""{"jj":null,"text":"bop"}""
DETAIL:  Array value must start with "{" or dimension information.
CONTEXT:  COPY my_table, line 1, column json_arr_col: ""{"jj":null,"text":"bop"}""

from io import StringIO
from json import dumps

import psycopg2.sql


def psql_insert_copy(table, conn, keys, data_iter):
    """Bulk-insert rows into *table* via Postgres ``COPY ... FROM STDIN``.

    ``table`` is an object with ``name`` and ``schema`` attributes,
    ``conn`` a psycopg2 connection, ``keys`` the column names, and
    ``data_iter`` an iterable of row tuples. Each cell is rendered with
    the sibling ``parse_col`` helper and rows are joined into a
    '|'-delimited text stream.
    """
    with conn.cursor() as cur:
        # Render every row as one '|'-separated line of parsed values.
        rows = (
            "|".join(str(parse_col(cell)) for cell in row)
            for row in data_iter
        )
        buffer = StringIO("\n".join(rows))

        # A leading pandas-style "index" column is dropped; all
        # identifiers are safely quoted via psycopg2.sql.
        columns = keys[1:] if keys and keys[0] == "index" else keys
        qualified = (table.schema, table.name) if table.schema else (table.name,)
        sql = "COPY {} ({}) FROM STDIN WITH null as 'null' DELIMITER '|'".format(
            psycopg2.sql.Identifier(*qualified).as_string(cur),
            psycopg2.sql.SQL(", ")
            .join(map(psycopg2.sql.Identifier, columns))
            .as_string(cur),
        )
        cur.copy_expert(sql=sql, file=buffer)

My helper function:

from functools import partial
from json import dumps
import numpy as np

def parse_col(col):
    """Render a Python value as a field for Postgres ``COPY ... FROM STDIN``.

    Returns either a ``str`` ready to be written to the COPY text stream
    or a number (``int``/``float``/``complex``) that the caller passes
    through ``str()``. Containers become Postgres array literals whose
    elements are JSON-encoded.

    Raises:
        NotImplementedError: for any unsupported type.
    """
    # Imported locally so the helper is self-contained; the original
    # snippet referenced ``datetime`` without ever importing it, which
    # raised NameError for datetime inputs.
    from datetime import datetime

    if isinstance(col, np.ndarray):
        return parse_col(col.tolist()) if col.size > 0 else "null"
    elif isinstance(col, bool):
        # bool must be tested before int: True/False are int subclasses.
        return int(col)
    elif isinstance(col, bytes):
        return parse_col(col.decode("utf8"))
    elif isinstance(col, (complex, int)):
        return col
    elif isinstance(col, float):
        return int(col) if col.is_integer() else col
    elif col in (None, "{}", "[]") or not col:
        # None-ish and empty values map onto the COPY 'null' marker.
        return "null"
    elif isinstance(col, str):
        return {"True": 1, "False": 0}.get(col, col)
    elif isinstance(col, (list, tuple, set, frozenset)):
        # Postgres array literal. json.dumps quotes string elements and
        # escapes their embedded double quotes (array-literal level);
        # doubling every backslash afterwards survives COPY's own
        # text-format unescaping, so a json[] column ends up receiving
        # e.g. {"{\"jj\":null}"} — a valid array of valid JSON.
        # (The previous version appended a trailing "," for one-element
        # input, producing a malformed array literal such as {x,}.)
        body = ",".join(
            dumps(parse_col(item), separators=(",", ":")) for item in col
        )
        return "{{{0}}}".format(body).replace("\\", "\\\\")
    elif isinstance(col, dict):
        return dumps(col, separators=(",", ":"))
    elif isinstance(col, datetime):
        return col.isoformat()
    else:
        raise NotImplementedError(type(col))

Usage:

from itertools import repeat
from collections import namedtuple

import psycopg2

conn = psycopg2.connect(
    "dbname=test_user_db user=test_user"
)

# Target table: a json[] column plus an identity primary key.
cursor = conn.cursor()
cursor.execute(
    "CREATE TABLE my_table ("
    "  json_arr_col json[],"
    "  id integer generated by default as identity primary key"
    ");"
)

# Feed five identical single-dict rows through the COPY helper.
psql_insert_copy(
    table=namedtuple("_", ("name", "schema"))("my_table", None),
    conn=conn,
    keys=("json_arr_col", "id"),
    data_iter=repeat(({"jj": None, "text": "bop"},), 5),
)

Upvotes: 0

Views: 407

Answers (1)

Adrian Klaver
Adrian Klaver

Reputation: 19724

A short simple example:

cat js_vals.json 
[{"one": 1, "two": 2, "three": 3}]
[{"dog": "ranger", "cat": "cassie", "fish": "trout"}]


import psycopg2
from psycopg2 import sql

con = psycopg2.connect(dbname="test", host='localhost', user='postgres')
cur = con.cursor()
# A plain jsonb column (not json[]): one JSON document per COPY line.
cur.execute("""CREATE TABLE my_table (
    json_arr_col jsonb,
    id integer generated by default as identity primary key)""")
con.commit()

# sql.Identifier("public", "my_table") becomes "public"."my_table".
# NOTE(review): this rebinds the name `sql` from the psycopg2.sql module
# to a Composed statement, shadowing the module for later `sql.SQL(...)`
# calls in the same session.
sql = sql.SQL("COPY {} ({}) FROM STDIN").format(sql.Identifier("public", "my_table"), sql.Identifier("json_arr_col"))

# Each line of js_vals.json is streamed as-is into the jsonb column; the
# sample documents contain no tabs or backslashes, so the default COPY
# text format needs no extra escaping here — confirm for other data.
with open("js_vals.json", "r") as js_file:
    cur.copy_expert(sql, js_file)
con.commit()

cur.execute("select * from my_table")
cur.fetchall()
# Interactive output of fetchall() above:
[([{'one': 1, 'two': 2, 'three': 3}], 1),
 ([{'cat': 'cassie', 'dog': 'ranger', 'fish': 'trout'}], 2)]

UPDATE: Added a varchar column. I did not add json[] for two reasons:

  1. I don't see the point in using a Postgres array type to hold JSON values when the json and jsonb types are available.

  2. It would probably require writing an adapter to get psycopg2 copy* to recognize that json(b)[] is desired.

cat js_vals.csv 
test|[{"one": 1, "two": 2, "three": 3}]
test2|[{"dog": "ranger", "cat": "cassie", "fish": "trout"}]


cur.execute("DROP TABLE IF EXISTS my_table")
# Same jsonb column as before, plus a leading varchar column.
cur.execute("""CREATE TABLE my_table (
    varchar_col varchar,
    json_arr_col jsonb,
    id integer generated by default as identity primary key)""")
con.commit()

# NOTE(review): if run after the previous snippet, `sql` is no longer
# the psycopg2.sql module (it was rebound to a Composed statement), so
# `sql.SQL(...)` fails here; use `psycopg2.sql.SQL(...)` to be safe.
sql = sql.SQL("COPY {} ({},{}) FROM STDIN  DELIMITER '|'").format(sql.Identifier("public", "my_table"), sql.Identifier("varchar_col"),  sql.Identifier("json_arr_col"))

# '|'-delimited lines: varchar value, then one JSON document per row.
with open("js_vals.csv", newline='') as csv_file:
    cur.copy_expert(sql, csv_file)
con.commit()

cur.execute("select * from my_table")

cur.fetchall()

# Interactive output of fetchall() above:
[('test', [{'one': 1, 'two': 2, 'three': 3}], 1),
 ('test2', [{'cat': 'cassie', 'dog': 'ranger', 'fish': 'trout'}], 2)]


Upvotes: 0

Related Questions