user11441980
user11441980

Reputation: 11

How do I transform Mongodb Extended json into json format like good json using python?

I have to get data from mongodb using python. I use pymongo to get data and I can get the data in the json format. But, I think it is called like Mongodb Extended Json like

[ { "no": 0, "isMain": false, "parent": null, "defaultLanguage": { "$oid": "584067f3102e1f03d60d67da" } } ]

I would like to have that like

[ { "no": 0, "isMain": false, "parent": null, "defaultLanguage": "584067f3102e1f03d60d67da" } ]

But I could not find that transformation.

I have used bson.json_util in order to get that json but could not go further anymore. Also, I have tried to use mongoengine_goodjson.GoodJSONEncoder but could not succeed.

import pymongo
import pprint
import os
import urllib
import json
import mongoengine_goodjson as gj
import bson

from datetime import datetime as dt
from bson import json_util, ObjectId
from bson.json_util import dumps, RELAXED_JSON_OPTIONS, STRICT_JSON_OPTIONS


dir_path = os.path.dirname(os.path.realpath(__file__))
host = "xxxhost"
port = 27017
dbname = "xxxdb"
authdb = "admin"
username = urllib.parse.quote("root")
password = urllib.parse.quote("xxxpass")


def getdata(username, password, host, port, dbname, authdb, tablenamelist):
    connection_string = 'mongodb://{}:{}@{}:{}/{}?authSource={}'.format(username, password, host, port, dbname, authdb)
    myclient = pymongo.MongoClient(connection_string)
    mydb = myclient[dbname]
    for elem in mydb.list_collection_names():
        mycols = mydb[elem]
        print('tablename:', elem)
        with open(dir_path+'/'+elem+'.json','w') as f:
            dump = dumps([doc for doc in mycols.find({},{ "_id": 0})], sort_keys=False, indent=4, default=json_util.default, json_options=RELAXED_JSON_OPTIONS)
            f.write(dump)
        f.close()

getdata(username, password, host, port, dbname, authdb, tablenames)

How can I transform it?

Any help would be appreciated

Upvotes: 1

Views: 2177

Answers (2)

treecoder
treecoder

Reputation: 45111

The bson package has json_util function that allows you to process extended JSON nicely.

An example:

An extended JSON object

doc = {
    "_id": {"$oid": "661fda5b8ca6b45403ad60b6"},
    "timestamp": {"$numberLong": "1713363547553998848"},
    "created_at": {"$date": {"$numberLong": "1713363547554"}},
}

If you try to insert the above doc directly

db.get_collection('test').insert_one(doc)

You will get error

_id fields may not contain '$'-prefixed fields: $oid is not valid for storage.

But if you process it with json_util

from bson import json_util

doc_processed = json_util.loads(json_util.dumps(doc))

db.get_collection("test").insert_one(doc_processed)

You will get it through properly.

Links

Upvotes: 0

Buzz Moschetti
Buzz Moschetti

Reputation: 7578

You must do so manually. Here is a quick example of what happens with the 3 different output formats:


#  This is the native python BigDecimal...                                      
pythonDecimal = Decimal("923.2")

# ...but it doesn't work with BSON; you must convert:                           
bsonDecimal = decimal128.Decimal128(pythonDecimal)

db.foo.insert({
    "intVal": 1,
        "longVal": 23L,
    "floatVal": 3.14159,
        "dateVal": datetime.datetime.now(),
        "decimalVal": bsonDecimal
})

doc1 = db.foo.find_one()
print doc1

for fmt in RELAXED_JSON_OPTIONS,STRICT_JSON_OPTIONS,CANONICAL_JSON_OPTIONS:
    print fmt, dumps(doc1, sort_keys=False, indent=4, default=json_util.default , json_options=fmt)

{u'longVal': 23L, u'floatVal': 3.14159, u'intVal': 1, u'decimalVal': Decimal128('923.2'), u'_id': ObjectId('5db5d175283c3277b32f068e'), u'dateVal': datetime.datetime(2019, 10, 27, 13, 18, 45, 269000)}
JSONOptions(strict_number_long=False, datetime_representation=2, strict_uuid=True, json_mode=1, document_class=dict, tz_aware=True, uuid_representation=PYTHON_LEGACY, unicode_decode_error_handler='strict', tzinfo=<bson.tz_util.FixedOffset object at 0x1104142d0>) {
    "longVal": 23, 
    "floatVal": 3.14159, 
    "intVal": 1, 
    "decimalVal": {
        "$numberDecimal": "923.2"
    }, 
    "_id": {
        "$oid": "5db5d175283c3277b32f068e"
    }, 
    "dateVal": {
        "$date": "2019-10-27T13:18:45.269Z"
    }
}
JSONOptions(strict_number_long=True, datetime_representation=2, strict_uuid=True, json_mode=0, document_class=dict, tz_aware=True, uuid_representation=PYTHON_LEGACY, unicode_decode_error_handler='strict', tzinfo=<bson.tz_util.FixedOffset object at 0x1104142d0>) {
    "longVal": {
        "$numberLong": "23"
    }, 
    "floatVal": 3.14159, 
    "intVal": 1, 
    "decimalVal": {
        "$numberDecimal": "923.2"
    }, 
    "_id": {
        "$oid": "5db5d175283c3277b32f068e"
    }, 
    "dateVal": {
        "$date": "2019-10-27T13:18:45.269Z"
    }
}
JSONOptions(strict_number_long=True, datetime_representation=1, strict_uuid=True, json_mode=2, document_class=dict, tz_aware=True, uuid_representation=PYTHON_LEGACY, unicode_decode_error_handler='strict', tzinfo=<bson.tz_util.FixedOffset object at 0x1104142d0>) {
    "longVal": {
        "$numberLong": "23"
    }, 
    "floatVal": {
        "$numberDouble": "3.14159"
    }, 
    "intVal": {
        "$numberInt": "1"
    }, 
    "decimalVal": {
        "$numberDecimal": "923.2"
    }, 
    "_id": {
        "$oid": "5db5d175283c3277b32f068e"
    }, 
    "dateVal": {
        "$date": {
            "$numberLong": "1572182325269"
        }
    }
}

As you can see, none of the output types attempt to "simplify" the output of decimal, datetime, or oid types. The JSONOptions class provides no further options to adjust these representations. You must "manually" turn these into a simplified (and potentially lossy form). Below is quick example:

def emitThing(v):
    if v.__class__.__name__ == 'dict':
        emitDict(v)
    elif v.__class__.__name__ == 'list':
        emitList(v)
    else:
        if v.__class__.__name__ in {'Int64','int','long','float','decimal','Decimal128','Decimal'}:
            print str(v),
        elif v.__class__.__name__ == 'datetime':
            print '"%s"' % v.strftime('%Y-%m-%dT%H:%M:%SZ'),
        else:
            print '"%s"' % str(v),

def emitList(ll):
    print "[",
    first = True
    for v in ll:
        if not first:
            print ",",
        emitThing(v)
        first = False
    print "]",

def emitDict(dd):
    print "{",
    first = True
    for k, v in dd.iteritems():
        if not first:
            print ",",
        print '"%s":' % k,
        emitThing(v)
        first = False
    print "}",

emitDict(doc1)

Upvotes: 1

Related Questions