Reputation: 11
I have to get data from mongodb using python. I use pymongo to get data and I can get the data in the json format. But, I think it is called like Mongodb Extended Json like
[ { "no": 0, "isMain": false, "parent": null, "defaultLanguage": { "$oid": "584067f3102e1f03d60d67da" } } ]
I would like to have that like
[ { "no": 0, "isMain": false, "parent": null, "defaultLanguage": "584067f3102e1f03d60d67da" } ]
But I could not find that transformation.
I have used bson.json_util in order to get that json but could not go further anymore. Also, I have tried to use mongoengine_goodjson.GoodJSONEncoder but could not succeed.
import pymongo
import pprint
import os
import urllib
import json
import mongoengine_goodjson as gj
import bson
from datetime import datetime as dt
from bson import json_util, ObjectId
from bson.json_util import dumps, RELAXED_JSON_OPTIONS, STRICT_JSON_OPTIONS
dir_path = os.path.dirname(os.path.realpath(__file__))
host = "xxxhost"
port = 27017
dbname = "xxxdb"
authdb = "admin"
username = urllib.parse.quote("root")
password = urllib.parse.quote("xxxpass")
def getdata(username, password, host, port, dbname, authdb, tablenamelist):
connection_string = 'mongodb://{}:{}@{}:{}/{}?authSource={}'.format(username, password, host, port, dbname, authdb)
myclient = pymongo.MongoClient(connection_string)
mydb = myclient[dbname]
for elem in mydb.list_collection_names():
mycols = mydb[elem]
print('tablename:', elem)
with open(dir_path+'/'+elem+'.json','w') as f:
dump = dumps([doc for doc in mycols.find({},{ "_id": 0})], sort_keys=False, indent=4, default=json_util.default, json_options=RELAXED_JSON_OPTIONS)
f.write(dump)
f.close()
getdata(username, password, host, port, dbname, authdb, tablenames)
How can I transform it?
Any help would be appreciated
Upvotes: 1
Views: 2177
Reputation: 45111
The bson
package has json_util
function that allows you to process extended JSON nicely.
An example:
An extended JSON object
doc = {
"_id": {"$oid": "661fda5b8ca6b45403ad60b6"},
"timestamp": {"$numberLong": "1713363547553998848"},
"created_at": {"$date": {"$numberLong": "1713363547554"}},
}
If you try to insert the above doc directly
db.get_collection('test').insert_one(doc)
You will get error
_id fields may not contain '$'-prefixed fields: $oid is not valid for storage.
But if you process it with json_util
from bson import json_util
doc_processed = json_util.loads(json_util.dumps(doc))
db.get_collection("test").insert_one(doc_processed)
You will get it through properly.
Upvotes: 0
Reputation: 7578
You must do so manually. Here is a quick example of what happens with the 3 different output formats:
# This is the native python BigDecimal...
pythonDecimal = Decimal("923.2")
# ...but it doesn't work with BSON; you must convert:
bsonDecimal = decimal128.Decimal128(pythonDecimal)
db.foo.insert({
"intVal": 1,
"longVal": 23L,
"floatVal": 3.14159,
"dateVal": datetime.datetime.now(),
"decimalVal": bsonDecimal
})
doc1 = db.foo.find_one()
print doc1
for fmt in RELAXED_JSON_OPTIONS,STRICT_JSON_OPTIONS,CANONICAL_JSON_OPTIONS:
print fmt, dumps(doc1, sort_keys=False, indent=4, default=json_util.default , json_options=fmt)
{u'longVal': 23L, u'floatVal': 3.14159, u'intVal': 1, u'decimalVal': Decimal128('923.2'), u'_id': ObjectId('5db5d175283c3277b32f068e'), u'dateVal': datetime.datetime(2019, 10, 27, 13, 18, 45, 269000)}
JSONOptions(strict_number_long=False, datetime_representation=2, strict_uuid=True, json_mode=1, document_class=dict, tz_aware=True, uuid_representation=PYTHON_LEGACY, unicode_decode_error_handler='strict', tzinfo=<bson.tz_util.FixedOffset object at 0x1104142d0>) {
"longVal": 23,
"floatVal": 3.14159,
"intVal": 1,
"decimalVal": {
"$numberDecimal": "923.2"
},
"_id": {
"$oid": "5db5d175283c3277b32f068e"
},
"dateVal": {
"$date": "2019-10-27T13:18:45.269Z"
}
}
JSONOptions(strict_number_long=True, datetime_representation=2, strict_uuid=True, json_mode=0, document_class=dict, tz_aware=True, uuid_representation=PYTHON_LEGACY, unicode_decode_error_handler='strict', tzinfo=<bson.tz_util.FixedOffset object at 0x1104142d0>) {
"longVal": {
"$numberLong": "23"
},
"floatVal": 3.14159,
"intVal": 1,
"decimalVal": {
"$numberDecimal": "923.2"
},
"_id": {
"$oid": "5db5d175283c3277b32f068e"
},
"dateVal": {
"$date": "2019-10-27T13:18:45.269Z"
}
}
JSONOptions(strict_number_long=True, datetime_representation=1, strict_uuid=True, json_mode=2, document_class=dict, tz_aware=True, uuid_representation=PYTHON_LEGACY, unicode_decode_error_handler='strict', tzinfo=<bson.tz_util.FixedOffset object at 0x1104142d0>) {
"longVal": {
"$numberLong": "23"
},
"floatVal": {
"$numberDouble": "3.14159"
},
"intVal": {
"$numberInt": "1"
},
"decimalVal": {
"$numberDecimal": "923.2"
},
"_id": {
"$oid": "5db5d175283c3277b32f068e"
},
"dateVal": {
"$date": {
"$numberLong": "1572182325269"
}
}
}
As you can see, none of the output types attempt to "simplify" the output of decimal
, datetime
, or oid
types. The JSONOptions
class provides no further options to adjust these representations. You must "manually" turn these into a simplified (and potentially lossy form). Below is quick example:
def emitThing(v):
if v.__class__.__name__ == 'dict':
emitDict(v)
elif v.__class__.__name__ == 'list':
emitList(v)
else:
if v.__class__.__name__ in {'Int64','int','long','float','decimal','Decimal128','Decimal'}:
print str(v),
elif v.__class__.__name__ == 'datetime':
print '"%s"' % v.strftime('%Y-%m-%dT%H:%M:%SZ'),
else:
print '"%s"' % str(v),
def emitList(ll):
print "[",
first = True
for v in ll:
if not first:
print ",",
emitThing(v)
first = False
print "]",
def emitDict(dd):
print "{",
first = True
for k, v in dd.iteritems():
if not first:
print ",",
print '"%s":' % k,
emitThing(v)
first = False
print "}",
emitDict(doc1)
Upvotes: 1