Reputation: 623
I have a Python script that parses a JSON file like below:
[
{
"_index": "bulletins",
"_type": "bulletin",
"_id": "OPENWRT-SA-000001",
"_score": null,
"_source": {
"lastseen": "2016-09-26T15:45:23",
"references": [
"http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-3193",
],
"affectedPackage": [
{
"OS": "OpenWrt",
"OSVersion": "15.05",
"packageVersion": "9.9.8-P3-1",
"packageFilename": "UNKNOWN",
"arch": "all",
"packageName": "bind",
"operator": "lt"
}
],
"edition": 1,
"description": "value in here,
"reporter": "OpenWrt Project",
"published": "2016-01-24T13:33:41",
"title": "bind: Security update (4 CVEs)",
"type": "openwrt",
"bulletinFamily": "unix",
"cvelist": [
"CVE-2015-8704",
],
"modified": "2016-01-24T13:33:41",
"id": "OPENWRT-SA-000001",
"href": "https://lists.openwrt.org/pipermail/openwrt-security-announce/2016-January/000001.html",
"cvss": {
"score": 7.1,
"vector": "AV:NETWORK/AC:MEDIUM/Au:NONE/C:NONE/I:NONE/A:COMPLETE/"
}
},
"sort": [
34872
]
},
I have removed some of the values to keep the post shorter but leaving some in to try to keep the structure.
I want to take all sub keys from the _source
key and move them up to the same level as _source
and then delete the _source
key.
My code to parse the JSON is:
import json
import logging
import logging.handlers
import os
import pymongo
from pymongo import MongoClient
def import_json(mongo_server,mongo_port, vuln_folder):
try:
logging.info('Connecting to MongoDB')
client = MongoClient(mongo_server, mongo_port)
db = client['vuln_sets']
coll = db['vulnerabilities']
logging.info('Connected to MongoDB')
basepath = os.path.dirname(__file__)
filepath = os.path.abspath(os.path.join(basepath, ".."))
archive_filepath = filepath + vuln_folder
filedir = os.chdir(archive_filepath)
file_count = 0
for item in os.listdir(filedir):
if item.endswith('.json'):
file_name = os.path.abspath(item)
with open(item, 'r') as currentfile:
vuln_counter = 0
duplicate_count = 0
logging.info('Currently processing ' + item)
file_count +=1
json_data = currentfile.read()
vuln_content = json.loads(json_data)
for vuln in vuln_content:
try:
del vuln['_type']
coll.insert(vuln, continue_on_error=True)
vuln_counter +=1
except pymongo.errors.DuplicateKeyError:
duplicate_count +=1
logging.info('Added ' + str(vuln_counter) + ' vulnerabilities for ' + item)
logging.info('Found ' + str(duplicate_count) + ' duplicate records!')
os.remove(file_name)
logging.info('Processed ' + str(file_count) + ' files')
except Exception as e:
logging.exception(e)
Which you can see already deletes one key that is not needed but that key has no needed data where as I need the sub keys from _source. I am not sure on the best way to achieve this, whether it would be programmatically correct to just re-create the JSON file with the new info but I need to keep the order of the keys and structure apart from moving the sub keys up one level.
Upvotes: 0
Views: 2297
Reputation: 623
I managed to get it working by using the following code:
for vuln in vuln_content:
try:
del vuln['_type']
new_vuln = {key: vuln[key] for key in vuln if key != '_source'}
new_vuln.update(vuln['_source'])
coll.insert(new_vuln, continue_on_error=True)
vuln_counter +=1
except pymongo.errors.DuplicateKeyError:
duplicate_count +=1
Upvotes: 0
Reputation: 43524
You can use the dictionary update()
function to achieve what you're trying to do, but it's important to note that dictionaries don't have an "order of the keys" - see: Key Order in Python Dictionaries.
Here's an example of one way to do this, starting with a dictionary definition.
d = {
"_index": "bulletins",
"_type": "bulletin",
"_id": "OPENWRT-SA-000001",
"_score": None,
"_source": {
"lastseen": "2016-09-26T15:45:23",
"references": [
"http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-3193",
],
"affectedPackage": [
{
"OS": "OpenWrt",
"OSVersion": "15.05",
"packageVersion": "9.9.8-P3-1",
"packageFilename": "UNKNOWN",
"arch": "all",
"packageName": "bind",
"operator": "lt"
}
],
"edition": 1,
"description": "value in here",
"reporter": "OpenWrt Project",
"published": "2016-01-24T13:33:41",
"title": "bind: Security update (4 CVEs)",
"type": "openwrt",
"bulletinFamily": "unix",
"cvelist": [
"CVE-2015-8704",
],
"modified": "2016-01-24T13:33:41",
"id": "OPENWRT-SA-000001",
"href": "https://lists.openwrt.org/pipermail/openwrt-security-announce/2016-January/000001.html",
"cvss": {
"score": 7.1,
"vector": "AV:NETWORK/AC:MEDIUM/Au:NONE/C:NONE/I:NONE/A:COMPLETE/"
}
}
}
# create a new dictionary with everything except the key "_source"
new_d = {key: d[key] for key in d if key != '_source'}
# add the keys/values from "_source" to new dictionary
new_d.update(d['_source']) # This will overwriting any existing keys
The output of new_d:
{'_id': 'OPENWRT-SA-000001',
'_index': 'bulletins',
'_score': None,
'_type': 'bulletin',
'affectedPackage': [{'OS': 'OpenWrt',
'OSVersion': '15.05',
'arch': 'all',
'operator': 'lt',
'packageFilename': 'UNKNOWN',
'packageName': 'bind',
'packageVersion': '9.9.8-P3-1'}],
'bulletinFamily': 'unix',
'cvelist': ['CVE-2015-8704'],
'cvss': {
'score': 7.1,
'vector': 'AV:NETWORK/AC:MEDIUM/Au:NONE/C:NONE/I:NONE/A:COMPLETE/'},
'description': 'value in here',
'edition': 1,
'href': 'https://lists.openwrt.org/pipermail/openwrt-security-announce/2016-January/000001.html',
'id': 'OPENWRT-SA-000001',
'lastseen': '2016-09-26T15:45:23',
'modified': '2016-01-24T13:33:41',
'published': '2016-01-24T13:33:41',
'references': ['http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-
3193'],
'reporter': 'OpenWrt Project',
'title': 'bind: Security update (4 CVEs)',
'type': 'openwrt'}
Upvotes: 1