Leo.Cruz
Leo.Cruz

Reputation: 57

python json to csv converting script?

Let me start by stating that I am new to python. I wrote a script that will convert a .json file to csv format. I managed to write a script to do the job, however I don't think that my script will work if the format of the json file was to change. My script assumes that the json file will be in the same format at all times.

<json file example>
 {
"Order":
    {
        "order_id":"8251662",
        "order_date":"2012-08-20 13:17:37",
        "order_date_shipped":"0000-00-00 00:00:00",
        "order_status":"fraudreview",
        "order_ship_firstname":"pam",
        "order_ship_lastname":"Gregorio",
        "order_ship_address1":"1533 E. Dexter St",
        "order_ship_address2":"",
        "order_ship_city":"Covina",
        "order_ship_state":"CA",
        "order_ship_zip":"91746",
        "order_ship_country":"US United States",
        "order_ship_phone":"6268936923",
        "order_ship_email":"[email protected]",
        "order_bill_firstname":"pam",
        "order_bill_lastname":"Gregorio",
        "order_bill_address1":"1533 E. Dexter St",
        "order_bill_address2":"",
        "order_bill_city":"Covina",
        "order_bill_state":"CA",
        "order_bill_zip":"91746",
        "order_bill_country":"US United States",
        "order_bill_phone":"6268936923",
        "order_bill_email":"[email protected]",
        "order_gift_message":"",
        "order_giftwrap":"0",
        "order_gift_charge":"0",
        "order_shipping":"Standard (Within 5-10 Business Days)",
        "order_tax_charge":"62.83",
        "order_tax_shipping":"0",
        "order_tax_rate":"0.0875",
        "order_shipping_charge":"7.5",
        "order_total":"788.33",
        "order_item_count":"12",
        "order_tracking":"",
        "order_carrier":"1"
    },

"Items":
    [
    {
        "item_id":"25379",
        "item_date_shipped":"",
        "item_code":"17345-J3553-J35532",
        "item_quantity":"2","item_taxable":"YES",
        "item_unit_price":"32","item_shipping":"0.67",
        "item_addcharge_price":"0",
        "item_description":" ABC Slide Bracelet: : Size: OS: Silver Sku: J35532",
        "item_quantity_returned":"0",
        "item_quantity_shipped":"0",
        "item_quantity_canceled":"0",
        "item_status":"pending",
        "item_product_id":"17345",
        "item_product_kit_id":"0",
        "item_product_sku":"J35532",
        "item_product_barcode":"881934310775",
        "item_tracking":"",
        "item_carrier":"0",
        "item_source_orderid":""
    },
    {
        "item_id":"25382",
        "item_date_shipped":"",
        "item_code":"17608-J3809-J3809C",
        "item_quantity":"1",
        "item_taxable":"YES",
        "item_unit_price":"22",
        "item_shipping":"0.23",
        "item_addcharge_price":"0",
        "item_description":" \"ABC Starter Bracelet 7 1\/4\"\"\": : Size: OS: Silver Sku: J3809C",
        "item_quantity_returned":"0",
        "item_quantity_shipped":"0",
        "item_quantity_canceled":"0",
        "item_status":"pending",
        "item_product_id":"17608",
        "item_product_kit_id":"0",
        "item_product_sku":"J3809C",
        "item_product_barcode":"881934594175",
        "item_tracking":"",
        "item_carrier":"0",
        "item_source_orderid":""
    },
    {
        "item_id":"25385",
        "item_date_shipped":"",
        "item_code":"17687-J9200-J92000",
        "item_quantity":"2",
        "item_taxable":"YES",
        "item_unit_price":"12",
        "item_shipping":"0.25",
        "item_addcharge_price":"0",
        "item_description":" ABC Cathedral Bead: : Size: OS: Silver Sku: J92000",
        "item_quantity_returned":"0",
        "item_quantity_shipped":"0",
        "item_quantity_canceled":"0",
        "item_status":"pending",
        "item_product_id":"17687",
        "item_product_kit_id":"0",
        "item_product_sku":"J92000",
        "item_product_barcode":"881934602832",
        "item_tracking":"",
        "item_carrier":"0",
        "item_source_orderid":""
    },
    {
        "item_id":"25388",
        "item_date_shipped":"",
        "item_code":"17766-J9240-J92402",
        "item_quantity":"2",
        "item_taxable":"YES",
        "item_unit_price":"22",
        "item_shipping":"0.46",
        "item_addcharge_price":"0",
        "item_description":" ABC Ice Diva Bead: : Size: OS: Silver Sku: J92402",
        "item_quantity_returned":"0",
        "item_quantity_shipped":"0",
        "item_quantity_canceled":"0",
        "item_status":"pending",
        "item_product_id":"17766",
        "item_product_kit_id":"0",
        "item_product_sku":"J92402",
        "item_product_barcode":"881934655838",
        "item_tracking":"",
        "item_carrier":"0",
        "item_source_orderid":""
    },
    ],

"FraudReasons":
    [
    {
        "order_id":"11957",
        "fraud_reason":"order total exceeds max amount"
    },
    {
        "order_id":"11957",
        "fraud_reason":"order exceeds max item count"
    }
]
}

My script currently works fine with this json file but It wont work if there is only one item or one fraudreason. Here is the code to my script.

<script code>
#!/usr/bin/python
import simplejson as json
import optparse
import pycurl
import sys
import csv

json_data = open(file)
data = json.load(json_data)
json_data.close()

csv_file = '/tmp/' + str(options.orderId) + '.csv'
orders = data['Order']
items = data['Items']
frauds = data['FraudReasons']

o = csv.writer(open(csv_file, 'w'), lineterminator=',')

o.writerow([orders['order_id'],orders['order_date'],orders['order_date_shipped'],orders['order_status'],orders['order_ship_firstname'],orders['order_ship_lastname'],orders['order_ship_address1'],orders['order_ship_address2'],orders['order_ship_city'],orders['order_ship_state'],orders['order_ship_zip'],orders['order_ship_country'],orders['order_ship_phone'],orders['order_ship_email'],orders['order_bill_firstname'],orders['order_bill_lastname'],orders['order_bill_address1'],orders['order_bill_address2'],orders['order_bill_city'],orders['order_bill_state'],orders['order_bill_zip'],orders['order_bill_country'],orders['order_bill_phone'],orders['order_bill_email'],orders['order_gift_message'],orders['order_giftwrap'],orders['order_gift_charge'],orders['order_shipping'],orders['order_tax_charge'],orders['order_tax_shipping'],orders['order_tax_rate'],orders['order_shipping_charge'],orders['order_total'],orders['order_item_count'],orders['order_tracking'],orders['order_carrier']])

for item in items:
    o.writerow([item['item_id'],item['item_date_shipped'],item['item_code'],item['item_quantity'],item['item_taxable'],item['item_unit_price'],item['item_shipping'],item['item_addcharge_price'],item['item_description'],item['item_quantity_returned'],item['item_quantity_shipped'],item['item_quantity_canceled'],item['item_status'],item['item_product_id'],item['item_product_kit_id'],item['item_product_sku'],item['item_product_barcode'],item['item_tracking'],item['item_carrier'],item['item_source_orderid']])

for fraud in frauds:
    o.writerow([fraud['fraud_reason']],)

I also have not been able to figure out how not to use the labels I hope someone can help me with this

thanks in advance.

Upvotes: 1

Views: 3011

Answers (2)

alexis
alexis

Reputation: 50210

You should ask the json-generated object (data) for the names of the fields. To retain the input order, tell json to use collections.OrderedDict instead of plain dict (requires python 2.7):

import json
from collections import OrderedDict as ordereddict

data = json.loads(open('mydata.json', object_pairs_hook=ordereddict)
orders = data['Order']
print orders.keys()  # Will print the keys in the order they were read

You can then use orders.keys() instead of your hard-coded list, either with writerow or (simpler) with csv.DictWriter.

Note that this uses the default json, not simplejson, and requires python 2.7 for the ordered_pairs_hook argument and the OrderedDict type.

Edit: Yeah, I see from the comments that you're stuck with 2.4. You can download an ordereddict from PyPi, and you can extend the JSONDecoder class and pass it with the cls argument (see here), instead of object_pairs_hook, but that's uglier and more work...

Upvotes: 0

David Eyk
David Eyk

Reputation: 12541

You may want to use csv.DictWriter:

    # It's considered best to stash the main logic of your script
    # in a main() function like this.
    def main(filename, options):
        with open(filename) as fi:
            data = json.load(fi)

        csv_file = '/tmp/' + str(options.orderId) + '.csv'
        order = data['Order']
        items = data['Items']
        frauds = data['FraudReasons']

        # Here's one way to keep this maintainable if the JSON
        # format changes, and you don't care too much about the
        # order of the fields...
        orders_fields = sorted(orders.keys())
        item_fields = sorted(items[0].keys()) if items else ()
        fraud_fields = sorted(fraud[0].keys()) if fraud else ()

        csv_options = dict(lineterminator=',')

        with open(csv_file, 'w') as fo:
            o = csv.DictWriter(fo, order_fields, **csv_options)
            o.writeheader()
            o.writerow(orders)

            fo.write('\n')  # Optional, if you want to keep them separated.
            o = csv.DictWriter(fo, item_fields, **csv_options)
            o.writeheader()
            o.writerows(items)

            fo.write('\n')  # Optional, if you want to keep them separated.
            o = csv.DictWriter(fo, fraud_fields, **csv_options)
            o.writeheader()
            o.writerows(frauds)

    # If this script is run from the command line, just run
    # main(). Here's the place to use `optparse`.
    if __name__ == '__main__':
        main(...) # You'll need to fill in the main() arguments...

If you need to specify the order of fields, assign them to a tuple like this:

orders_fields = (
    'order_id',
    'order_date',
    'order_date_shipped',
    # ... etc.
    )

Upvotes: 2

Related Questions