DataTx
DataTx

Reputation: 1869

How to dump data from python as JSON

I have a Python program that extracts information from HTML files, but I would like the extracted data to be dumped to a JSON file instead of printed.

import glob
import json
from bs4 import BeautifulSoup

# Parse each HTML file matched by '*.html' and print the fields pulled from it.
for html_path in glob.iglob('*.html'):
    with open(html_path) as handle:
        page = BeautifulSoup(handle)

        # The author link is looked up relative to the title span.
        title_tag = page.find("span", id="btAsinTitle")
        author_tag = title_tag.find_next("a", href=True)

        # ISBN and weight are the nodes that directly follow their bold labels.
        isbn_node = page.find('b', text='ISBN-10:').next_sibling
        weight_node = page.find('b', text='Shipping Weight:').next_sibling

        # Every span carrying the 'bb_price' class is kept, as tag objects.
        price_tags = page.findAll('span', {"class": 'bb_price'})

        record = {
            'title': title_tag.get_text(),
            'author': author_tag.get_text(),
            'isbn': isbn_node,
            'weight': weight_node,
            'price': price_tags,
        }
        print(record)

Upvotes: 0

Views: 1943

Answers (2)

m.wasowski
m.wasowski

Reputation: 6387

# Write `data` to `output_filename` as JSON. Both names are expected to be
# defined by the surrounding program; the file is closed when the `with`
# block exits.
with open(output_filename, 'w') as f:
    json.dump(data, f)

See the documentation for json.dump: https://docs.python.org/2/library/json.html#json.dump

Upvotes: 3

Hugh Bothwell
Hugh Bothwell

Reputation: 56624

Something like:

import glob
import json
from bs4 import BeautifulSoup

def main():
    """Scrape every ``*.html`` file matched by glob and dump the results as JSON.

    For each file, one dict with the keys ``title``, ``author``, ``isbn``,
    ``weight`` and ``price`` is collected; the list of all dicts is written
    to ``my_output.json``.

    Raises:
        AttributeError: if a lookup such as the title span or the
            ``ISBN-10:`` / ``Shipping Weight:`` label finds nothing, since
            the following attribute access is then made on ``None``.
    """
    data = []
    for filename in glob.iglob('*.html'):
        with open(filename) as f:
            soup = BeautifulSoup(f)

            title = soup.find("span", id="btAsinTitle")
            data.append({
                "title":  title.get_text(),
                "author": title.find_next("a", href=True).get_text(),
                "isbn":   soup.find('b', text='ISBN-10:').next_sibling,
                "weight": soup.find('b', text='Shipping Weight:').next_sibling,
                # findAll returns Tag objects, which json.dump cannot
                # serialize; store the text of each price span instead.
                "price":  [
                    tag.get_text()
                    for tag in soup.findAll('span', {"class": 'bb_price'})
                ],
            })

    with open("my_output.json", "w") as outf:
        json.dump(data, outf)


# Only run the scrape when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Upvotes: 1

Related Questions