Reputation: 1869
I have a Python program that extracts information from HTML files, but I would like the extracted data to be dumped to a JSON file.
import glob
import json
from bs4 import BeautifulSoup
# Parse every HTML file in the current directory and print the fields
# extracted from each one as a dict.
for filename in glob.iglob('*.html'):
    with open(filename) as f:
        soup = BeautifulSoup(f)

    # Every <span class="bb_price"> element in the page; this is a list of
    # tag objects, not plain text.
    price = soup.findAll('span', {"class": 'bb_price'})
    title = soup.find("span", id="btAsinTitle")
    # The author is taken from the first <a href=...> that follows the title.
    author = title.find_next("a", href=True)
    # Each value is whatever node immediately follows its bold label.
    isbn = soup.find('b', text='ISBN-10:').next_sibling
    weight = soup.find('b', text='Shipping Weight:').next_sibling

    # Calling print with one parenthesized argument produces the same output
    # on Python 2 and Python 3, unlike the Python 2-only print statement.
    print({'title': title.get_text(),
           'author': author.get_text(),
           'isbn': isbn,
           'weight': weight,
           'price': price})
Upvotes: 0
Views: 1943
Reputation: 6387
# Serialize `data` as JSON into the file named by `output_filename`.
# Both names are placeholders to be supplied by the surrounding program.
with open(output_filename, 'w') as f:
    json.dump(data, f)
See the documentation for `json.dump`: https://docs.python.org/2/library/json.html#json.dump
Upvotes: 3
Reputation: 56624
Something like:
import glob
import json
from bs4 import BeautifulSoup
def main():
    """Extract book details from each ``*.html`` file and save them as JSON.

    Every HTML file in the current directory is parsed, one record
    (title, author, isbn, weight, price) is built per file, and the list
    of records is written to ``my_output.json``.

    NOTE(review): the lookups assume each page contains the expected
    elements; presumably a page missing one of them fails with an
    AttributeError on the ``None`` result -- confirm against real input.
    """
    data = []
    for filename in glob.iglob('*.html'):
        with open(filename) as f:
            soup = BeautifulSoup(f)

        title = soup.find("span", id="btAsinTitle")
        price_tags = soup.findAll('span', {"class": 'bb_price'})
        data.append({
            "title": title.get_text(),
            # The author is the first <a href=...> that follows the title.
            "author": title.find_next("a", href=True).get_text(),
            # Each value is whatever node immediately follows its bold label.
            "isbn": soup.find('b', text='ISBN-10:').next_sibling,
            "weight": soup.find('b', text='Shipping Weight:').next_sibling,
            # json cannot serialize tag objects, so store each tag's text.
            "price": [tag.get_text() for tag in price_tags],
        })

    # Write all records at once, after every file has been processed.
    with open("my_output.json", "w") as outf:
        json.dump(data, outf)


main()
Upvotes: 1