CaptainRyan1
CaptainRyan1

Reputation: 21

How to turn a pre-made list in rows and columns just like a matrix into a csv file

from bs4  import BeautifulSoup
import requests
import pandas as pd 
import numpy as np



url = "https://www.bls.gov/web/ximpim/beaimp.htm"

page = requests.get(url)
doc = BeautifulSoup(page.text, "html.parser")

table1 = doc.find('table', id='main-content-table')

data_points = doc.find_all("span",{"class":"datavalue"})
numbers = [x.text for x in data_points]
numbers1 = numbers[651:1107]
numbers2 = numbers[2431:2900]
df1 = np.matrix(numbers1)

df2 = np.matrix(numbers2)





headers = []
for i in table1.find_all('th'):
    title = i.text
    headers.append(title)
table1 = headers[72:111]
table2 = headers[225:265]


datafile = pd.DataFrame([table1,numbers1])
datafile = datafile.transpose()
datafile.columns=['Category','0']
datafile.head()

datafile.to_csv('hello.csv', header=False)

The idea is to have it into a csv file: Columns are suppose to included the title and then follow by a 0 for each columns I thought I could convert it into a 38 by 12 matrix The output I want is:

All imports excluding petroleum 
1985    0   0    0     0    0    0      0   0    0      0   0    0

    -   -   73.9    -   -   74.3    -   -   74.8    -   -   76.8

1986

    -   -   79.1    -   -   79.9    -   -   82.5    -   -   82.9

1987

    -   -   84.8    -   -   87.0    -   -   87.7    -   -   90.2

1988

    -   -   92.3    -   -   94.6    -   -   94.0    -   -   96.2

1989

    96.9    96.1    96.7    96.4    96.7    95.9    95.2    95.1    95.2    95.5    95.7    96.0

1990

    96.1    96.4    96.9    96.7    96.4    96.4    96.3    96.8    97.6    98.0    98.3    98.9

1991

    99.0    99.3    99.6    98.9    98.8    98.3    97.8    97.8    98.0    98.5    98.7    99.1

1992

    99.8    100.0   99.8    99.1    99.0    99.5    99.9    100.3   100.7   101.2   100.9   99.9

1993

    99.9    99.7    99.9    100.2   100.6   100.5   100.7   100.8   100.9   101.4   101.3   101.3

1994

    101.6   101.5   101.8   102.1   102.3   102.6   103.1   103.8   104.1   104.8   105.1   105.2

1995

    105.5   105.9   106.4   107.0   107.7   107.6   108.0   108.0   107.8   107.5   107.7   107.7

1996

    107.4   107.4   107.1   107.0   106.7   106.2   105.9   105.7   106.1   105.8   105.7   105.8

1997

    105.4   105.3   104.9   104.3   104.2   104.3   104.1   103.8   103.7   103.4   103.3   102.8

1998

    102.2   101.7   101.4   101.1   100.9   100.5   100.0   99.6    99.4    99.5    99.6    99.4

1999

    99.5    99.4    99.0    98.8    99.0    98.8    98.6    98.7    98.9    99.0    99.4    99.4

2000

    99.4    99.7    100.0   100.1   99.9    99.9    100.2   100.3   100.0   100.0   99.9    100.7

2001

    101.6   100.8   100.0   99.5    99.2    98.9    97.8    97.5    97.3    96.8    96.6    96.2

2002

    96.1    95.7    95.8    96.3    96.2    96.2    96.2    96.3    96.4    96.4    96.3    96.5

2003

    96.8    97.1    98.1    97.1    96.9    97.3    97.3    97.0    97.3    97.2    97.4    97.7

2004

    98.5    98.9    99.1    99.4    99.6    99.7    99.7    100.0   100.1   100.0   100.9   101.3

2005

    101.6   101.7   102.0   102.4   102.2   102.0   101.8   101.9   102.8   103.8   103.7   103.7

2006

    104.0   103.3   103.0   103.1   103.8   104.2   104.2   104.7   104.8   104.2   105.2   105.7

2007

    105.6   105.6   105.9   106.2   106.8   107.1   107.2   107.2   107.1   107.7   108.5   108.9

2008

    109.7   110.4   111.6   113.1   113.9   114.9   115.6   115.1   114.0   113.0   111.1   109.9

2009

    109.0   108.2   107.3   107.1   107.3   107.4   107.2   107.6   107.9   108.4   109.1   109.7

2010

    110.3   110.4   110.3   110.8   111.2   110.7   110.5   110.7   111.0   111.3   112.2   112.6

2011

    113.6   114.4   115.0   115.9   116.4   116.4   116.5   116.8   117.0   116.6   116.3   116.4

2012

    116.4   116.3   116.7   116.7   116.6   116.3   115.9   115.8   116.0   116.4   116.4   116.5

2013

    116.6   116.6   116.5   116.5   116.1   115.7   115.0   114.8   114.8   114.9   115.0   115.2

2014

    115.7   116.0   116.5   116.1   116.0   115.8   115.8   115.7   115.6   115.4   115.1   115.1

2015

    114.3   114.0   113.5   113.0   112.9   112.8   112.5   112.1   111.9   111.5   111.2   110.8

2016

    110.7   110.5   110.4   110.4   110.8   110.5   111.0   111.1   111.2   111.1   111.1   111.1

2017

    111.2   111.5   111.6   111.9   111.9   112.0   111.9   112.1   112.5   112.5   112.7   112.6

2018

    113.3   113.7   113.7   113.8   113.9   113.5   113.4   113.1   113.2   113.4   113.5   113.7

2019

    113.0   113.2   113.2   112.7   112.4   112.0   111.9   111.8   111.8   111.7   111.8   112.0

2020

    112.1   112.3   112.1   111.5   111.6   111.9   112.1   113.0   113.7   113.6   113.6   114.1

2021

    115.1   115.7   116.7   117.6   118.8   119.6   119.7   119.7   119.9   120.7   121.5   122.0

2022

    123.8   124.8   126.2   126.8   126.7   126.0   125.2   124.9   124.5   124.3   123.9   124.9

Upvotes: 0

Views: 54

Answers (1)

Code Different
Code Different

Reputation: 93161

The BLS has a data API that would be preferable over web scraping. The Series ID is a bit hard to find. It's not what's shown on the page (typical government issue). You can use the BLS Data Finder to get that:

# You can only query 10 years at a time without registering for an API key
step = 10

data = []
for start_year in range(1985, 2023, step):
    payload = {
        # All imports excluding petroleum
        "seriesid": ["EIUIREXPET"],
        "startyear": start_year,
        "endyear": start_year + step - 1
    }
    r = requests.post("https://api.bls.gov/publicAPI/v2/timeseries/data/", json=payload)
    r.raise_for_status()

    data.extend(r.json()["Results"]["series"][0]["data"])

df = pd.DataFrame(data)[["year","period","value"]]
df["period"] = df["period"].str.strip("M").astype(int)
df = df.pivot(index="year", columns="period")

Upvotes: 0

Related Questions