Reputation: 141
I have the dataframes ready to plot, but when I use matplotlib to plot these data the lines are not correct and do not show the trend.
For example, the first graph should be a curved line; however, I got a straight line plotted in the graph.
How can I plot these lines correctly and fix both axes?
import pandas as pd
import datetime as dt
import pandas_datareader as web
import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib.ticker as ticker
from bs4 import BeautifulSoup
import requests
import matplotlib.dates as mdates
# Fetch the Federal Reserve XML feed and, for every <chart> element,
# build a DataFrame of its series, print it, and plot all of its columns
# on a single figure.
url = 'https://www.federalreserve.gov/data.xml'
soup = BeautifulSoup(requests.get(url).content, 'html.parser')

for chart in soup.select('chart'):
    series = {}
    index = []

    for series_node in chart.select('series'):
        observations = series_node.select('observation')
        observation_index = [obs['index'] for obs in observations]
        series[series_node['description']] = [obs['value'] for obs in observations]
        # The longest observation index seen so far becomes the frame's index.
        if len(observation_index) > len(index):
            index = observation_index
    series['index'] = index

    # Pad every column to the same length so the DataFrame can be built.
    max_len = max(len(values) for values in series.values())
    series = {
        name: values + ['No Data'] * (max_len - len(values))
        for name, values in series.items()
    }

    df = pd.DataFrame(series).set_index('index')
    print(df)
    print('-' * 80)

    # One figure per chart; every column is drawn on the same axes.
    plt.figure()
    for column in df:
        plt.plot(df.index, df[column], label=chart['title'])
    plt.show()
Upvotes: 1
Views: 266
Reputation: 62373
df.index = pd.to_datetime(df.index)
has been added.
df = pd.DataFrame(series, dtype=float).set_index('index')
will catch most of the columns, but there are some columns that still have strings, so they can't be converted.
print(df.info())
has been added. Review and fix any column that is an object. That means the column contains some strings and can't be converted to a float.
Use [np.nan] instead of ['No Data'], so the column can be set as a float, which will allow it to plot correctly.
import numpy as np
# Fetch the Federal Reserve XML feed and, for every <chart> element,
# build a DataFrame of its series with a datetime index and float columns,
# then draw each column on its own figure.
url = 'https://www.federalreserve.gov/data.xml'
soup = BeautifulSoup(requests.get(url).content, 'html.parser')

for chart in soup.select('chart'):
    series = {}
    index = []
    for s in chart.select('series'):
        observations = s.select('observation')
        temp_index = [o['index'] for o in observations]
        series[s['description']] = [o['value'] for o in observations]
        # The longest observation index seen so far becomes the frame's index.
        if len(temp_index) > len(index):
            index = temp_index
    series['index'] = index

    max_len = max(len(values) for values in series.values())
    for k in series:
        # Pad with NaN: padding with the string 'No Data' prevents the
        # column from being interpreted as a float.
        series[k] = series[k] + [np.nan] * (max_len - len(series[k]))

    # Passing dtype=float to the constructor would also try to cast the
    # date strings in 'index' (newer pandas raises when a requested dtype
    # cannot be applied), so move 'index' out of the columns first and
    # cast the remaining columns one at a time.
    df = pd.DataFrame(series).set_index('index')
    df.index = pd.to_datetime(df.index)  # convert the index to a datetime format
    for column in df.columns:
        try:
            df[column] = df[column].astype(float)
        except (TypeError, ValueError):
            # Deliberately left as object: the column holds non-numeric
            # strings, and df.info() below will flag it for manual review.
            pass

    print(df)
    # info() prints its report itself and returns None, so it is not
    # wrapped in print(). Any column that isn't a float has strings in it
    # that must be fixed.
    df.info()
    print('-' * 80)

    # One figure per column; no extra figure is created up front, which
    # would only open an empty window.
    for i in df:
        plt.figure(figsize=(9, 5))
        plt.plot(df.index, df[i])
        plt.title(f'{chart["title"]}\n{i}')
    plt.show()
Upvotes: 1