Reputation: 685
The data is given below: each key is a datetime that maps to a list of (letter, percentage) tuples. I'm trying to produce something similar to the picture below; however, I'm having trouble because the set of letters is not consistent over time: letters come and go, as in the example (note that 'E' is replaced by 'F' on the second date).
{
datetime.datetime(2020, 2, 14, 0, 0):
[('A', 0.1),
('B', 0.2),
('C', 0.1),
('D', 0.25),
('E', 0.05)],
datetime.datetime(2020, 5, 15, 0, 0):
[('A', 0.1),
('B', 0.14),
('C', 0.09),
('D', 0.16),
('F', 0.6)], # Note F
...
}
Upvotes: 1
Views: 1813
Reputation: 25033
It's a matter of unfolding the dictionary according to its inner keys (the letters), since those are the keys relevant to plotting:
from matplotlib.pyplot import plot, show
from datetime import datetime as dt

data = {dt(2020,2,14,0,0):[('A',0.1),('B',0.20),('C',0.10),('D',0.25),('E',0.05)],
        dt(2020,5,15,0,0):[('A',0.1),('B',0.14),('C',0.09),('D',0.16),('F',0.60)]}

# Regroup the records by letter: {'A': [(date, value), ...], 'B': [...], ...}.
# A letter that is missing on some date simply gets no point for that date.
by_inner_key = {}
for date, tuples in data.items():
    for k, val in tuples:
        by_inner_key.setdefault(k, []).append((date, val))

# zip(*pairs) transposes [(date, value), ...] into (dates, values),
# which are then passed to plot() as its x and y arguments.
for k in by_inner_key:
    plot(*zip(*by_inner_key[k]))
show()
that gives the plot below (note the outlier associated with 'F').
And now for a version with inline comments and a few well deserved embellishments
from matplotlib.pyplot import legend, plot, show, xticks
from datetime import datetime as dt

data = {
    dt(2020, 2, 14): [('A', 0.1), ('B', 0.20), ('C', 0.10), ('D', 0.25), ('E', 0.05)],
    dt(2020, 5, 15): [('A', 0.1), ('B', 0.14), ('C', 0.09), ('D', 0.16), ('F', 0.60)],
}

# Regroup `data` by letter (the first element of each tuple), so that every
# letter owns a list of (date, value) pairs:
#   {'A': [(date0, value0), (date1, value1), ...], 'B': [...], ...}
by_inner_key = {}
for when, pairs in data.items():
    for letter, share in pairs:
        by_inner_key.setdefault(letter, []).append((when, share))

# Draw one line per letter.
for key in by_inner_key:
    series = by_inner_key[key]
    # The sample data is already chronological, but sort anyway:
    # tuples compare on their first element (the date) first.
    series.sort()
    # Transpose N (date, value) pairs into one tuple of dates and one of values.
    dates, values = zip(*series)
    plot(dates, values, label=key)

# Cosmetics: explicit tick positions on the x axis and a multi-column legend.
xticks((dt(2020,2,14),dt(2020,3,15),dt(2020,4,15),dt(2020,5,15)))
legend(ncol=6)
show()
Better, isn't it?
If you prefer not to show legends for unplotted lines, you can do as follows
# Drop-in replacement for the plot(...) call inside the per-letter loop:
# a letter with only one data point gets an empty label.
label = key if len(values) > 1 else ''
plot(dates, values, label=label)
Upvotes: 2
Reputation: 12496
Check this code:
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
# Sample input: each date maps to a list of (letter, value) tuples.
# Not every letter is present on every date.
data = {
    datetime.datetime(2020, 2, 14, 0, 0):
        [('A', 0.1),
         ('B', 0.2),
         ('C', 0.1),
         ('D', 0.25),
         ('E', 0.05)],
    datetime.datetime(2020, 2, 15, 0, 0):
        [('A', 0.1),
         ('B', 0.14),
         ('C', 0.09),
         ('D', 0.16),
         ('F', 0.6)],
    datetime.datetime(2020, 2, 16, 0, 0):
        [('A', 0.1),
         ('C', 0.25),
         ('D', 0.05),
         ('E', 0.3),
         ('F', 0.15)],
    datetime.datetime(2020, 2, 17, 0, 0):
        [('B', 0.15),
         ('C', 0.25),
         ('D', 0.2),
         ('E', 0.25),
         ('F', 0.05)],
    datetime.datetime(2020, 2, 18, 0, 0):
        [('A', 0.2),
         ('B', 0.3),
         ('C', 0.1),
         ('D', 0.15),
         ('E', 0.25)],
    datetime.datetime(2020, 2, 19, 0, 0):
        [('A', 0.15),
         ('B', 0.15),
         ('C', 0.1),
         ('E', 0.3),
         ('F', 0.25)],
    datetime.datetime(2020, 2, 20, 0, 0):
        [('A', 0.15),
         ('B', 0.25),
         ('C', 0.05),
         ('D', 0.1),
         ('E', 0.35)],
}

# Build a long-format table with one row per (date, letter) pair.
# DataFrame.append was removed in pandas 2.0 (and re-copied the whole frame on
# every call), so the per-date frames are collected and concatenated once.
columns = ['date', 'letter', 'value']
frames = []
for key, value in data.items():
    if not value:
        # a date without any tuples contributes no rows
        continue
    # temporary frame for this date: the date repeated once per tuple
    date = pd.DataFrame({'date': [key] * len(value),
                         'letter': [couple[0] for couple in value],
                         'value': [couple[1] for couple in value]})
    frames.append(date)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the expected columns. Each per-date frame keeps its own 0..n-1 index.
df = pd.concat(frames) if frames else pd.DataFrame(columns=columns)
# Draw value against date on a 12x6 figure, with one hue per letter.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df, x='date', y='value', hue='letter', ax=ax)
plt.show()
I generated some data similar to yours in order to make the plot.
Then I organized the data, date by date, in a temporary dataframe `date`, in order to extract the letters and values from the list of tuples. Then I appended this temporary dataframe to the general one, `df`, which looks something like:
date letter value
0 2020-02-14 A 0.10
1 2020-02-14 B 0.20
2 2020-02-14 C 0.10
3 2020-02-14 D 0.25
4 2020-02-14 E 0.05
0 2020-02-15 A 0.10
1 2020-02-15 B 0.14
2 2020-02-15 C 0.09
3 2020-02-15 D 0.16
4 2020-02-15 F 0.60
0 2020-02-16 A 0.10
1 2020-02-16 C 0.25
2 2020-02-16 D 0.05
3 2020-02-16 E 0.30
4 2020-02-16 F 0.15
0 2020-02-17 B 0.15
1 2020-02-17 C 0.25
2 2020-02-17 D 0.20
3 2020-02-17 E 0.25
4 2020-02-17 F 0.05
0 2020-02-18 A 0.20
1 2020-02-18 B 0.30
2 2020-02-18 C 0.10
3 2020-02-18 D 0.15
4 2020-02-18 E 0.25
0 2020-02-19 A 0.15
1 2020-02-19 B 0.15
2 2020-02-19 C 0.10
3 2020-02-19 E 0.30
4 2020-02-19 F 0.25
0 2020-02-20 A 0.15
1 2020-02-20 B 0.25
2 2020-02-20 C 0.05
3 2020-02-20 D 0.10
4 2020-02-20 E 0.35
Finally, I plot the values by date, split according to letter, with `sns.lineplot`. I get this plot.
As you can see, the letter `F` has no entry for `2020-02-14`, so it is absent from the plot at that date. Likewise, the letter `E` has no entry for `2020-02-15`, so its line skips that date and continues directly to its next value on `2020-02-16`.
Upvotes: 1