Reputation: 229
I have a list like this:
data = [
(datetime.datetime(2015,1,1), 666),
(datetime.datetime(2015,1,3), 777),
(datetime.datetime(2015,2,1), 888),
]
Does anyone have an idea how I can fill in the missing dates with 0, to get:
data = [
(datetime.datetime(2015,1,1), 666),
(datetime.datetime(2015,1,2), 0),
(datetime.datetime(2015,1,3), 777),
(datetime.datetime(2015,1,4), 0),
(datetime.datetime(2015,1,5), 0),
(datetime.datetime(2015,1,6), 0),
... etc,
(datetime.datetime(2015,2,1), 888),
]
Upvotes: 2
Views: 67
Reputation: 414675
To return zero for missing dates, you could use collections.defaultdict:
>>> from datetime import datetime
>>> data = [
... (datetime(2015,1,1), 666),
... (datetime(2015,1,3), 777),
... (datetime(2015,2,1), 888),
... ]
>>> from collections import defaultdict
>>> x = defaultdict(int, data)
>>> x[datetime(2015,1,1)]
666
>>> x[datetime(2015,1,2)]
0
defaultdict allows you to look up values without creating a list that covers every date from the earliest date in the original list to the latest date. But if you do need such a list, it is easy to make one:
>>> from datetime import timedelta
def date_interval(lo, hi, step):
    """Yield ``lo``, ``lo + step``, ``lo + 2 * step``, ... while the value is ``<= hi``.

    ``hi`` itself is included when it is reachable from ``lo`` in whole
    steps. Nothing is yielded when ``lo > hi``.

    Raises:
        ValueError: if there are values to produce but ``step`` does not
            move ``lo`` forward (zero or negative step); without this
            check the loop below would never terminate.
    """
    # Only validate when the loop would actually run, so an already-empty
    # interval keeps yielding nothing regardless of the step.
    if lo <= hi and not lo + step > lo:
        raise ValueError("step must be positive so that lo advances towards hi")
    while lo <= hi:
        yield lo
        lo += step
>>> [(d, x[d]) for d in date_interval(min(x), max(x), timedelta(1))]
[(datetime.datetime(2015, 1, 1, 0, 0), 666),
(datetime.datetime(2015, 1, 2, 0, 0), 0),
(datetime.datetime(2015, 1, 3, 0, 0), 777),
(datetime.datetime(2015, 1, 4, 0, 0), 0),
(datetime.datetime(2015, 1, 5, 0, 0), 0),
...
(datetime.datetime(2015, 1, 30, 0, 0), 0),
(datetime.datetime(2015, 1, 31, 0, 0), 0),
(datetime.datetime(2015, 2, 1, 0, 0), 888)]
Upvotes: 0
Reputation: 180481
Provided the first item holds the start date and the last item holds the end date, create a set of all the dates in the list and get the difference in days between the start and the end. Then loop over that range of days: for each n, compute the start date plus n days using timedelta. If that date is not in the set, yield a tuple of that date and 0; if it does exist, yield the next item from the data list:
from datetime import datetime, timedelta
from operator import itemgetter
# Sparse sample series: only three days between 1 Jan and 1 Feb 2015 carry a value.
data = [
    (datetime(year=2015, month=1, day=1), 666),
    (datetime(year=2015, month=1, day=3), 777),
    (datetime(year=2015, month=2, day=1), 888),
]
def add_missing(l):
    """Yield one ``(date, value)`` pair per day spanned by *l*, zero-filling gaps.

    *l* is an iterable of ``(datetime, value)`` tuples ordered by date, so
    the first item holds the start date and the last item the end date.
    A pair is yielded for every day from start to end inclusive: the
    stored value for days present in *l*, and ``0`` for days that are not.

    Dates are matched by exact equality against ``start + n days``, so
    entries are expected to share the first item's time of day. If a date
    occurs more than once, the last value wins. Empty input yields nothing.
    """
    # Work from the argument rather than a module-level list, and take a
    # snapshot so a one-shot iterator can be indexed at both ends.
    items = list(l)
    if not items:
        return
    by_date = dict(items)
    start, end = items[0][0], items[-1][0]
    # ``.days`` is the whole-day distance; +1 makes the end date inclusive.
    for offset in range((end - start).days + 1):
        day = start + timedelta(days=offset)
        yield (day, by_date.get(day, 0))
# Build the filled series first, then swap it into the existing list in place.
data[:] = list(add_missing(data))
data will then contain your dates in order:
[(datetime.datetime(2015, 1, 1, 0, 0), 666),
(datetime.datetime(2015, 1, 2, 0, 0), 0),
(datetime.datetime(2015, 1, 3, 0, 0), 777),
(datetime.datetime(2015, 1, 4, 0, 0), 0),
(datetime.datetime(2015, 1, 5, 0, 0), 0),
(datetime.datetime(2015, 1, 6, 0, 0), 0),
(datetime.datetime(2015, 1, 7, 0, 0), 0),
(datetime.datetime(2015, 1, 8, 0, 0), 0),
(datetime.datetime(2015, 1, 9, 0, 0), 0),
(datetime.datetime(2015, 1, 10, 0, 0), 0),
(datetime.datetime(2015, 1, 11, 0, 0), 0),
(datetime.datetime(2015, 1, 12, 0, 0), 0),
(datetime.datetime(2015, 1, 13, 0, 0), 0),
(datetime.datetime(2015, 1, 14, 0, 0), 0),
(datetime.datetime(2015, 1, 15, 0, 0), 0),
(datetime.datetime(2015, 1, 16, 0, 0), 0),
(datetime.datetime(2015, 1, 17, 0, 0), 0),
(datetime.datetime(2015, 1, 18, 0, 0), 0),
(datetime.datetime(2015, 1, 19, 0, 0), 0),
(datetime.datetime(2015, 1, 20, 0, 0), 0),
(datetime.datetime(2015, 1, 21, 0, 0), 0),
(datetime.datetime(2015, 1, 22, 0, 0), 0),
(datetime.datetime(2015, 1, 23, 0, 0), 0),
(datetime.datetime(2015, 1, 24, 0, 0), 0),
(datetime.datetime(2015, 1, 25, 0, 0), 0),
(datetime.datetime(2015, 1, 26, 0, 0), 0),
(datetime.datetime(2015, 1, 27, 0, 0), 0),
(datetime.datetime(2015, 1, 28, 0, 0), 0),
(datetime.datetime(2015, 1, 29, 0, 0), 0),
(datetime.datetime(2015, 1, 30, 0, 0), 0),
(datetime.datetime(2015, 1, 31, 0, 0), 0),
(datetime.datetime(2015, 2, 1, 0, 0), 888)]
Based on your logic and expected output, the data is ordered; but if it happened to be in random order, you could use min and max to get the start and end:
def add_missing(l):
    """Yield one ``(date, value)`` pair per day spanned by *l*, zero-filling gaps.

    *l* is an iterable of ``(datetime, value)`` tuples in any order. The
    earliest and latest dates are found with ``min`` and ``max``, and a
    pair is yielded for every day between them inclusive, in ascending
    date order: the stored value for days present in *l*, and ``0`` for
    days that are not.

    Dates are matched by exact equality against ``earliest + n days``, so
    entries are expected to share the same time of day. If a date occurs
    more than once, the last value wins. Empty input yields nothing.
    """
    # Look values up by date instead of consuming the input in list order;
    # for unordered input the two orders differ and values would otherwise
    # be paired with the wrong day.
    by_date = dict(l)
    if not by_date:
        return
    start, end = min(by_date), max(by_date)
    # ``.days`` is the whole-day distance; +1 makes the end date inclusive.
    for offset in range((end - start).days + 1):
        day = start + timedelta(days=offset)
        yield (day, by_date.get(day, 0))
Upvotes: 2