Thomas Jerkson
Thomas Jerkson

Reputation: 229

Python - fill in the unused dates

I have a list like this:

data = [
    (datetime.datetime(2015,1,1), 666),
    (datetime.datetime(2015,1,3), 777),
    (datetime.datetime(2015,2,1), 888),   
]

Do you have any idea how I can fill in the other dates with 0? To get:

data = [
    (datetime.datetime(2015,1,1), 666),
    (datetime.datetime(2015,1,2), 0),
    (datetime.datetime(2015,1,3), 777),
    (datetime.datetime(2015,1,4), 0),
    (datetime.datetime(2015,1,5), 0),
    (datetime.datetime(2015,1,6), 0),
    ... etc,
    (datetime.datetime(2015,2,1), 888),   
]

Upvotes: 2

Views: 67

Answers (2)

jfs
jfs

Reputation: 414675

To return zero for missing dates, you could use collections.defaultdict:

>>> from datetime import datetime
>>> data = [
...     (datetime(2015,1,1), 666),
...     (datetime(2015,1,3), 777),
...     (datetime(2015,2,1), 888),
... ]
>>> from collections import defaultdict
>>> x = defaultdict(int, data)
>>> x[datetime(2015,1,1)]
666
>>> x[datetime(2015,1,2)]
0

defaultdict lets you look up values without creating a list that covers every date from the earliest to the latest in the original list (note that looking up a missing date also inserts it into the dict with the value 0). But if you do need such a list, it is easy to make one:

>>> from datetime import timedelta
>>> def date_interval(lo, hi, step):
...     while lo <= hi:
...         yield lo
...         lo += step
... 
>>> [(d, x[d]) for d in date_interval(min(x), max(x), timedelta(1))]
[(datetime.datetime(2015, 1, 1, 0, 0), 666),
 (datetime.datetime(2015, 1, 2, 0, 0), 0),
 (datetime.datetime(2015, 1, 3, 0, 0), 777),
 (datetime.datetime(2015, 1, 4, 0, 0), 0),
 (datetime.datetime(2015, 1, 5, 0, 0), 0),
 ...
 (datetime.datetime(2015, 1, 30, 0, 0), 0),
 (datetime.datetime(2015, 1, 31, 0, 0), 0),
 (datetime.datetime(2015, 2, 1, 0, 0), 888)]

Upvotes: 0

Padraic Cunningham
Padraic Cunningham

Reputation: 180481

Provided the list is sorted, so that the first item holds the start date and the last item holds the end date, create a set of all the dates in the list and get the difference in days between the start and the end. Then loop over that range of days: for each day, compute the date as the start date plus n days using timedelta. If that date is not in the set, yield a tuple of the date and 0; if it is, yield the next item from the data list:

from datetime import datetime, timedelta
from operator import itemgetter

data = [
    (datetime(2015, 1, 1), 666),
    (datetime(2015, 1, 3), 777),
    (datetime(2015, 2, 1), 888),
]


def add_missing(l):
    """Yield the items of ``l`` with every missing day filled in as ``(date, 0)``.

    ``l`` is a sequence of tuples whose first element is a ``datetime``.
    It must already be sorted by date: the first item supplies the start
    date and the last item supplies the end date.

    NOTE: assumes each date appears once and that all dates share the same
    time of day; otherwise ``start + n days`` will not match the stored dates.

    Yields one item per day from the start date to the end date inclusive:
    the original item when its date is present, else ``(date, 0)``.
    Yields nothing when ``l`` is empty.
    """
    if not l:
        # Nothing to fill; also avoids an IndexError on l[0] below.
        return
    # Build the lookup set from the argument itself, not from a global list.
    st = set(map(itemgetter(0), l))
    it = iter(l)
    start, end = l[0][0], l[-1][0]
    for day in range((end - start).days + 1):
        dte = start + timedelta(days=day)
        if dte not in st:
            yield (dte, 0)
        else:
            # Input is sorted, so the next unconsumed item is the one for dte.
            yield next(it)

data[:] = add_missing(data)

data will then contain your dates in order:

[(datetime.datetime(2015, 1, 1, 0, 0), 666),
 (datetime.datetime(2015, 1, 2, 0, 0), 0),
 (datetime.datetime(2015, 1, 3, 0, 0), 777),
 (datetime.datetime(2015, 1, 4, 0, 0), 0),
 (datetime.datetime(2015, 1, 5, 0, 0), 0),
 (datetime.datetime(2015, 1, 6, 0, 0), 0),
 (datetime.datetime(2015, 1, 7, 0, 0), 0),
 (datetime.datetime(2015, 1, 8, 0, 0), 0),
 (datetime.datetime(2015, 1, 9, 0, 0), 0),
 (datetime.datetime(2015, 1, 10, 0, 0), 0),
 (datetime.datetime(2015, 1, 11, 0, 0), 0),
 (datetime.datetime(2015, 1, 12, 0, 0), 0),
 (datetime.datetime(2015, 1, 13, 0, 0), 0),
 (datetime.datetime(2015, 1, 14, 0, 0), 0),
 (datetime.datetime(2015, 1, 15, 0, 0), 0),
 (datetime.datetime(2015, 1, 16, 0, 0), 0),
 (datetime.datetime(2015, 1, 17, 0, 0), 0),
 (datetime.datetime(2015, 1, 18, 0, 0), 0),
 (datetime.datetime(2015, 1, 19, 0, 0), 0),
 (datetime.datetime(2015, 1, 20, 0, 0), 0),
 (datetime.datetime(2015, 1, 21, 0, 0), 0),
 (datetime.datetime(2015, 1, 22, 0, 0), 0),
 (datetime.datetime(2015, 1, 23, 0, 0), 0),
 (datetime.datetime(2015, 1, 24, 0, 0), 0),
 (datetime.datetime(2015, 1, 25, 0, 0), 0),
 (datetime.datetime(2015, 1, 26, 0, 0), 0),
 (datetime.datetime(2015, 1, 27, 0, 0), 0),
 (datetime.datetime(2015, 1, 28, 0, 0), 0),
 (datetime.datetime(2015, 1, 29, 0, 0), 0),
 (datetime.datetime(2015, 1, 30, 0, 0), 0),
 (datetime.datetime(2015, 1, 31, 0, 0), 0),
 (datetime.datetime(2015, 2, 1, 0, 0), 888)]

Based on your logic and expected output the data is ordered, but if it happened to be in random order you could use min and max to get the start and end:

def add_missing(l):
    """Yield the items of ``l`` in date order, filling missing days with ``(date, 0)``.

    ``l`` is an iterable of tuples whose first element is a ``datetime``.
    The items may be in any order; the earliest and latest dates are found
    with ``min`` and ``max``.

    NOTE: assumes all dates share the same time of day; otherwise
    ``start + n days`` will not match the stored dates. If a date occurs
    more than once, the last item with that date is the one yielded.

    Yields one item per day from the earliest to the latest date inclusive:
    the original item when its date is present, else ``(date, 0)``.
    Yields nothing when ``l`` is empty.
    """
    # Index items by date so each day gets the item that belongs to it,
    # regardless of the order the items arrived in.
    by_date = {item[0]: item for item in l}
    if not by_date:
        # min()/max() would raise ValueError on an empty collection.
        return
    start, end = min(by_date), max(by_date)
    for day in range((end - start).days + 1):
        dte = start + timedelta(days=day)
        yield by_date.get(dte, (dte, 0))

Upvotes: 2

Related Questions