Reputation: 2683
I have the following df
df = pd.DataFrame({'sku_id': {0: 4901301230676,
1: 4901301230638,
2: 8886451000583,
3: 8710428021304,
4: 37338022},
'stock': {0: 5, 1: 12, 2: 9, 3: 14, 4: 332},
'price': {0: 639.0, 1: 639.0, 2: 849.0, 3: 849.0, 4: 128.0},
'special_price': {0: '599', 1: '599', 2: '849', 3: '849', 4: 128.0}})
and a custom value
timestamp = 1580515200
how can I transform that into a nested dict like below
{ 'items' : [{'sku': '4901301230676',
'records': [{'timestamp': 1580515200,
'inv': 5,
'price': Decimal('639.0'),
'special_price': Decimal('599'),
'sales_added': False}]},
{'sku': '4901301230638',
'records': [{'timestamp': 1580515200,
'inv': 12,
'price': Decimal('639.0'),
'special_price': Decimal('599'),
'sales_added': False}]},
{'sku': '8886451000583',
'records': [{'timestamp': 1580515200,
'inv': 9,
'price': Decimal('849.0'),
'special_price': Decimal('849'),
'sales_added': False}]},
{'sku': '8710428021304',
'records': [{'timestamp': 1580515200,
'inv': 14,
'price': Decimal('849.0'),
'special_price': Decimal('849'),
'sales_added': False}]},
{'sku': '37338022',
'records': [{'timestamp': 1580515200,
'inv': 332,
'price': Decimal('128.0'),
'special_price': Decimal('128.0'),
'sales_added': False}]}]}
I have tried
from decimal import Decimal
nestedDict = {'items':[]}
for item in df.itertuples():
output = {"sku":str(item.sku_id),
'records': [
{
'timestamp':timestamp,
'inv':int(item.stock),
'price':Decimal(str(item.price)),
'special_price':Decimal(str(item.special_price)),
'sales_added': False
}
]
}
nestedDict['items'].append(output)
But this is really slow because it is making use of the itertuples and python for loops. Is there a better of executing this operation by vectorization or otherwise.
I am looking to process around 20 million rows
Upvotes: 1
Views: 69
Reputation: 19947
You can use a lambda function to build the dict for each row and then conver the df to a list in the end:
(
df.apply(lambda x: {'sku': x.sku_id,
'records': [{'timestamp': timestamp,
'inv': x.stock,
'price': str(x.price),
'special_price': str(x.special_price),
'sales_added': False}]}, axis=1)
.pipe(lambda x: {'item': x.tolist()})
)
{'item': [{'sku': 4901301230676,
'records': [{'timestamp': 1580515200,
'inv': 5,
'price': '639.0',
'special_price': '599',
'sales_added': False}]},
{'sku': 4901301230638,
'records': [{'timestamp': 1580515200,
'inv': 12,
'price': '639.0',
'special_price': '599',
'sales_added': False}]},
{'sku': 8886451000583,
'records': [{'timestamp': 1580515200,
'inv': 9,
'price': '849.0',
'special_price': '849',
'sales_added': False}]},
{'sku': 8710428021304,
'records': [{'timestamp': 1580515200,
'inv': 14,
'price': '849.0',
'special_price': '849',
'sales_added': False}]},
{'sku': 37338022,
'records': [{'timestamp': 1580515200,
'inv': 332,
'price': '128.0',
'special_price': '128.0',
'sales_added': False}]}]}
Upvotes: 1
Reputation:
Here's my try at it, let me know if it is fast enough:
In [3]: df = pd.DataFrame({'sku_id': {0: 4901301230676,
...: 1: 4901301230638,
...: 2: 8886451000583,
...: 3: 8710428021304,
...: 4: 37338022},
...: 'stock': {0: 5, 1: 12, 2: 9, 3: 14, 4: 332},
...: 'price': {0: 639.0, 1: 639.0, 2: 849.0, 3: 849.0, 4: 128.0},
...: 'special_price': {0: '599', 1: '599', 2: '849', 3: '849', 4: 128.0}})
...:
...: timestamp = 1580515200
...:
...: df["timestamp"] = timestamp
...: df["sales_added"] = False
...:
...: df.rename(columns={"stock": "inv"}, inplace=True)
...:
...: sku_id = df["sku_id"]
...:
...: df = df[["timestamp", "inv", "price", "special_price", "sales_added"]]
...:
...: {"items": pd.DataFrame({"sku": sku_id, "records": df.to_dict(orient="rec
...: ords")}).to_dict(orient="records")}
Out[3]:
{'items': [{'sku': 4901301230676,
'records': {'timestamp': 1580515200,
'inv': 5,
'price': 639.0,
'special_price': '599',
'sales_added': False}},
{'sku': 4901301230638,
'records': {'timestamp': 1580515200,
'inv': 12,
'price': 639.0,
'special_price': '599',
'sales_added': False}},
{'sku': 8886451000583,
'records': {'timestamp': 1580515200,
'inv': 9,
'price': 849.0,
'special_price': '849',
'sales_added': False}},
{'sku': 8710428021304,
'records': {'timestamp': 1580515200,
'inv': 14,
'price': 849.0,
'special_price': '849',
'sales_added': False}},
{'sku': 37338022,
'records': {'timestamp': 1580515200,
'inv': 332,
'price': 128.0,
'special_price': 128.0,
'sales_added': False}}]}
Upvotes: 2