Reputation: 323
I have this script that pulls data from Kaiko's API, puts it in a pandas dataframe, and then writes it to a CSV file. Sometimes the response contains a continuation token; this is then attached to the URL so the rest of the data can be fetched.
I need to make my code run this process in a loop until there are no more continuation tokens, and I need to either merge or append each dataframe and then write them out as one CSV at the end. I have no clue how to do this. The last if statement gets the token and adds it to the URL, but how do I repeat the process with the new URL? How do I stop when the tokens are gone? How do I combine all this data?
I'm going through the append and merge docs, but they're not really helping.
# Fetch one page of OHLCV/VWAP trade aggregates for BTC-USD on Coinbase
# from the Kaiko market-data API and write it to a dated CSV file.
import requests
import json
import pandas as pd
import time

interval = '1h'     # candle interval requested from the API
page_size = '1000'  # maximum rows per API page
url = f'https://us.market-api.kaiko.io/v2/data/trades.v1/exchanges/cbse/spot/btc-usd/aggregations/count_ohlcv_vwap?interval={interval}&page_size={page_size}'
KEY = 'xxx'  # Kaiko API key (placeholder)
headers = {
    "X-Api-Key": KEY,
    "Accept": "application/json",
    "Accept-Encoding": "gzip"
}

res = requests.get(url, headers=headers)
j_data = res.json()
parse_data = j_data['data']  # list of per-interval OHLCV/VWAP dicts
c_token = j_data.get('continuation_token')  # present when more pages remain
today = time.strftime("%Y-%m-%d")  # used to name the output file
if c_token:
    # NOTE(review): the next-page URL is built here but never requested --
    # as written, this script only ever fetches the first page.
    url = f'https://us.market-api.kaiko.io/v2/data/trades.v1/exchanges/cbse/spot/btc-usd/aggregations/count_ohlcv_vwap?interval={interval}&page_size={page_size}&continuation_token={c_token}'
# create dataframe
df = pd.DataFrame.from_dict(pd.json_normalize(parse_data), orient='columns')
# 'timestamp' arrives in epoch milliseconds; add a readable datetime column.
df.insert(1, 'time', pd.to_datetime(df.timestamp.astype(int), unit='ms'))
print(url)
df.to_csv(f'kaiko-data-{today}.csv', index=False, encoding='utf-8')
Data with continuation token:
{'query': {'page_size': 1, 'exchange': 'cbse', 'instrument_class': 'spot', 'instrument': 'btc-usd', 'interval': '1h', 'sort': 'desc', 'ch': True, 'aggregation': 'count_ohlcv_vwap', 'data_version': 'v1', 'commodity': 'trades', 'request_time': '2021-01-21T17:43:21.829Z'}, 'time': '2021-01-21T17:43:21.933Z', 'timestamp': 1611251001933, 'data': [{'timestamp': 1611248400000, 'open': '31017.12', 'high': '31949.99', 'low': '30980.01', 'close': '31910.7', 'volume': '1687.7296551800025', 'price': '31453.507769478947', 'count': 18657}], 'result': 'success', 'continuation_token': 'eHS2F4YczupYKnGrosahFiBi1SVXrFdADpDZf6jtiWDTcLQSgrLzRnJWgTZrbok1VMfZa8Z1ntSiEqbFDPfQN8jNiMNgsjHmRZYazR6yk8GoyQ4N6pyYXdhnLVzZqwLSPya9Lqvb3ZSZH9kWZ4jmyZrwtAZzugDd', 'next_url': 'http://us.market-api.kaiko.io/v2/data/trades.v1/exchanges/cbse/spot/btc-usd/aggregations/count_ohlcv_vwap?continuation_token=eHS2F4YczupYKnGrosahFiBi1SVXrFdADpDZf6jtiWDTcLQSgrLzRnJWgTZrbok1VMfZa8Z1ntSiEqbFDPfQN8jNiMNgsjHmRZYazR6yk8GoyQ4N6pyYXdhnLVzZqwLSPya9Lqvb3ZSZH9kWZ4jmyZrwtAZzugDd', 'access': {'access_range': {'start_timestamp': 1610928000000, 'end_timestamp': 1612223999000}, 'data_range': {'start_timestamp': 1608249600000, 'end_timestamp': 1612223999000}}}
Upvotes: 0
Views: 744
Reputation: 9047
Here is one solution
# Pull ALL pages of OHLCV/VWAP trade aggregates for BTC-USD on Coinbase from
# the Kaiko market-data API, following continuation tokens until the data set
# is exhausted, and append each page to a single CSV file.
import requests
import json
import pandas as pd
import time
import os

interval = '1h'     # candle interval requested from the API
page_size = '1000'  # maximum rows per API page
base_url = (
    'https://us.market-api.kaiko.io/v2/data/trades.v1/exchanges/cbse/spot/'
    f'btc-usd/aggregations/count_ohlcv_vwap?interval={interval}&page_size={page_size}'
)
KEY = 'xxx'  # Kaiko API key (placeholder)
headers = {
    "X-Api-Key": KEY,
    "Accept": "application/json",
    "Accept-Encoding": "gzip"
}

my_csv_file = "my_csv_file.csv"

# Loop one request per page; `url` becomes None when the API stops returning
# a continuation token, which terminates the loop.
url = base_url
while url:
    res = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on HTTP errors instead of masking them as a KeyError
    # when the error payload has no 'data' field.
    res.raise_for_status()
    j_data = res.json()
    parse_data = j_data['data']  # list of per-interval OHLCV/VWAP dicts

    c_token = j_data.get('continuation_token')
    url = f'{base_url}&continuation_token={c_token}' if c_token else None
    print(url)

    df = pd.DataFrame.from_dict(pd.json_normalize(parse_data), orient='columns')
    # 'timestamp' arrives in epoch milliseconds; add a readable datetime column.
    df.insert(1, 'time', pd.to_datetime(df.timestamp.astype(int), unit='ms'))

    # Append each page to one CSV; write the header only when the file does
    # not exist yet. os.path.exists is cheaper and more robust than scanning
    # os.listdir() (which also breaks if the path has a directory component).
    first_write = not os.path.exists(my_csv_file)
    df.to_csv(my_csv_file,
              mode='w' if first_write else 'a',
              header=first_write,
              index=False,
              encoding='utf-8')
Upvotes: 1