Reputation: 11
I've been trying to create a table in DuckDB database for stock prices. Everything works ok except for date (which is EOD from yahoo finance): it should be 2024-01-02 but in DB its: 2024-01-02 00:00:00.000. What am I missing here? I even placed SELECT CAST(Date AS DATE)
but it didn't help either.
import yfinance as yf
import duckdb
import pandas as pd
from datetime import datetime, timedelta
# Download data
data = yf.download('AAPL', start='2024-01-01', rounding=True)
data.reset_index(inplace=True)
# Set Date column as a date without the time component
data['Date'] = data['Date'].dt.date
# Rename the 'Adj Close' column to 'Adj_Close'
data.rename(columns={'Adj Close': 'Adj_Close'}, inplace=True)
# Database connection path for DuckDB
db_path = "SPY.duckdb"
conn = duckdb.connect(db_path)
symbol = "SPY"
# Create table
create_table_query = f"""
CREATE TABLE IF NOT EXISTS {symbol} (
Date DATE UNIQUE,
Open FLOAT,
High FLOAT,
Low FLOAT,
Close FLOAT,
Adj_Close FLOAT,
Volume BIGINT
);
"""
conn.execute(create_table_query)
# Insert data into DuckDB with explicit casting of Date column
conn.execute(f"INSERT INTO {symbol} SELECT CAST(Date AS DATE), Open, High, Low, Close, Adj_Close, Volume FROM data")
How to get rid of time stamp in DB?
Upvotes: 1
Views: 264
Reputation: 36
Let's try this implementation.
Remove (backup) your previous SPY.duckdb
file
import yfinance as yf
import duckdb
import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
data = yf.download("AAPL", start="2024-01-01", rounding=True)
data.reset_index(inplace=True)
data["Date"] = data["Date"].dt.date
data.rename(columns={"Adj Close": "Adj_Close"}, inplace=True)
if isinstance(data.columns, pd.MultiIndex) and data.columns.nlevels > 1:
data.columns = data.columns.droplevel(1)
db_path = "SPY.duckdb"
conn = duckdb.connect(db_path)
symbol = "SPY"
# Create table
create_table_query = f"""
CREATE TABLE IF NOT EXISTS {symbol} (
Date DATE UNIQUE,
Open FLOAT,
High FLOAT,
Low FLOAT,
Close FLOAT,
Adj_Close FLOAT,
Volume BIGINT
);
"""
conn.execute(create_table_query)
data.to_sql(symbol, conn, if_exists="replace", index=False)
row_data_from_db = conn.execute(f"SELECT * FROM {symbol} limit 1").fetchall()
print(row_data_from_db)
it gives us
[(datetime.date(2024, 1, 2), 184.94000244140625, 185.63999938964844, 188.44000244140625, 183.88999938964844, 187.14999389648438, 82488700)]
Upvotes: 1