Reputation: 11
I'm trying to create a dataframe and a CSV of stock data from yfinance, but I keep getting `NA,NA,AAPL,...` as my first row. How can I fix it?
My code:
import os

import pandas as pd
import yfinance as yf

# Define the list of stock symbols and date range
stocks = ["AAPL", "AMZN", "NFLX"]
start_date = "2020-01-01"
end_date = "2023-01-01"
stock_data_path = "***/data"

# Ensure the save path exists
os.makedirs(stock_data_path, exist_ok=True)

# Loop through the stocks and download the data
for stock in stocks:
    print(f"Fetching data for {stock}...")
    # NOTE(review): depending on the installed yfinance version, download()
    # appears to return two-level columns (field, ticker) unless
    # multi_level_index=False is passed. The second level would then be
    # written to the CSV as an extra header line and read back below as the
    # first data row -- confirm against your yfinance version.
    data = yf.download(stock, start=start_date, end=end_date)
    # Reset the index to have Date as a column
    data.reset_index(inplace=True)
    # Add the Ticker column
    data['Ticker'] = stock
    # Remove the 0's row (nothing is actually removed here yet)
    # Keep only necessary columns
    data = data[["Ticker", "Date", "Open", "Close", "High", "Low", "Volume"]]
    # Save the data to a CSV file
    file_path = os.path.join(stock_data_path, f"{stock}_stock_data.csv")
    data.to_csv(file_path, index=False)

# Combine all files into a single DataFrame and save to CSV.
# Every per-stock file has the same 7 columns, so the frames can simply be
# stacked; building the list first avoids special-casing the first stock.
all_data = pd.concat(
    [
        pd.read_csv(os.path.join(stock_data_path, f"{stock}_stock_data.csv"))
        for stock in stocks
    ],
    ignore_index=True,
)

# Save combined data to CSV
all_data_file_path = os.path.join(stock_data_path, "all_stocks_data.csv")
all_data.to_csv(all_data_file_path, index=False)
all_data
The first row of the dataframe
Thank you for your help
Upvotes: 0
Views: 586
Reputation: 21
This issue can be resolved by passing `multi_level_index=False` as an argument to `yfinance.download()`.
Upvotes: 2
Reputation: 11
It turns out the issue was the way yfinance builds the data frame: the columns come back with more than one level, so I had to unfold (flatten) them in several steps. Here is the revised code:
import os

import pandas as pd

# Define the list of stock symbols and date range
stocks = ["AAPL", "AMZN", "NFLX"]
start_date = "2020-01-01"
end_date = "2023-01-01"
stock_data_path = "C:/Users***/FinTrendLSTM/data"

# Columns kept in every per-stock CSV and in the combined CSV, in this order
columns_to_keep = ["Ticker", "Date", "Open", "Close", "High", "Low", "Volume"]


def flatten_columns(data):
    """Return *data* with single-level column labels.

    If the columns are a ``pandas.MultiIndex`` only the first level is kept
    (e.g. ``("Close", "AAPL")`` becomes ``"Close"``). Taking the level
    directly, rather than joining the tuple with "_" and splitting it again,
    keeps column names that themselves contain an underscore intact.
    A frame whose columns are already flat is returned unchanged.
    The input frame is never modified.
    """
    if not isinstance(data.columns, pd.MultiIndex):
        return data
    flat = data.copy()
    # list(...) drops the level name so it does not linger on the columns
    flat.columns = list(data.columns.get_level_values(0))
    return flat


def tidy_stock_data(data, stock):
    """Turn a downloaded price frame into a flat table for one ticker.

    Args:
        data: frame indexed by an index named "Date", with price columns
            that are either flat or a MultiIndex whose first level holds
            the field names ("Open", "Close", ...).
        stock: ticker symbol written into the added "Ticker" column.

    Returns:
        A new DataFrame with exactly the columns in ``columns_to_keep``.

    Raises:
        KeyError: if one of the expected columns is missing.
    """
    # Flatten *before* reset_index so "Date" becomes a plain column label
    # instead of a ("Date", "") tuple.
    tidy = flatten_columns(data).reset_index()
    tidy["Ticker"] = stock
    return tidy[columns_to_keep]


def main():
    """Download every ticker, save one CSV per ticker plus a combined CSV.

    Returns:
        The combined DataFrame that was written to ``all_stocks_data.csv``.
    """
    # Imported here rather than at module level so the helpers above can be
    # imported without yfinance installed or any network access.
    import yfinance as yf

    # Ensure the save path exists
    os.makedirs(stock_data_path, exist_ok=True)

    frames = []
    for stock in stocks:
        print(f"Fetching data for {stock}...")
        raw = yf.download(stock, start=start_date, end=end_date)
        data = tidy_stock_data(raw, stock)
        print(data.head())

        # Save the data to a CSV file
        file_path = os.path.join(stock_data_path, f"{stock}_stock_data.csv")
        data.to_csv(file_path, index=False)
        frames.append(data)

    # Combine all the dataframes into a single dataframe in one concat call
    all_data = pd.concat(frames, ignore_index=True, axis=0)

    # Save the combined data to a CSV file
    all_data_file_path = os.path.join(stock_data_path, "all_stocks_data.csv")
    all_data.to_csv(all_data_file_path, index=False)
    return all_data


if __name__ == "__main__":
    all_data = main()
    print(all_data.head())
Upvotes: 0