Pr.Args
Pr.Args

Reputation: 11

Drop the ticker row of yfinance dataframe

I'm trying to create a DataFrame and a CSV of stock data from yfinance, but I keep getting `NA,NA,AAPL...` as my first row. How can I fix it?

my code:

import os

# Script overview: download daily price data for each ticker, write one CSV
# per ticker, then re-read those CSVs and combine them into a single CSV.
# NOTE(review): `yf` and `pd` are used below but are not imported in the
# visible snippet; presumably `import yfinance as yf` and
# `import pandas as pd` appear elsewhere — confirm.

# Define the list of stock symbols and date range
stocks = ["AAPL", "AMZN", "NFLX"]
start_date = "2020-01-01"
end_date = "2023-01-01"
stock_data_path = "***/data"


# Ensure the save path exists
os.makedirs(stock_data_path, exist_ok=True)

# Loop through the stocks and download the data
for stock in stocks:
    print(f"Fetching data for {stock}...")
    # NOTE(review): the frame returned here appears to carry an extra column
    # level holding the ticker symbol, which would explain the stray
    # "NA,NA,AAPL..." row in the saved CSV — confirm against the yfinance docs.
    data = yf.download(stock, start=start_date, end=end_date)

    # Reset the index to have Date as a column
    data.reset_index(inplace=True)

    # Add the Ticker column
    data['Ticker'] = stock

    # Remove the 0's row (placeholder: no code follows, nothing is removed yet)


    # Keep only necessary columns
    data = data[["Ticker", "Date", "Open", "Close", "High", "Low", "Volume"]]

    # Save the data to a CSV file (index=False: the row index is not written)
    file_path = os.path.join(stock_data_path, f"{stock}_stock_data.csv")
    data.to_csv(file_path, index=False)
    

# Combine all files into a single DataFrame and save to CSV
for stock in stocks:
    file_path = os.path.join(stock_data_path, f"{stock}_stock_data.csv")
    stock_data = pd.read_csv(file_path)

    # Concatenate row-wise; every per-stock CSV was written with the same
    # 7 columns selected above. The first ticker seeds all_data.
    if stock == stocks[0]:
        all_data = stock_data
    else:
        all_data = pd.concat([all_data, stock_data], ignore_index=True)


# Save combined data to CSV
all_data_file_path = os.path.join(stock_data_path, "all_stocks_data.csv")
all_data.to_csv(all_data_file_path, index=False)

# Bare expression: has no effect in a script; presumably left in to display
# the frame when run in a notebook cell.
all_data

The first row of the dataframe

Thank you for your help

Upvotes: 0

Views: 586

Answers (2)

anon98273746
anon98273746

Reputation: 21

This issue can be resolved by specifying multi_level_index = False in the arguments of yfinance.download().

Upvotes: 2

Pr.Args
Pr.Args

Reputation: 11

It turns out the issue was with the way yfinance builds the DataFrame: I had to unfold (flatten) its column levels. Here is the revised code:

 import os

# Define the list of stock symbols and date range
stocks = ["AAPL", "AMZN", "NFLX"]
start_date = "2020-01-01"
end_date = "2023-01-01"
stock_data_path = "C:/Users***/FinTrendLSTM/data"

# Add a directory to save the dataframes
data_frames = {}

# Ensure the save path exist
os.makedirs(stock_data_path, exist_ok=True)

# Loop through the stocks and download the data
for stock in stocks:
    print(f"Fetching data for {stock}...")
    data = yf.download(stock, start=start_date, end=end_date)

    print(data.head())
    # Reset the index to make Date a column
    data.reset_index(inplace=True)

    print(data.head())
    # Add the Ticker column
    data['Ticker'] = stock   

    #print all the unfolded lyres of the dataframe loop ovrer data and unfold it
    data.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in data.columns]
    print(data.head())
    # rename the columns bu rmoving the everythign after the underscore including the underscore
    data.columns = [col.split("_")[0] for col in data.columns]
    print(data.head())
    # Keep only necessary columns
    data = data[["Ticker", "Date", "Open", "Close", "High", "Low", "Volume"]]
    print(data.head())
    # Add the data to the dictionary
    data_frames[stock] = data

    # Save the data to a CSV file
    file_path = os.path.join(stock_data_path, f"{stock}_stock_data.csv")
    data.to_csv(file_path, index=False)
    
# combine all the dataframes into a single dataframe
for stock, data in data_frames.items():
    if stock == stocks[0]:
        all_data = data
    else:
        all_data = pd.concat([all_data, data], ignore_index=True, axis=0)

# Save the combined data to a CSV file
all_data_file_path = os.path.join(stock_data_path, "all_stocks_data.csv")
all_data.to_csv(all_data_file_path, index=False)

all_data

Upvotes: 0

Related Questions