Reputation: 6983
I'm trying to follow a YOUTUBE tutorial on doing bitcoin price prediction using neural nets.
I can no longer run the code. The line 24
df[col] = preprocessing.scale( df[col].values)
is causing a
Traceback (most recent call last):
File "C:/Users/tpottel/programming/java/eclipse projects/BBitBotCoinUsing2/pythonproject/deeplearning/video-lstm-bitcoin.py", line 109, in <module>
validation_X, validation_y = preprocess_df(main_df)
File "C:/Users/tpottel/programming/java/eclipse projects/BBitBotCoinUsing2/pythonproject/deeplearning/video-lstm-bitcoin.py", line 24, in preprocess_df
df[col] = preprocessing.scale( df[col].values)
File "C:\python37-32\lib\site-packages\sklearn\preprocessing\data.py", line 141, in scale
force_all_finite='allow-nan')
File "C:\python37-32\lib\site-packages\sklearn\utils\validation.py", line 496, in check_array
array = np.asarray(array, dtype=dtype, order=order)
File "C:\python37-32\lib\site-packages\numpy\core\_asarray.py", line 85, in asarray
return array(a, dtype, copy=False, order=order)
TypeError: float() argument must be a string or a number, not 'method'
code:
import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import numpy as np
import random
# create a empty data frame
main_df= pd.DataFrame()
SEQ_LEN = 60 # last 60 minuts of data
FUTURE_PERIOD_PREDICT = 3 # i minute
RATIO_TO_PREDICT="LTC-USD"
def preprocess_df(df):
df=df.drop("future",1)
for col in df.columns:
if col != "":
df[col]= df[col].pct_change
df.dropna(inplace=True)
df[col] = preprocessing.scale( df[col].values)
df.dropna(inplace=True)
sequential_data= []
prev_days=deque(maxLen=SEQ_LEN)
for i in df.values:
prev_days.append([n for n in i[:-1]])
if len(prev_days) == SEQ_LEN:
sequential_data.append([np.array(prev_days), i[-1]])
# dont understand this why are we shuffling seq data
random.shuffle(sequential_data)
buys = []
sells = []
for seq, target in sequential_data:
if target==0:
sells.append({seq, target})
else:
buys.append({seq, target})
random.shuffle(buys)
random.shuffle(sells)
lower = min ( len(buys), len(sells))
buys=buys[:lower]
sells = sells[:lower]
sequential_data=buys+sells
random.shuffle(sequential_data)
X=[]
y=[]
for seq, target in sequential_data:
X.append(seq)
y.append( target)
return np.array(X), y
def classify( current, future):
if float(future) > float(future):
return 1
else:
return 0
ratios = ["BTC-USD","LTC-USD","ETH-USD","BCH-USD"]
for ratio in ratios:
dataset= f"c:\\datasets\\crypto_data\\{ratio}.csv"
df=pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"] )
df.rename( columns={ "close":f"{ratio}_close","volume":f"{ratio}_volume" }, inplace=True )
df.set_index("time", inplace=True)
df=df[ [f"{ratio}_close", f"{ratio}_volume" ] ]
if len(main_df)==0:
main_df = df
else:
main_df = main_df.join(df)
# adding a new col
main_df['future'] = main_df [ f"{RATIO_TO_PREDICT}_close" ].shift(-FUTURE_PERIOD_PREDICT)
main_df["target"] = list ( map( classify, main_df[f"{RATIO_TO_PREDICT}_close"],main_df['future'] ))
#get a sorted list of just the time sgtamps
# NOTE vales converts it to a numpy array wjitch has the methed sort
times=sorted( main_df.index.values)
# negativ to get t5% from the top ie
last_5pct=times[ -int(0.5*len(times))]
print(last_5pct)
#get validation data
# we do not slice it becouse the data may be out of order
# tsting data
validation_main_df=main_df[( main_df.index >= last_5pct)]
# traing data
main_df=main_df[( main_df.index < last_5pct)]
validation_X, validation_y = preprocess_df(main_df)
Upvotes: 0
Views: 468
Reputation: 2744
You should use add brackets behind the function call, like this:
df[col] = preprocessing.scale( df[col].values())
Also, there is another error in your code. The maxlen argument in the Deque class (This line in your code)
prev_days=deque(maxLen=SEQ_LEN)
is written without a capital L, so it should be prev_days=deque(maxlen=SEQ_LEN)
Upvotes: 1