Ted pottel
Ted pottel

Reputation: 6983

getting TypeError: float() argument must be a string or a number, not 'method'

I'm trying to follow a YOUTUBE tutorial on doing bitcoin price prediction using neural nets.

I can no longer run the code. The line 24

df[col] = preprocessing.scale( df[col].values)

is causing a

Traceback (most recent call last):
  File "C:/Users/tpottel/programming/java/eclipse projects/BBitBotCoinUsing2/pythonproject/deeplearning/video-lstm-bitcoin.py", line 109, in <module>
    validation_X, validation_y = preprocess_df(main_df)
  File "C:/Users/tpottel/programming/java/eclipse projects/BBitBotCoinUsing2/pythonproject/deeplearning/video-lstm-bitcoin.py", line 24, in preprocess_df
    df[col] = preprocessing.scale( df[col].values)
  File "C:\python37-32\lib\site-packages\sklearn\preprocessing\data.py", line 141, in scale
    force_all_finite='allow-nan')
  File "C:\python37-32\lib\site-packages\sklearn\utils\validation.py", line 496, in check_array
    array = np.asarray(array, dtype=dtype, order=order)
  File "C:\python37-32\lib\site-packages\numpy\core\_asarray.py", line 85, in asarray
    return array(a, dtype, copy=False, order=order)
TypeError: float() argument must be a string or a number, not 'method'

code:

import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import numpy as np
import random


# create a empty data frame
main_df= pd.DataFrame()

SEQ_LEN = 60 # last 60 minuts of data
FUTURE_PERIOD_PREDICT = 3 # i minute
RATIO_TO_PREDICT="LTC-USD"

def preprocess_df(df):
    df=df.drop("future",1)

    for col in df.columns:
        if col != "":
            df[col]= df[col].pct_change
            df.dropna(inplace=True)

            df[col] = preprocessing.scale( df[col].values)

    df.dropna(inplace=True)

    sequential_data= []
    prev_days=deque(maxLen=SEQ_LEN)

    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])

    # dont understand this why are we shuffling seq data
    random.shuffle(sequential_data)

    buys = []
    sells = []

    for seq, target in sequential_data:
        if target==0:
            sells.append({seq, target})
        else:
            buys.append({seq, target})

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min ( len(buys), len(sells))
    buys=buys[:lower]
    sells = sells[:lower]

    sequential_data=buys+sells
    random.shuffle(sequential_data)

    X=[]
    y=[]

    for seq, target in sequential_data:
        X.append(seq)
        y.append( target)

    return np.array(X), y

def classify( current, future):
    if float(future) > float(future):
        return 1
    else:
        return 0



ratios = ["BTC-USD","LTC-USD","ETH-USD","BCH-USD"]
for ratio in ratios:
    dataset= f"c:\\datasets\\crypto_data\\{ratio}.csv"
    df=pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"] )

    df.rename( columns={ "close":f"{ratio}_close","volume":f"{ratio}_volume" }, inplace=True )
    df.set_index("time", inplace=True)
    df=df[ [f"{ratio}_close", f"{ratio}_volume" ] ]

    if len(main_df)==0:
        main_df = df
    else:
        main_df = main_df.join(df)

# adding a new col
main_df['future'] = main_df [ f"{RATIO_TO_PREDICT}_close" ].shift(-FUTURE_PERIOD_PREDICT)
main_df["target"] = list ( map( classify, main_df[f"{RATIO_TO_PREDICT}_close"],main_df['future'] ))

#get a sorted list of just the time sgtamps
# NOTE vales converts it to a numpy array wjitch has the methed sort
times=sorted( main_df.index.values)

# negativ to get t5% from the top ie
last_5pct=times[ -int(0.5*len(times))]
print(last_5pct)

#get validation data
# we do not slice it becouse the data may be out of order
# tsting data
validation_main_df=main_df[( main_df.index >= last_5pct)]

# traing data
main_df=main_df[( main_df.index < last_5pct)]

validation_X, validation_y = preprocess_df(main_df)

Upvotes: 0

Views: 468

Answers (1)

Psychotechnopath
Psychotechnopath

Reputation: 2744

You should use add brackets behind the function call, like this:

df[col] = preprocessing.scale( df[col].values())

Also, there is another error in your code. The maxlen argument in the Deque class (This line in your code)

prev_days=deque(maxLen=SEQ_LEN)

is written without a capital L, so it should be prev_days=deque(maxlen=SEQ_LEN)

Upvotes: 1

Related Questions