Reputation: 35
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
# Load the raw table and derive the combined base-stat score used for filtering.
df = pd.read_csv('pokemon_data.csv')
df['Total'] = (
    df['HP'] + df['Attack'] + df['Defense']
    + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
)

# Keep rows whose Total exceeds 450, then remove rows whose Name contains
# 'Mega' or 'Primal' (str.contains treats the pattern as a regex by default,
# so the alternation covers both substrings in one pass).
df = df.loc[df['Total'] > 450]
df = df.loc[~df['Name'].str.contains('Mega|Primal')]

# Discard every column that is neither a numeric feature nor the label.
df = df.drop(columns=['Name', 'Generation', 'Legendary', 'Type 2', '#'])

# Encode the 'Type 1' label as an integer id: each name is replaced by its
# position in this list, giving ids 0..17.
TYPE_NAMES = [
    'Fire', 'Normal', 'Water', 'Grass', 'Electric', 'Ice',
    'Fighting', 'Poison', 'Ground', 'Flying', 'Psychic', 'Rock',
    'Bug', 'Ghost', 'Dark', 'Dragon', 'Steel', 'Fairy',
]
for type_id, type_name in enumerate(TYPE_NAMES):
    df.loc[df['Type 1'] == type_name, 'Type 1'] = type_id

# The column held strings before the replacement above, so convert it to a
# numeric dtype explicitly.
TEMP = ['Type 1']
for col in TEMP:
    df[col] = pd.to_numeric(df[col])

# Evaluation subsets are selected from df itself, so they overlap the rows
# used for training. Both comparisons are strict: rows with Total exactly
# 500 land in neither subset.
df_eval_sub = df.loc[df['Total'] < 500]
df_eval_over = df.loc[df['Total'] > 500]

# Split the label column off each frame; pop() removes it from the features.
y_train = df.pop('Type 1')
y_eval_sub = df_eval_sub.pop('Type 1')
y_eval_over = df_eval_over.pop('Type 1')

feature_columns = []

# Only the training frame is passed through to_numeric here; the evaluation
# subsets were sliced off before this point.
TO_INT = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']
for col in TO_INT:
    df[col] = pd.to_numeric(df[col])

# One float32 numeric feature column per stat column.
NUMERIC_COLUMNS = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']
for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    """Return a zero-argument function that builds a tf.data.Dataset.

    Args:
        data_df: Feature table; it is converted with ``dict(data_df)`` so each
            column becomes one named feature.
        label_df: Labels, sliced in lockstep with the features.
        num_epochs: Number of times the batched dataset is repeated.
        shuffle: When true, shuffle with a buffer of 1000 elements before
            batching.
        batch_size: Number of examples per batch.

    Returns:
        A callable taking no arguments that constructs and returns the
        dataset each time it is called.
    """
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        if shuffle:
            ds = ds.shuffle(1000)
        # Batching and repeating apply whether or not shuffling is enabled;
        # the shuffle=False evaluation path still needs batched output.
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds

    return input_function
# Training input: the rows remaining in df paired with their 'Type 1' labels,
# using make_input_fn's defaults (10 epochs, shuffled, batches of 32).
train_input_fn = make_input_fn(df, y_train)
# Evaluation input: the Total < 500 subset, read once and in order.
eval_input_fn = make_input_fn(df_eval_sub, y_eval_sub, num_epochs = 1, shuffle = False)
# NOTE(review): n_classes is not passed to the estimator. The assertion quoted
# below reports an upper label bound of 1, while the labels encoded above span
# 0..17 -- this mismatch is the failure being asked about.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
# The reported InvalidArgumentError is raised by this call.
linear_est.train(train_input_fn)
result = linear_est.evaluate(eval_input_fn)
Error:
InvalidArgumentError: assertion failed: [Labels must be <= n_classes - 1] [Condition x <= y did not hold element-wise:] [x (head/losses/Cast:0) = ] [[16][0][11]...] [y (head/losses/check_label_range/Const:0) = ] [1]
[[{{node Assert}}]]
I assume that n_classes refers to the number of columns: after cleaning the dataframe I have 8 columns left, and then I remove one, which is the column I use for y_train. The error is triggered at
linear_est.train(train_input_fn)
The column that I use for y_train has 18 different values, but I am not sure whether that is relevant.
Upvotes: 0
Views: 1577
Reputation: 4313
n_classes
does not mean the number of columns. It is a parameter of tf.estimator.LinearClassifier that you have to specify (it defaults to 2), and every label value must be <= n_classes - 1
. In your case the labels run from 0 to 17, so you should set linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns, n_classes=18)
Upvotes: 1