gllow
gllow

Reputation: 63

Python KeyError: 1.0

I'm trying to run this code

from math import sqrt
import numpy as np
import warnings
from collections import Counter
import pandas as pd
import random

def k_nearest_neighbors(data,predict, k =3):
 if len(data) >= k:
  warnings.warn('K is set to a value less than total voting groups')
 distances = []
 for group in data:
  for features in data[group]:
   eucliden_distance = np.linalg.norm(np.array(features)-np.array(predict))
   distances.append([eucliden_distance,group])
 votes = [i[1] for i in sorted(distances)[:k]]
 print(Counter(votes).most_common(1))
 vote_result = Counter(votes).most_common(1)[0][0]
 return vote_result

df = pd.read_csv('bc2.txt')
df.replace('?',-99999,inplace=True)
df.drop(['id'],1,inplace = True)
full_data = df.astype(float).values.tolist()

random.shuffle(full_data)
test_size = 0.2
train_set = {2:[],4:[]}
test_set = {2:[],4:[]}
train_data = full_data[:-int(test_size*len(full_data))]
test_data = full_data[-int(test_size*len(full_data)):]


for i in train_data:
 train_set[i[-1]].append(i[:-1])

for i in train_data:
 test_set[i[-1]].append(i[:-1])

correct = 0
total = 0

for group in test_set:
 for data in test_set[group]:
  vote = k_nearest_neighbors(train_set,data, k=5)
  if group == vote:
   correct += 1
  total += 1

print ('Accuracy:',correct/total)

it comes out with this error msg

File "ml8.py", line 38, in <module>
    train_set[i[-1]].append(i[:-1])
KeyError: 1.0

file m18.py is this above code file

below is the sample of txt file

id,clump_thickness,unif_cell_size,unif_cell_shape,marg_adhesion,single_epith_cell_size,bare_nuclei,bland_chrom,norm_nucleoli,mitoses,class
1000025,2,5,1,1,1,2,1,3,1,1
1002945,2,5,4,4,5,7,10,3,2,1
1015425,2,3,1,1,1,2,2,3,1,1
1016277,2,6,8,8,1,3,4,3,7,1
1017023,2,4,1,1,3,2,1,3,1,1
1017122,4,8,10,10,8,7,10,9,7,1
1018099,2,1,1,1,1,2,10,3,1,1
1018561,2,2,1,2,1,2,1,3,1,1
1033078,2,2,1,1,1,2,1,1,1,5
1033078,2,4,2,1,1,2,1,2,1,1
1035283,2,1,1,1,1,1,1,3,1,1
1036172,2,2,1,1,1,2,1,2,1,1
1041801,4,5,3,3,3,2,3,4,4,1

I'm using 2.7.11 version

Upvotes: 0

Views: 5221

Answers (1)

Your train_set only contains keys 2 and 4, whereas your classes in that sample are 1 and 5.

Instead of using

train_set = {2:[],4:[]}

you might have better luck with defaultdict:

from collections import defaultdict
train_set = defaultdict(list)

This way a non-existent key will be initialized to a new empty list on first access.

Upvotes: 2

Related Questions