Reputation: 347
I am trying to learn PyTorch and create my first neural network. I am using a custom dataset, here is a sample of the data:
ID_REF cg00001854 cg00270460 cg00293191 cg00585219 cg00702638 cg01434611 cg02370734 cg02644867 cg02879967 cg03036557 cg03123104 cg03670302 cg04146801 cg04570540 cg04880546 cg07044749 cg07135408 cg07303143 cg07475178 cg07553761 cg07917901 cg08016257 cg08548498 cg08715791 cg09334636 cg11153071 cg11441796 cg11642652 cg12256803 cg12352902 cg12541127 cg13313833 cg13500819 cg13975075 cg14061946 cg14086922 cg14224196 cg14530143 cg15456742 cg16230982 cg16734549 cg17166941 cg17290213 cg17292667 cg18266594 cg18335535 cg18584803 cg19273773 cg19378199 cg19523692 cg20115827 cg20558024 cg20608895 cg20899581 cg21186299 cg22115892 cg22454769 cg22549547 cg23098693 cg23193759 cg23500537 cg23606718 cg24079702 cg24888989 cg25090514 cg25344401 cg25635000 cg25726357 cg25743481 cg26019498 cg26647566 cg26792755 cg26928195 cg26940620 Age
0 0.252486 0.284724 0.243242 0.200685 0.904132 0.102795 0.473919 0.264084 0.367480 0.671434 0.075955 0.329343 0.217375 0.210861 1.000000 0.356048 0.577945 0.557148 0.249014 0.847134 0.254539 0.319858 0.220589 0.796789 0.361994 0.296101 0.105965 0.239796 0.169738 0.357586 0.365674 0.132575 0.250932 0.283227 1.000000 0.262259 0.208146 0.290623 0.113049 0.255710 0.555382 0.281046 0.168826 0.492007 0.442871 0.509569 0.219183 0.641244 0.339088 0.164062 0.227678 0.340220 0.541491 0.423010 0.621303 0.243750 0.869947 0.124120 0.317660 0.985243 0.645869 0.590888 0.841485 0.825372 0.904037 0.407343 0.223722 0.352113 0.855653 0.289593 0.428849 0.719758 0.800240 0.473586 68
1 0.867671 0.606590 0.803673 0.845942 0.086222 0.996915 0.871998 0.791823 0.877639 0.095326 0.857108 0.959701 0.688322 0.650640 0.062329 0.920434 0.687537 0.193038 0.891809 0.273775 0.583457 0.793486 0.798427 0.102910 0.773496 0.658568 0.759050 0.754877 0.787817 0.585895 0.792240 0.734543 0.854528 0.735642 0.389495 0.736709 0.600386 0.775989 0.819579 0.696350 0.110374 0.878199 0.659849 0.716714 0.771206 0.870711 0.919629 0.359592 0.677752 0.693433 0.683448 0.792423 0.933971 0.170669 0.249908 0.879879 0.111498 0.623053 0.626821 0.000000 0.157429 0.197567 0.160809 0.183031 0.202754 0.597896 0.826429 0.886736 0.086038 0.844088 0.761793 0.056548 0.270670 0.940083 21
2 0.789439 0.594060 0.857086 0.633195 0.000000 0.953293 0.832107 0.692119 0.641294 0.169303 0.935807 0.674698 0.789146 0.796555 0.208590 0.791318 0.777537 0.221895 0.804405 0.138006 0.738616 0.758083 0.749127 0.180998 0.769312 0.592938 0.578885 0.896125 0.553588 0.781393 0.898768 0.705339 0.861029 0.966552 0.274496 0.575738 0.490313 0.951172 0.833724 0.901890 0.115235 0.651489 0.619196 0.760758 0.902768 0.835082 0.610065 0.294962 0.907979 0.703284 0.775867 0.910324 0.858090 0.190595 0.041909 0.792941 0.146005 0.615639 0.761822 0.254161 0.101765 0.343289 0.356166 0.088915 0.114347 0.628616 0.697758 0.910687 0.133282 0.775737 0.809420 0.129848 0.126485 0.875580 20
3 0.615803 0.710968 0.874037 0.771136 0.199428 0.861378 0.861346 0.695713 0.638599 0.158479 0.903668 0.758718 0.581146 0.857357 0.307756 0.977337 0.805049 0.188333 0.788991 0.312119 0.706578 0.782006 0.793232 0.288111 0.691131 0.758102 0.829221 1.000000 0.742666 0.897607 0.797869 0.803221 0.912101 0.736800 0.315636 0.760577 0.609101 0.733923 0.578598 0.796944 0.096960 0.924135 0.612601 0.727117 0.905177 0.776481 0.727865 0.429820 0.666803 0.924595 0.567474 0.752196 0.742709 0.303662 0.168286 0.720899 0.099313 0.595328 0.734024 0.268583 0.293437 0.244840 0.311726 0.213415 0.418673 0.819981 0.816660 0.684730 0.146797 0.686270 0.777680 0.087826 0.335125 1.000000 23
4 0.847329 0.735766 0.858018 0.896453 0.186994 0.831964 0.762522 0.840186 0.830930 0.199264 0.788487 0.912629 0.702284 0.838771 0.065271 0.959230 0.912387 0.377203 0.794480 0.207909 0.766246 0.582117 0.902944 0.301144 0.765401 0.715115 0.646735 0.812084 0.697886 0.714310 0.890658 0.826644 0.944022 0.729517 0.530379 0.756268 0.764899 0.914573 0.825766 0.673394 0.017316 0.949335 0.614375 0.650553 0.898788 0.685396 0.823348 0.210175 0.831852 0.829067 0.858212 0.916433 0.778864 0.241186 0.144072 0.889536 0.058360 0.703567 0.852496 0.094223 0.341236 0.284903 0.231957 0.125196 0.333207 0.752592 0.899356 0.839006 0.174601 0.937948 0.716135 0.000000 0.114062 0.969760 22
I split the data into train/test/val data like this:
# Split the rows in two stages: 40% is held out first, then that remainder is
# halved. Assuming scikit-learn's train_test_split, this gives roughly
# 60% train / 20% test / 20% validation.
train_df, rest_df = train_test_split(df, test_size=0.4)
test_df, val_df = train_test_split(rest_df, test_size=0.5)
# Features are every column except 'Age'; 'Age' is the regression target.
# NOTE(review): requires_grad=True asks autograd to track gradients with
# respect to the input features themselves; presumably not needed just to
# train the network's weights — confirm.
# NOTE(review): none of the six tensors below is referenced again in this
# snippet; the loaders further down wrap the DataFrames, not these tensors.
x_train_tensor = torch.tensor(train_df.drop('Age', axis=1).to_numpy(), requires_grad=True)
y_train_tensor = torch.tensor(train_df['Age'].to_numpy())
x_test_tensor = torch.tensor(test_df.drop('Age', axis=1).to_numpy(), requires_grad=True)
y_test_tensor = torch.tensor(test_df['Age'].to_numpy())
x_val_tensor = torch.tensor(val_df.drop('Age', axis=1).to_numpy(), requires_grad=True)
y_val_tensor = torch.tensor(val_df['Age'].to_numpy())
# Batch size is one tenth of the number of training rows (integer division).
bs = len(train_df.index)//10
# NOTE(review): the loaders are handed the pandas DataFrames directly. The
# KeyError in the traceback further down is raised inside pandas' index
# lookup, which suggests the integer sample indices requested by DataLoader
# are being interpreted as column labels — verify.
train_dl = DataLoader(train_df, bs, shuffle=True)
# The test loader uses a single batch containing every test row.
test_dl = DataLoader(test_df, len(test_df), shuffle=False)
val_dl = DataLoader(val_df, bs, shuffle=False)
And here is the Network so far (very basic, just to test if it works):
class Net(nn.Module):
    """Small fully connected regressor: n -> n // 2 -> n // 4 -> 1.

    Args:
        input_size: Number of input features ``n``. When omitted, it falls
            back to ``len(df.columns) - 1`` (every column of the module-level
            ``df`` except the target), which is what the network used before
            the parameter existed.
    """

    def __init__(self, input_size=None):
        super().__init__()
        if input_size is None:
            # Backward-compatible default: derive the width from the global
            # DataFrame, excluding the single target column.
            input_size = len(df.columns) - 1
        self.fc1 = nn.Linear(input_size, input_size // 2)
        self.fc2 = nn.Linear(input_size // 2, input_size // 4)
        self.fc3 = nn.Linear(input_size // 4, 1)

    def forward(self, x):
        """Return one unbounded prediction per input row."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the output layer: a ReLU here clamps predictions at
        # zero and passes no gradient whenever the pre-activation is negative,
        # which can leave a regression model stuck predicting 0.
        return self.fc3(x)
# Build the network and print its module representation as a quick sanity
# check of the layers defined in Net.__init__.
net = Net()
print(net)
Here is where I get the error, on the last line:
# Training-loop skeleton: set up the loss and optimizer, then pull batches
# from the training loader for a fixed number of epochs. No forward/backward
# pass happens yet; the loop only checks that batches can be fetched.
loss = torch.nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
EPOCHS = 3
# Number of full batches per epoch (a trailing partial batch is not requested).
STEPS_PER_EPOCH = len(train_dl.dataset)//bs
print(train_dl.dataset)
for epoch in range(EPOCHS):
    # A DataLoader iterator is exhausted after one pass over the data, so a
    # fresh one is needed for every epoch; reusing a single iterator would
    # raise StopIteration as soon as the second epoch starts.
    iterator = iter(train_dl)
    for s in range(STEPS_PER_EPOCH):
        print(iterator)
        # Use the built-in next() (iterator protocol) rather than the
        # Python 2 style .next() method.
        next(iterator)
ID_REF cg00001854 cg00270460 cg00293191 ... cg26928195 cg26940620 Age
29 0.781979 0.744825 0.744579 ... 0.242138 0.854054 19
44 0.185400 0.299145 0.160084 ... 0.638449 0.413286 69
21 0.085470 0.217421 0.277675 ... 0.863455 0.512334 75
4 0.847329 0.735766 0.858018 ... 0.114062 0.969760 22
20 0.457293 0.462984 0.323835 ... 0.584259 0.481060 68
33 0.784562 0.845031 0.958335 ... 0.122210 0.854005 19
25 0.258434 0.354822 0.405620 ... 0.677245 0.540463 70
27 0.737131 0.768188 0.897724 ... 0.203228 0.831175 20
37 0.002051 0.202403 0.134198 ... 0.753844 0.302229 70
10 0.737427 0.537413 0.614343 ... 0.464244 0.723953 23
0 0.252486 0.284724 0.243242 ... 0.800240 0.473586 68
32 0.927260 1.000000 0.853864 ... 0.261990 0.892503 18
7 0.035825 0.271602 0.236109 ... 1.000000 0.471256 69
17 0.000000 0.202986 0.132144 ... 0.874550 0.342981 79
18 0.282112 0.479775 0.218852 ... 0.908217 0.426143 79
11 0.708797 0.536074 0.721171 ... 0.048768 0.699540 27
15 0.686921 0.639198 0.858981 ... 0.305142 0.978350 24
38 0.246031 0.186011 0.235928 ... 0.754013 0.342380 70
30 0.814767 0.771483 0.437789 ... 0.000000 0.658354 18
43 0.247471 0.399231 0.271619 ... 0.895016 0.468336 72
46 0.000428 0.263164 0.163303 ... 0.567005 0.252806 76
3 0.615803 0.710968 0.874037 ... 0.335125 1.000000 23
5 0.777925 0.821814 0.636676 ... 0.233359 0.753266 20
34 0.316262 0.307535 0.203090 ... 0.570755 0.351226 73
23 0.133038 0.000000 0.208442 ... 0.631202 0.459593 76
6 0.746102 0.585211 0.626580 ... 0.311914 0.753994 25
1 0.867671 0.606590 0.803673 ... 0.270670 0.940083 21
47 0.444606 0.502357 0.207560 ... 0.987106 0.446959 71
[28 rows x 75 columns]
<torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x7f166241c048>
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 13
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
6 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 13
I really have no idea what the error means or where to look. I'd greatly appreciate some guidance, thank you!
Upvotes: 2
Views: 4243
Reputation: 7693
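`DataLoader` fetches samples by calling `dataset[i]` with integer positions. Indexing a pandas DataFrame that way (e.g. `df[13]`) looks up a *column* labelled `13`, not row 13, which is why you get `KeyError: 13`.
Use a NumPy array instead of the DataFrame. You can use `to_numpy()` to convert the DataFrame to a NumPy array; indexing the array with an integer returns a row, which is what `DataLoader` expects.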
# Convert each DataFrame to a NumPy array so that integer indexing returns a
# row. float32 is requested explicitly because nn.Linear parameters are
# float32 by default, while a bare to_numpy() on this data yields float64
# batches that the layers would reject with a dtype mismatch.
# Each batch is a 2-D tensor of whole rows; in the sample shown, 'Age' is the
# last column, so it still has to be sliced off from the features before the
# forward pass.
train_dl = DataLoader(train_df.to_numpy(dtype="float32"), bs, shuffle=True)
test_dl = DataLoader(test_df.to_numpy(dtype="float32"), len(test_df), shuffle=False)
val_dl = DataLoader(val_df.to_numpy(dtype="float32"), bs, shuffle=False)
Upvotes: 3