Reputation: 471
So I am running some code and getting the following error in Pytorch: "RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same"
From what I understand, this means that my model may not be pushed to the GPU, while the input data already is using the GPU. I can share my code if that would help (I am refraining from doing it right now since it is longer than a small code snippet).
I am aware that I can do something like
myModel=Model()
myModel.cuda()
However, I am making a class as part of a custom module that will be added to a Sequential wrapper. So, I can't really make an object out of it (I'm not good with OOP terminology, so I apologize for any technical writing mistakes). I was wondering if there is a way to get around this issue, and make the class always use the GPU, even though I never explicitly define an object?
If this was not clear enough, I can post my code, but as previously warned it may take some time to go through (not too long, but not very convenient either).
Any help is much appreciated.
Edit: Here is the code, I presume the issue is in the RLSTM class, since there was not an error before I added this.
class VGG(nn.Module):
'''
VGG model
'''
def __init__(self, features): # features represents the layers array
super(VGG, self).__init__()
self.features = features
self.classifier = nn.Sequential(
nn.Dropout(),
nn.Linear(512,512),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(512, 512),
nn.ReLU(True),
nn.Linear(512, 10),
)
# Initialize weights
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
m.bias.data.zero_()
def forward(self, x): # x is the image, we run x through the layers
print(x.size())
x = self.features(x) # runs through all features, where each feature is a function
x = x.view(x.size(0), -1)
# after running through features, does sequential steps to finally classify
x = self.classifier(x)
# print(x)
return x
def make_layers(cfg, batch_norm=False):
# print("Making layers!")
layers = []
in_channels = 3
for v in cfg:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
layers+=[RLSTM()]
return nn.Sequential(*layers)
class RLSTM(nn.Module):
def __init__(self):
super(RLSTM,self).__init__()
def forward(self, image):
print("going in rowlstm")
global current
global _layer
global isgates
size = image.size()
b = size[0]
indvs = list(image.split(1,0)) # split up the batch into individual images
#print(indvs[0].size())
tensor_array = []
for i in range(b):
current = 0
_layer = []
isgates = []
tensor_array.append(self.RowLSTM(indvs[i]))
seq=tuple(tensor_array)
trans = torch.cat(seq,0)
return trans.cuda() # trying to make floattensor error go away
def RowLSTM(self, image):
global current
global _layer
global isgates
# input-to-state (K_is * x_i) : 3x1 convolution. generate 4h x n x n tensor. 4hxnxn tensor contains all i -> s info
# the input to state convolution should only be computed one time
if current==0:
n = image.size()[2]
ch=image.size()[1]
input_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1))
isgates = self.splitIS(input_to_state(image)) # convolve, then split into gates (4 per row)
cell=RowLSTMCell(0,torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1))
# now have dummy, learnable variables for first row
_layer.append(cell)
else:
Cell_prev = _layer[current-1] # access previous row
hidPrev = Cell_prev.getHiddenState()
ch = image.size()[1]
# print("about to apply conv1d")
state_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1)) # error is here: hidPrev is an array - not a valid number of input channel
# print("applied conv1d")
prevHid=Cell_prev.getHiddenState()
ssgates = self.splitSS(state_to_state(prevHid.unsqueeze(0))) #need to unsqueeze (Ex: currently 16x5, need to make 1x16x5)
gates = self.addGates(isgates,ssgates,current)
# split gates
ig, og, fg, gg = gates[0], gates[1], gates[2], gates[3] # into four, ADD SIGMOID!
cell = RowLSTMCell(Cell_prev,ig,og,fg,gg,0,0)
cell.compute()
_layer.append(cell)
# attempting to eliminate requirement of getting size
#print(current)
try:
current+=1
y=(isgates[0][0][1][current])
return self.RowLSTM(image)
except Exception as error:
concats=[]
for cell in _layer:
tensor=torch.unsqueeze(cell.h,0)
concats.append(tensor)
seq=tuple(concats)
tensor=torch.cat(seq,3)
return tensor
def splitIS(tensor): #always going to be splitting into 4 pieces, so no need to add extra parameters
inputStateGates={}
size=tensor.size() # 1 x 4h x n x n
out_ft=size[1] # get 4h for the nxnx4h tensor
num=size[2] # get n for the nxn image
hh=out_ft/4 # we want to split the tensor into 4, for the gates
tensor = torch.squeeze(tensor) # 4h x n x n
# First, split by row: Creates n tensors of 4h x n x 1
rows = list(tensor.split(1,2))
for i in range(num):
# Each row is a tensor of 4h x n x 1, split it into 4 of h x n x 1
row=rows[i]
inputStateGates[i]=list(row.split(hh,0))
return inputStateGates
def splitSS(tensor): # 1 x 4h x n x 1, create 4 of 1 x h x n x 1
size=tensor.size()
out_ft=size[1] # get 4h for the 1x4hxn tensor
num=size[2] # get n for the 1xhxn row
hh=out_ft/4 # we want to split the tensor into 4, for the gates
tensor = tensor.squeeze(0) # 4h x n x 1
splitted=list(tensor.split(hh,0))
return splitted
def addGates(i2s,s2s,key):
""" these dictionaries are of form {key : [[i], [o], [f], [g]]}
we want to add pairwise elemeents """
# i2s is of form key: [[i], [o], [f], [g]] where each gate is hxn
# s2s is of form [[h,n],[h,n],[h,n], [h,n]]
gateSum = []
for i in range(4): # always of length 4, representing the gates
gateSum.append(torch.sigmoid(i2s[key][i] + s2s[i]))
return gateSum
Upvotes: 0
Views: 5401
Reputation: 51
When you use GPU you need to change the dataset to a Cuda floating type tensor. After putting simple if condition, I've never seen the same error so far. I hope my sample code will be helpful to you.
# Firstly, you need to network model to cuda.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = VGG()
net.to(device) # or net.cuda()
# Secondly, you also need to change your dataset to Cuda Float Tensor.
if "GPU" in device:
images = images.type(torch.cuda.FloatTensor)
else:
images = images.type(torch.FloatTensor)
Upvotes: 0
Reputation: 4826
You have to define the child modules inside the __init__
function so that they can be registered as parameters of the module. If they are not parameters, .cuda()
would not be call on them when you call .cuda()
for the parent.
If you really needed dynamic parameters/modules declaration, take a look here. The key is apaszke's answer.
Upvotes: 1