Rohan Singh
Rohan Singh

Reputation: 471

How to make a class in pytorch use GPU

So I am running some code and getting the following error in Pytorch: "RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same"

From what I understand, this means that my model may not be pushed to the GPU, while the input data already is using the GPU. I can share my code if that would help (I am refraining from doing it right now since it is longer than a small code snippet).

I am aware that I can do something like

    myModel=Model()
    myModel.cuda()

However, I am making a class as part of a custom module that will be added to a Sequential wrapper. So, I can't really make an object out of it (I'm not good with OOP terminology, so I apologize for any technical writing mistakes). I was wondering if there is a way to get around this issue, and make the class always use the GPU, even though I never explicitly define an object?

If this was not clear enough, I can post my code, but as previously warned it may take some time to go through (not too long, but not very convenient either).

Any help is much appreciated.

Edit: Here is the code, I presume the issue is in the RLSTM class, since there was not an error before I added this.

class VGG(nn.Module):
'''
VGG model 
'''
def __init__(self, features): # features represents the layers array
    super(VGG, self).__init__()
    self.features = features
    self.classifier = nn.Sequential(
        nn.Dropout(),
        nn.Linear(512,512),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(512, 512),
        nn.ReLU(True),
        nn.Linear(512, 10),
    )
     # Initialize weights
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
            m.bias.data.zero_()


def forward(self, x): # x is the image, we run x through the layers
    print(x.size())
    x = self.features(x) # runs through all features, where each feature is a function
    x = x.view(x.size(0), -1) 
    # after running through features, does sequential steps to finally classify
    x = self.classifier(x)
    # print(x)
    return x


def make_layers(cfg, batch_norm=False):
   # print("Making layers!")
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
            layers+=[RLSTM()]

    return nn.Sequential(*layers)

class RLSTM(nn.Module):
def __init__(self):
    super(RLSTM,self).__init__()



def forward(self, image):
    print("going in rowlstm")
    global current
    global _layer
    global isgates
    size = image.size()
    b = size[0]
    indvs = list(image.split(1,0)) # split up the batch into individual images
    #print(indvs[0].size())
    tensor_array = []
    for i in range(b):
        current = 0
        _layer = []
        isgates = []
        tensor_array.append(self.RowLSTM(indvs[i]))

    seq=tuple(tensor_array)
    trans = torch.cat(seq,0)
    return trans.cuda() # trying to make floattensor error go away 
def RowLSTM(self, image): 
    global current
    global _layer
    global isgates


    # input-to-state (K_is * x_i) : 3x1 convolution. generate 4h x n x n tensor. 4hxnxn tensor contains all i -> s info

# the input to state convolution should only be computed one time 
    if current==0:
        n = image.size()[2]
        ch=image.size()[1]
        input_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1))
        isgates = self.splitIS(input_to_state(image)) # convolve, then split into gates (4 per row)
        cell=RowLSTMCell(0,torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1))
        # now have dummy, learnable variables for first row
        _layer.append(cell)

    else:   
        Cell_prev = _layer[current-1] # access previous row
        hidPrev = Cell_prev.getHiddenState() 
        ch = image.size()[1] 
    #   print("about to apply conv1d")
        state_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1)) # error is here: hidPrev is an array - not a valid number of input channel
    #   print("applied conv1d") 
        prevHid=Cell_prev.getHiddenState()
        ssgates = self.splitSS(state_to_state(prevHid.unsqueeze(0))) #need to unsqueeze (Ex: currently 16x5, need to make 1x16x5)
        gates = self.addGates(isgates,ssgates,current)
        # split gates
        ig, og, fg, gg = gates[0], gates[1], gates[2], gates[3] # into four, ADD SIGMOID!
        cell = RowLSTMCell(Cell_prev,ig,og,fg,gg,0,0)
        cell.compute()
        _layer.append(cell)
    # attempting to eliminate requirement of getting size

    #print(current)
    try:

        current+=1
        y=(isgates[0][0][1][current])
        return self.RowLSTM(image) 
    except Exception as error:
        concats=[]
        for cell in _layer:
            tensor=torch.unsqueeze(cell.h,0)

            concats.append(tensor)
        seq=tuple(concats)
        tensor=torch.cat(seq,3)
        return tensor

def splitIS(tensor): #always going to be splitting into 4 pieces, so no need to add extra parameters
    inputStateGates={}
    size=tensor.size() # 1 x 4h x n x n
    out_ft=size[1] # get 4h for the nxnx4h tensor
    num=size[2] # get n for the nxn image
    hh=out_ft/4 # we want to split the tensor into 4, for the gates
    tensor = torch.squeeze(tensor) # 4h x n x n

    # First, split by row: Creates n tensors of 4h x n x 1
    rows = list(tensor.split(1,2))

    for i in range(num):
        # Each row is a tensor of 4h x n x 1, split it into 4 of h x n x 1
        row=rows[i]
        inputStateGates[i]=list(row.split(hh,0))

    return inputStateGates 


def splitSS(tensor): # 1 x 4h x n x 1, create 4 of 1 x h x n x 1 
    size=tensor.size() 
    out_ft=size[1] # get 4h for the 1x4hxn tensor
    num=size[2] # get n for the 1xhxn row
    hh=out_ft/4 # we want to split the tensor into 4, for the gates
    tensor = tensor.squeeze(0) # 4h x n x 1
    splitted=list(tensor.split(hh,0))
    return splitted 


def addGates(i2s,s2s,key):
    """ these dictionaries are of form {key : [[i], [o], [f], [g]]}
        we want to add pairwise elemeents """

    # i2s is of form key: [[i], [o], [f], [g]] where each gate is hxn
    # s2s is of form [[h,n],[h,n],[h,n], [h,n]]
    gateSum = []
    for i in range(4): # always of length 4, representing the gates
        gateSum.append(torch.sigmoid(i2s[key][i] + s2s[i]))

    return gateSum

Upvotes: 0

Views: 5401

Answers (2)

Bongsang
Bongsang

Reputation: 51

When you use GPU you need to change the dataset to a Cuda floating type tensor. After putting simple if condition, I've never seen the same error so far. I hope my sample code will be helpful to you.

# Firstly, you need to network model to cuda.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = VGG()
net.to(device) # or net.cuda()

# Secondly, you also need to change your dataset to Cuda Float Tensor.
if "GPU" in device:
    images = images.type(torch.cuda.FloatTensor)
else:
    images = images.type(torch.FloatTensor)

Upvotes: 0

hkchengrex
hkchengrex

Reputation: 4826

You have to define the child modules inside the __init__ function so that they can be registered as parameters of the module. If they are not parameters, .cuda() would not be call on them when you call .cuda() for the parent.

If you really needed dynamic parameters/modules declaration, take a look here. The key is apaszke's answer.

Upvotes: 1

Related Questions