Reputation: 41
number of train data: 346
number of test data: 69
Epoch: [0] [0/346] eta: 0:35:20 lr: 0.000019 loss: -312.6024 (-312.6024) loss_classifier: 1.5789 (1.5789) loss_box_reg: 0.1299 (0.1299) loss_mask: -314.3485 (-314.3485) loss_objectness: 0.0266 (0.0266) loss_rpn_box_reg: 0.0106 (0.0106) time: 6.1275 data: 0.1599 max mem: 0
Loss is nan, stopping training
{'loss_classifier': tensor(nan, grad_fn=<...>), 'loss_box_reg': tensor(nan, grad_fn=<...>), 'loss_mask': tensor(nan, grad_fn=<...>), 'loss_objectness': tensor(nan, grad_fn=<...>), 'loss_rpn_box_reg': tensor(nan, grad_fn=<...>)}
An exception has occurred, use %tb to see the full traceback.
SystemExit: 1
And this is the dataset code:
import os

import cv2
import numpy as np
import torch
from PIL import Image

class maskrcnn_Dataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "images"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "masks"))))

    def __getitem__(self, idx):
        # load the image and convert it from BGR (OpenCV default) to RGB
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # the masks are not converted to RGB, because each value corresponds
        # to a different instance, with 0 being background; every image has a
        # folder of per-instance mask files named after its basename
        x = self.imgs[idx].split('.')
        source_mask = os.path.join(self.root, "masks", x[0])
        mask_for_all = []
        boxes = []
        xx = trier(os.listdir(source_mask))  # trier() is a sorting helper defined elsewhere
        for file_name in xx:
            mask = np.array(Image.open(os.path.join(source_mask, file_name)))
            mask_for_all.append(mask)
            # instance ids are the unique non-zero values of this mask file
            obj_ids = np.unique(mask)[1:]
            # split the value-encoded mask into a set of binary masks
            masks = mask == obj_ids[:, None, None]
            # compute the bounding box of each instance
            for i in range(len(obj_ids)):
                pos = np.where(masks[i])
                xmin = np.min(pos[1])
                xmax = np.max(pos[1])
                ymin = np.min(pos[0])
                ymax = np.max(pos[0])
                boxes.append([xmin, ymin, xmax, ymax])
        num_objs = len(boxes)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # labels come from lists built elsewhere, indexed by image file name
        if self.root.find("train") != -1:
            labels = class_ids_train[class_ids_train_names.index(self.imgs[idx])]
        else:
            labels = class_ids_val[class_ids_val_names.index(self.imgs[idx])]
        labels = torch.as_tensor(labels, dtype=torch.int64)
        masks = torch.as_tensor(mask_for_all, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
Upvotes: 2
Views: 903
Reputation: 1
I read your log: "loss_mask" is the first loss that goes wrong. It is already -314.3485 at step 0, before anything becomes NaN, and the mask loss is a binary cross-entropy, which can never be negative when the target masks only contain values in [0, 1]. So I guess there is a problem in how the masks are built, and I suggest you check the value range of the masks you put into target["masks"] (PNG masks are often stored as 0/255 instead of 0/1).
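For example, a quick check you could run inside __getitem__, as a minimal sketch reusing the mask_for_all list from your code (the 0/255 explanation is my assumption, not something your log proves):

    # Sketch: Mask R-CNN's mask loss expects binary target masks (0 background, 1 object).
    # Masks stored as 0/255 make loss_mask go negative and then NaN.
    for m in mask_for_all:
        if m.max() > 1:
            print("non-binary mask values:", np.unique(m))
    # If they really are 0/255, binarize them before building the tensor
    # (assumption: any non-zero pixel belongs to the object):
    mask_for_all = [(m > 0).astype(np.uint8) for m in mask_for_all]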
Upvotes: 0
Reputation: 11
There can be two issues:
1. Check the box coordinates: every [xmin, ymin, xmax, ymax] must describe a valid box, i.e. xmax > xmin and ymax > ymin with non-negative values. A degenerate box makes the regression losses blow up.
2. Make sure the number of masks is the same as the number of boxes; torchvision expects exactly one mask per box. (See the sketch below for both checks.)
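A minimal sketch of both checks, placed at the end of your __getitem__ right after boxes and mask_for_all are built (names taken from the question's code):

    # 1. Degenerate boxes (xmax <= xmin or ymax <= ymin) make the box losses unstable.
    for xmin, ymin, xmax, ymax in boxes.tolist():
        if xmax <= xmin or ymax <= ymin or xmin < 0 or ymin < 0:
            print("invalid box:", [xmin, ymin, xmax, ymax])
    # 2. One mask per box: in your code, boxes collects every object across all
    # mask files, while mask_for_all holds one entry per file, so they can diverge.
    if len(mask_for_all) != len(boxes):
        print(len(mask_for_all), "masks but", len(boxes), "boxes")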
Upvotes: 1