Reputation: 1
I want to train a model that has 2 sets of classifications. The first one is the category of the object (this has been implemented) and it works, while the second is the property of the object, and here the complexity lies here because an object can have zero, one or more properties.
The model will first detect a vehicle to belong to one of the categories defined, and then get the properties of this vehicle (color, model, purpose, etc.) if any exists.
I have read the detectron2 documentation on how to register new roi_heads, I have created the CustomROIHeads trying to duplicate the tasks for (i) training the categories and (ii) training the properties
class CustomROIHeads(StandardROIHeads):
def __init__(self, cfg, input_shape):
box_head = cfg.MODEL.ROI_HEADS.NAME
box_predictor = nn.Module
num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
num_properties = cfg.MODEL.ROI_HEADS.NUM_PROPERTIES
batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
positive_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
proposal_matcher = Matcher(
cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
cfg.MODEL.ROI_HEADS.IOU_LABELS,
allow_low_quality_matches=True,
)
super().__init__(cfg, input_shape, num_classes=num_classes,
batch_size_per_image=batch_size_per_image,
positive_fraction=positive_fraction,
proposal_matcher=proposal_matcher)
self.property_fcn = nn.Linear(self.box_predictor.cls_score.in_features, num_properties)
nn.init.normal_(self.property_fcn.weight, std=0.01)
nn.init.constant_(self.property_fcn.bias, 0)
self.category_fcn = nn.Linear(self.box_predictor.cls_score.in_features, num_classes)
nn.init.normal_(self.category_fcn.weight, std=0.01)
nn.init.constant_(self.category_fcn.bias, 0)
def forward(self, images, features, proposals, targets=None):
features = [features[f] for f in self.in_features]
sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])
box_features = self.box_head(box_features)
box_features = self.box_predictor(box_features)
property_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])
property_features = self.box_head(box_features)
property_features = self.box_predictor(box_features)
if self.training:
losses = {}
losses.update(super().forward(images, features, proposals, targets))
property_labels = [torch.tensor(x.get("property_ids", []), dtype=torch.float32) for x in targets]
property_labels_padded = torch.nn.utils.rnn.pad_sequence(property_labels, batch_first=True, padding_value=-1)
property_labels_padded = property_labels_padded[:, :property_features.shape[1]]
property_loss = F.binary_cross_entropy_with_logits(property_features, property_labels_padded)
losses.update({"loss_property": property_loss})
return losses
else:
class_logits, box_regression = box_features.split([self.num_classes, self.num_classes * 4], dim=1)
class_prob = F.softmax(class_logits, dim=-1)
attribute_prob = attribute_features.sigmoid()
pred_instances = Instances(
image_sizes[0],
pred_boxes=Boxes(predictions),
scores=torch.max(class_prob, dim=-1)[0],
pred_classes=torch.argmax(class_prob, dim=-1),
pred_properties=property_prob,
)
return pred_instances
@classmethod
def from_config(cls, cfg, input_shape):
ret = {
"train_on_pred_boxes": cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES,
**cls._init_box_head(cfg, input_shape),
}
return ret
These are my configuration parameters
cfg = get_cfg()
cfg.OUTPUT_DIR = f"/gdrive/My Drive/attr_training"
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")) # COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
cfg.DATASETS.TRAIN = ("dt_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 1000
cfg.SOLVER.STEPS = []
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 7
cfg.MODEL.ROI_HEADS.NUM_PROPERTIES = 13
cfg.INPUT.MASK_FORMAT = "bitmask"
cfg.MODEL.ROI_HEADS.NAME = "CustomROIHeads"
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
But I receive errors "AttributeError: 'tuple' object has no attribute 'flatten'" at this line property_features = self.box_head(box_features)
.
Just to restate my problem, I want to modify the mask-rcnn model to add a new branch that can detect the properties of the object being classified, and these properties can either be 0 or more.
Upvotes: 0
Views: 27