Ruxy1212
Ruxy1212

Reputation: 1

Detectron2: How do I add a new roi_head and make this branch predict multiple classes?

I want to train a model that has 2 sets of classifications. The first one is the category of the object (this has been implemented) and it works, while the second is the property of the object, and here the complexity lies here because an object can have zero, one or more properties.

The model will first detect a vehicle to belong to one of the categories defined, and then get the properties of this vehicle (color, model, purpose, etc.) if any exists.

I have read the detectron2 documentation on how to register new roi_heads, I have created the CustomROIHeads trying to duplicate the tasks for (i) training the categories and (ii) training the properties

class CustomROIHeads(StandardROIHeads):
    def __init__(self, cfg, input_shape):
        box_head = cfg.MODEL.ROI_HEADS.NAME
        box_predictor = nn.Module

        num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        num_properties = cfg.MODEL.ROI_HEADS.NUM_PROPERTIES

        batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
        positive_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
        proposal_matcher = Matcher(
            cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
            cfg.MODEL.ROI_HEADS.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        super().__init__(cfg, input_shape, num_classes=num_classes,
        batch_size_per_image=batch_size_per_image,
        positive_fraction=positive_fraction,
        proposal_matcher=proposal_matcher)

        self.property_fcn = nn.Linear(self.box_predictor.cls_score.in_features, num_properties)
        nn.init.normal_(self.property_fcn.weight, std=0.01)
        nn.init.constant_(self.property_fcn.bias, 0)

        self.category_fcn = nn.Linear(self.box_predictor.cls_score.in_features, num_classes)
        nn.init.normal_(self.category_fcn.weight, std=0.01)
        nn.init.constant_(self.category_fcn.bias, 0)

    def forward(self, images, features, proposals, targets=None):
        features = [features[f] for f in self.in_features]
        sampling_ratio    = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler_type       = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE

        box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])
        box_features = self.box_head(box_features)
        box_features = self.box_predictor(box_features)

        property_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])
        property_features = self.box_head(box_features)
        property_features = self.box_predictor(box_features)

        if self.training:
            losses = {}
            losses.update(super().forward(images, features, proposals, targets))
            property_labels = [torch.tensor(x.get("property_ids", []), dtype=torch.float32) for x in targets]
            property_labels_padded = torch.nn.utils.rnn.pad_sequence(property_labels, batch_first=True, padding_value=-1)
            property_labels_padded = property_labels_padded[:, :property_features.shape[1]]
            property_loss = F.binary_cross_entropy_with_logits(property_features, property_labels_padded)
            losses.update({"loss_property": property_loss})

            return losses
        else:
            class_logits, box_regression = box_features.split([self.num_classes, self.num_classes * 4], dim=1)
            class_prob = F.softmax(class_logits, dim=-1)
            attribute_prob = attribute_features.sigmoid()
            pred_instances = Instances(
                image_sizes[0],
                pred_boxes=Boxes(predictions),
                scores=torch.max(class_prob, dim=-1)[0],
                pred_classes=torch.argmax(class_prob, dim=-1),
                pred_properties=property_prob,
            )
            return pred_instances

    @classmethod
    def from_config(cls, cfg, input_shape):
        ret = {
            "train_on_pred_boxes": cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES,
            **cls._init_box_head(cfg, input_shape),
        }
        return ret 

These are my configuration parameters

cfg = get_cfg()
cfg.OUTPUT_DIR = f"/gdrive/My Drive/attr_training"
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")) # COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
cfg.DATASETS.TRAIN = ("dt_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 1000 
cfg.SOLVER.STEPS = []
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 7
cfg.MODEL.ROI_HEADS.NUM_PROPERTIES = 13
cfg.INPUT.MASK_FORMAT = "bitmask"
cfg.MODEL.ROI_HEADS.NAME = "CustomROIHeads"
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

But I receive errors "AttributeError: 'tuple' object has no attribute 'flatten'" at this line property_features = self.box_head(box_features).

Just to restate my problem, I want to modify the mask-rcnn model to add a new branch that can detect the properties of the object being classified, and these properties can either be 0 or more.

Upvotes: 0

Views: 27

Answers (0)

Related Questions