Reputation: 1084
I have the following piece of code, which loads a dataset, resizes the images to 1200*800, loads their annotations, and then reports the accuracy and precision:
# resize images
resized_images = []
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
for i in range(len(file_names)):
    print("Resizing: " + str(i))
    image = skimage.io.imread(file_names[i])
    image_resized = resize(image, (1200, 800), anti_aliasing=True)
    resized_images.append(image_resized)

masks_prediction = np.zeros((1200, 800, len(file_names)))
for i in range(len(resized_images)):
    print(i)
    image = resized_images[i]
    predictions = model.detect([image], verbose=1)
    p = predictions[0]
    masks = p['masks']
    merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
    for j in range(masks.shape[2]):
        merged_mask[masks[:, :, j] == True] = True
    masks_prediction[:, :, i] = merged_mask
print(masks_prediction.shape)
# load annotations
dataset = components.ComponentsDataset()
dataset.load_components(ANNOTATION_DIR, "predict")

resized_images = []
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
for i in range(len(file_names)):
    print("Resizing: " + str(i))
    image = skimage.io.imread(file_names[i])
    image_resized = resize(image, (1200, 800), anti_aliasing=True)
    resized_images.append(image_resized)
# report the accuracy and precision
accuracy = 0
precision = 0
for image_id in range(len(dataset.image_info)):
    name = dataset.image_info[image_id]['id']
    file_name = os.path.join(IMAGE_DIR, name)
    image_id_pred = file_names.index(file_name)
    merged_mask = masks_prediction[:, :, image_id_pred]

    annotated_mask = dataset.load_mask(image_id)[0]
    merged_annotated_mask = np.zeros((1200, 800))
    for i in range(annotated_mask.shape[2]):
        merged_annotated_mask[annotated_mask[:, :, i] == True] = True

    accuracy += np.sum(merged_mask == merged_annotated_mask) / (1200 * 800)
    all_correct = np.sum(merged_annotated_mask[merged_mask == 1])
    precision += all_correct / (np.sum(merged_mask))

print('accuracy:{}'.format(accuracy / len(file_names)))
print('precision:{}'.format(precision / len(file_names)))
However, I get the following error, which makes me think something is totally wrong with the sizes:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-38-a652e79112fb> in <module>()
10 merged_annotated_mask = np.zeros((1200, 800))
11 for i in range(annotated_mask.shape[2]):
---> 12 merged_annotated_mask[annotated_mask[:,:,i]==True] = True
13 accuracy += np.sum(merged_mask==merged_annotated_mask) / (1200 * 800)
14 all_correct = np.sum(merged_annotated_mask[merged_mask == 1])
IndexError: boolean index did not match indexed array along dimension 0; dimension is 1200 but corresponding boolean dimension is 1572
I was not getting this error before I resized the images to 1200*800, but then I was getting a different error telling me that the sizes did not match. At this point I would really appreciate a solution that mitigates this size issue once and for all. I run into this kind of problem in Python every now and then, and it is getting frustrating.
EDIT: Including the Dataset class.
############################################################
# Dataset
############################################################

class ComponentsDataset(utils.Dataset):

    def load_components(self, dataset_dir, subset):
        """Load a subset of the Components dataset.
        dataset_dir: Root directory of the dataset.
        subset: Subset to load: train or val
        """
        # Add classes.
        self.add_class("components", 1, "screw")
        self.add_class("components", 2, "lid")
        # Which subset?
        assert subset in ["train", "val", "predict"]
        dataset_dir = os.path.join(dataset_dir, subset)
        # We mostly care about the x and y coordinates of each region.
        annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json")))
        annotations = list(annotations.values())  # don't need the dict keys
        # The VIA tool saves images in the JSON even if they don't have any
        # annotations. Skip unannotated images.
        annotations = [a for a in annotations if a['regions']]
        # Add images
        for a in annotations:
            # Get the x, y coordinates of the points of the polygons that make up
            # the outline of each object instance. They are stored in the
            # shape_attributes (see JSON format above).
            polygons = [r['shape_attributes'] for r in a['regions']]
            names = [r['region_attributes'] for r in a['regions']]
            # load_mask() needs the image size to convert polygons to masks.
            # Unfortunately, VIA doesn't include it in the JSON, so we must read
            # the image. This is only manageable since the dataset is tiny.
            image_path = os.path.join(dataset_dir, a['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]
            self.add_image(
                "components",
                image_id=a['filename'],  # use file name as a unique image id
                path=image_path,
                width=width, height=height,
                polygons=polygons,
                names=names)
    def load_mask(self, image_id):
        """Generate instance masks for an image.
        Returns:
         masks: A bool array of shape [height, width, instance count] with
             one mask per instance.
         class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a components dataset image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "components":
            return super(self.__class__, self).load_mask(image_id)
        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        info = self.image_info[image_id]
        class_names = info["names"]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
            mask[rr, cc, i] = 1
        # Assign class_ids by reading class_names.
        # In the components dataset, regions are labeled with the names 'screw' and 'lid'.
        class_ids = np.zeros([len(info["polygons"])])
        for i, p in enumerate(class_names):
            # "name" is the attribute name decided when labeling, e.g. 'region_attributes': {name:'a'}
            if p['name'] == 'screw':
                class_ids[i] = 1
            elif p['name'] == 'lid':
                class_ids[i] = 2
            # assert code here to extend to other labels
        class_ids = class_ids.astype(int)
        # Return mask, and array of class IDs of each instance.
        return mask.astype(np.bool), class_ids
    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "components":
            return info["path"]
        else:
            super(self.__class__, self).image_reference(image_id)
    def load_mask_hc(self, image_id):
        """Generate instance masks for an image.
        Returns:
         masks: A bool array of shape [height, width, instance count] with
             one mask per instance.
         class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a components dataset image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "components":
            return super(self.__class__, self).load_mask(image_id)
        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        info = self.image_info[image_id]
        # "name" is the attribute name decided when labeling, e.g. 'region_attributes': {name:'a'}
        class_names = info["names"]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
            mask[rr, cc, i] = 1
        # Assign class_ids by reading class_names.
        # In the components dataset, regions are labeled with the names 'screw' and 'lid'.
        class_ids = np.zeros([len(info["polygons"])])
        for i, p in enumerate(class_names):
            if p['name'] == 'lid':
                class_ids[i] = 14
            elif p['name'] == 'error':
                pass
            else:
                class_ids[i] = int(p['name'])
            # assert code here to extend to other labels
        class_ids = class_ids.astype(int)
        # Return mask, and array of class IDs of each instance.
        return mask.astype(np.bool), class_ids
Upvotes: 0
Views: 370
Reputation: 11232
The images in the list resized_images are all correctly resized to (1200, 800). But the annotated masks are loaded from the dataset and are not resized on the fly:

annotated_mask = dataset.load_mask(image_id)[0]

The method load_mask generates a mask with the original height and width of the image, not the resized one. The mask size and the image size need to match.
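You can see the mismatch by printing both shapes just before the failing line (the 1572 in your traceback is the original image height):

# illustrative check inside your evaluation loop
print(masks_prediction[:, :, image_id_pred].shape)   # (1200, 800)
print(dataset.load_mask(image_id)[0].shape)          # (orig_height, orig_width, n_instances), e.g. orig_height == 1572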
You could resize the mask images after loading them (similar to resizing the input images) to make this approach work.
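A minimal sketch of that first option, assuming skimage.transform.resize and the 1200*800 target size from your code (the threshold back to bool is just one way of keeping the mask binary):

from skimage.transform import resize

# inside your evaluation loop, instead of using the raw mask:
annotated_mask = dataset.load_mask(image_id)[0]            # (orig_h, orig_w, n_instances)
annotated_mask = resize(
    annotated_mask.astype(float),
    (1200, 800, annotated_mask.shape[-1]),                 # match the resized images
    order=0,                                               # nearest neighbour keeps the mask crisp
    preserve_range=True,
    anti_aliasing=False) > 0.5                             # back to a boolean mask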
Another option is to do batch preprocessing: resize all images to a common size once, save them as .jpg again, and use those as the new inputs, so that this program does not do any resizing at all. But then you'd have to carefully adapt the other data (like the polygons) as well to match the new coordinates.
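A rough, purely illustrative sketch of that second option (RESIZED_DIR and the polygon scaling are assumptions; you would still have to write the updated VIA JSON back yourself):

import os
from glob import glob
import skimage.io
from skimage.transform import resize

TARGET_H, TARGET_W = 1200, 800

for path in glob(os.path.join(IMAGE_DIR, "*.jpg")):
    image = skimage.io.imread(path)
    orig_h, orig_w = image.shape[:2]
    resized = resize(image, (TARGET_H, TARGET_W), anti_aliasing=True)
    # save the resized image to a separate folder (hypothetical RESIZED_DIR)
    skimage.io.imsave(os.path.join(RESIZED_DIR, os.path.basename(path)),
                      (resized * 255).astype("uint8"))

    # the polygons then need the same scaling, e.g. for each region:
    # scale_y, scale_x = TARGET_H / orig_h, TARGET_W / orig_w
    # shape['all_points_y'] = [int(y * scale_y) for y in shape['all_points_y']]
    # shape['all_points_x'] = [int(x * scale_x) for x in shape['all_points_x']]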
Upvotes: 1