Unverified Commit 9a1ba140 authored by Francisco Massa, committed by GitHub

add support for pascal voc dataset and evaluate (#207)

* add support for pascal voc dataset and evaluate

* optimization for adding voc dataset

* make inference.py dataset-agnostic; add use_difficult option to voc dataset

* handle voc difficult objects correctly

* Remove dependency on lxml plus minor improvements

* More cleanups

* More comments and improvements

* Lint fix

* Move configs to their own folder
parent 7bc87084
@@ -83,6 +83,8 @@ ln -s /path_to_coco_dataset/annotations datasets/coco/annotations
ln -s /path_to_coco_dataset/train2014 datasets/coco/train2014
ln -s /path_to_coco_dataset/test2014 datasets/coco/test2014
ln -s /path_to_coco_dataset/val2014 datasets/coco/val2014
# for the Pascal VOC dataset:
ln -s /path_to_VOCdevkit_dir datasets/voc
```
You can also configure your own paths to the datasets.
...
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN:
    PRE_NMS_TOP_N_TEST: 6000
    POST_NMS_TOP_N_TEST: 300
    ANCHOR_SIZES: (128, 256, 512)
  ROI_BOX_HEAD:
    NUM_CLASSES: 21
DATASETS:
  TRAIN: ("voc_2007_trainval",)
  TEST: ("voc_2007_test",)
SOLVER:
  BASE_LR: 0.001
  WEIGHT_DECAY: 0.0001
  STEPS: (50000,)
  MAX_ITER: 70000
  IMS_PER_BATCH: 1
TEST:
  IMS_PER_BATCH: 1
...
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN:
    PRE_NMS_TOP_N_TEST: 6000
    POST_NMS_TOP_N_TEST: 300
    ANCHOR_SIZES: (128, 256, 512)
  ROI_BOX_HEAD:
    NUM_CLASSES: 21
DATASETS:
  TRAIN: ("voc_2007_trainval",)
  TEST: ("voc_2007_test",)
SOLVER:
  BASE_LR: 0.004
  WEIGHT_DECAY: 0.0001
  STEPS: (12500,)
  MAX_ITER: 17500
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 4
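The second config is the first one rescaled for a larger effective batch: with 4 images per batch instead of 1, the learning rate is multiplied by 4 and the schedule is divided by 4, keeping the total number of epochs constant. A small sketch of that relationship (the helper name is ours, purely illustrative):

```
# Hypothetical helper illustrating how the 4-image config is derived
# from the 1-image one via the linear scaling rule.
def scale_schedule(base_lr, steps, max_iter, batch_scale):
    return (
        base_lr * batch_scale,
        tuple(s // batch_scale for s in steps),
        max_iter // batch_scale,
    )

print(scale_schedule(0.001, (50000,), 70000, 4))
# (0.004, (12500,), 17500) -- matching BASE_LR, STEPS, MAX_ITER above
```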
@@ -21,6 +21,13 @@ class DatasetCatalog(object):
            "coco/val2014",
            "coco/annotations/instances_valminusminival2014.json",
        ),
        "voc_2007_trainval": ("voc/VOC2007", 'trainval'),
        "voc_2007_test": ("voc/VOC2007", 'test'),
        "voc_2012_train": ("voc/VOC2012", 'train'),
        "voc_2012_trainval": ("voc/VOC2012", 'trainval'),
        "voc_2012_val": ("voc/VOC2012", 'val'),
        "voc_2012_test": ("voc/VOC2012", 'test'),
    }

    @staticmethod
@@ -36,6 +43,17 @@ class DatasetCatalog(object):
                factory="COCODataset",
                args=args,
            )
        elif "voc" in name:
            data_dir = DatasetCatalog.DATA_DIR
            attrs = DatasetCatalog.DATASETS[name]
            args = dict(
                data_dir=os.path.join(data_dir, attrs[0]),
                split=attrs[1],
            )
            return dict(
                factory="PascalVOCDataset",
                args=args,
            )
        raise RuntimeError("Dataset not available: {}".format(name))
...
@@ -26,7 +26,8 @@ def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True):
    """
    if not isinstance(dataset_list, (list, tuple)):
        raise RuntimeError(
            "dataset_list should be a list of strings, got {}".format(dataset_list)
        )
    datasets = []
    for dataset_name in dataset_list:
        data = dataset_catalog.get(dataset_name)
@@ -36,6 +37,8 @@ def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True):
        # during training
        if data["factory"] == "COCODataset":
            args["remove_images_without_annotations"] = is_train
        if data["factory"] == "PascalVOCDataset":
            args["use_difficult"] = not is_train
        args["transforms"] = transforms
        # make dataset from factory
        dataset = factory(**args)
@@ -95,7 +98,9 @@ def make_batch_data_sampler(
            sampler, images_per_batch, drop_last=False
        )
    if num_iters is not None:
        batch_sampler = samplers.IterationBasedBatchSampler(
            batch_sampler, num_iters, start_iter
        )
    return batch_sampler
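Note the `use_difficult = not is_train` line: difficult objects are dropped from the training targets but kept at test time so the VOC metric can explicitly ignore them. A sketch of the factory dispatch, continuing the catalog example above (names as in build.py; the data on disk is assumed, not verified):

```
# Hypothetical continuation: turn the catalog entry into a dataset object.
from maskrcnn_benchmark.data import datasets as D

factory = getattr(D, data["factory"])   # -> PascalVOCDataset
args = data["args"]
args["use_difficult"] = True            # is_train=False: keep difficult boxes
dataset = factory(**args)               # reads datasets/voc/VOC2007/...
```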
...
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from .coco import COCODataset
from .voc import PascalVOCDataset
from .concat_dataset import ConcatDataset

__all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset"]
@@ -11,7 +11,6 @@ class COCODataset(torchvision.datasets.coco.CocoDetection):
        self, ann_file, root, remove_images_without_annotations, transforms=None
    ):
        super(COCODataset, self).__init__(root, ann_file)
        # sort indices for reproducible results
        self.ids = sorted(self.ids)
...
from maskrcnn_benchmark.data import datasets

from .coco import coco_evaluation
from .voc import voc_evaluation


def evaluate(dataset, predictions, output_folder, **kwargs):
    """Evaluate a dataset, dispatching on the dataset type.

    Args:
        dataset: Dataset object
        predictions (list[BoxList]): each item in the list represents the
            prediction results for one image.
        output_folder: output folder, to save evaluation files or results.
        **kwargs: other args.
    Returns:
        evaluation result
    """
    args = dict(
        dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs
    )
    if isinstance(dataset, datasets.COCODataset):
        return coco_evaluation(**args)
    elif isinstance(dataset, datasets.PascalVOCDataset):
        return voc_evaluation(**args)
    else:
        dataset_name = dataset.__class__.__name__
        raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name))
from .coco_eval import do_coco_evaluation


def coco_evaluation(
    dataset,
    predictions,
    output_folder,
    box_only,
    iou_types,
    expected_results,
    expected_results_sigma_tol,
):
    return do_coco_evaluation(
        dataset=dataset,
        predictions=predictions,
        box_only=box_only,
        output_folder=output_folder,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )
...
import logging

from .voc_eval import do_voc_evaluation


def voc_evaluation(dataset, predictions, output_folder, box_only, **_):
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    if box_only:
        logger.warning("voc evaluation doesn't support box_only, ignored.")
    logger.info("performing voc evaluation, ignoring iou_types.")
    return do_voc_evaluation(
        dataset=dataset,
        predictions=predictions,
        output_folder=output_folder,
        logger=logger,
    )
# A modified version of the detection evaluation code from the chainercv
# repository:
# https://github.com/chainer/chainercv/blob/master/chainercv/evaluations/eval_detection_voc.py
from __future__ import division

import os
from collections import defaultdict

import numpy as np

from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou


def do_voc_evaluation(dataset, predictions, output_folder, logger):
    # TODO need to make the use_07_metric format available
    # for the user to choose
    pred_boxlists = []
    gt_boxlists = []
    for image_id, prediction in enumerate(predictions):
        img_info = dataset.get_img_info(image_id)
        if len(prediction) == 0:
            continue
        image_width = img_info["width"]
        image_height = img_info["height"]
        prediction = prediction.resize((image_width, image_height))
        pred_boxlists.append(prediction)

        gt_boxlist = dataset.get_groundtruth(image_id)
        gt_boxlists.append(gt_boxlist)
    result = eval_detection_voc(
        pred_boxlists=pred_boxlists,
        gt_boxlists=gt_boxlists,
        iou_thresh=0.5,
        use_07_metric=True,
    )
    result_str = "mAP: {:.4f}\n".format(result["map"])
    for i, ap in enumerate(result["ap"]):
        if i == 0:  # skip background
            continue
        result_str += "{:<16}: {:.4f}\n".format(
            dataset.map_class_id_to_class_name(i), ap
        )
    logger.info(result_str)
    if output_folder:
        with open(os.path.join(output_folder, "result.txt"), "w") as fid:
            fid.write(result_str)
    return result
def eval_detection_voc(pred_boxlists, gt_boxlists, iou_thresh=0.5, use_07_metric=False):
    """Evaluate on the VOC dataset.

    Args:
        pred_boxlists (list[BoxList]): predicted boxlists, with "labels" and
            "scores" fields.
        gt_boxlists (list[BoxList]): ground-truth boxlists, with a "labels" field.
        iou_thresh: IoU threshold above which a prediction matches a
            ground-truth box.
        use_07_metric (bool): whether to use the 11-point VOC2007 metric.
    Returns:
        dict with per-class average precisions ("ap") and their mean ("map").
    """
    assert len(gt_boxlists) == len(
        pred_boxlists
    ), "Length of gt and pred lists need to be same."
    prec, rec = calc_detection_voc_prec_rec(
        pred_boxlists=pred_boxlists, gt_boxlists=gt_boxlists, iou_thresh=iou_thresh
    )
    ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric)
    return {"ap": ap, "map": np.nanmean(ap)}
def calc_detection_voc_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of
    predicted bounding boxes obtained from a dataset which has :math:`N`
    images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.
    """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)
    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        pred_bbox = pred_boxlist.bbox.numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox.numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        gt_difficult = gt_boxlist.get_field("difficult").numpy()

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1
            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.size),
                BoxList(gt_bbox_l, gt_boxlist.size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos.keys():
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]

    return prec, rec
def calc_detection_voc_ap(prec, rec, use_07_metric=False):
    """Calculate average precisions based on evaluation code of PASCAL VOC.

    This function calculates average precisions
    from given precisions and recalls.
    The code is based on the evaluation code used in PASCAL VOC Challenge.

    Args:
        prec (list of numpy.array): A list of arrays.
            :obj:`prec[l]` indicates precision for class :math:`l`.
            If :obj:`prec[l]` is :obj:`None`, this function returns
            :obj:`numpy.nan` for class :math:`l`.
        rec (list of numpy.array): A list of arrays.
            :obj:`rec[l]` indicates recall for class :math:`l`.
            If :obj:`rec[l]` is :obj:`None`, this function returns
            :obj:`numpy.nan` for class :math:`l`.
        use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric
            for calculating average precision. The default value is
            :obj:`False`.
    Returns:
        ~numpy.ndarray:
        This function returns an array of average precisions.
        The :math:`l`-th value corresponds to the average precision
        for class :math:`l`. If :obj:`prec[l]` or :obj:`rec[l]` is
        :obj:`None`, the corresponding value is set to :obj:`numpy.nan`.
    """
    n_fg_class = len(prec)
    ap = np.empty(n_fg_class)
    for l in range(n_fg_class):
        if prec[l] is None or rec[l] is None:
            ap[l] = np.nan
            continue

        if use_07_metric:
            # 11 point metric
            ap[l] = 0
            for t in np.arange(0.0, 1.1, 0.1):
                if np.sum(rec[l] >= t) == 0:
                    p = 0
                else:
                    p = np.max(np.nan_to_num(prec[l])[rec[l] >= t])
                ap[l] += p / 11
        else:
            # correct AP calculation
            # first append sentinel values at the end
            mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
            mrec = np.concatenate(([0], rec[l], [1]))

            mpre = np.maximum.accumulate(mpre[::-1])[::-1]

            # to calculate area under PR curve, look for points
            # where X axis (recall) changes value
            i = np.where(mrec[1:] != mrec[:-1])[0]

            # and sum (\Delta recall) * prec
            ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

    return ap
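As a quick sanity check on the two metrics, here is a toy run of calc_detection_voc_ap on hand-made precision/recall curves (the numbers are illustrative, not from any dataset):

```
import numpy as np

# One foreground class, two ground-truth boxes, three detections ranked
# by score: a true positive, a false positive, then a second true
# positive. prec/rec are cumulative, as calc_detection_voc_prec_rec
# would produce them.
prec = [np.array([1.0, 0.5, 2.0 / 3.0])]  # tp / (tp + fp)
rec = [np.array([0.5, 0.5, 1.0])]         # tp / n_pos

print(calc_detection_voc_ap(prec, rec, use_07_metric=False))  # [0.8333...]
print(calc_detection_voc_ap(prec, rec, use_07_metric=True))   # [0.8485...]
```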
import os

import torch
import torch.utils.data
from PIL import Image
import sys

if sys.version_info[0] == 2:
    import xml.etree.cElementTree as ET
else:
    import xml.etree.ElementTree as ET

from maskrcnn_benchmark.structures.bounding_box import BoxList
class PascalVOCDataset(torch.utils.data.Dataset):

    CLASSES = (
        "__background__ ",
        "aeroplane",
        "bicycle",
        "bird",
        "boat",
        "bottle",
        "bus",
        "car",
        "cat",
        "chair",
        "cow",
        "diningtable",
        "dog",
        "horse",
        "motorbike",
        "person",
        "pottedplant",
        "sheep",
        "sofa",
        "train",
        "tvmonitor",
    )
    def __init__(self, data_dir, split, use_difficult=False, transforms=None):
        self.root = data_dir
        self.image_set = split
        self.keep_difficult = use_difficult
        self.transforms = transforms

        self._annopath = os.path.join(self.root, "Annotations", "%s.xml")
        self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg")
        self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt")

        with open(self._imgsetpath % self.image_set) as f:
            self.ids = f.readlines()
        self.ids = [x.strip("\n") for x in self.ids]
        self.id_to_img_map = {k: v for k, v in enumerate(self.ids)}

        cls = PascalVOCDataset.CLASSES
        self.class_to_ind = dict(zip(cls, range(len(cls))))

    def __getitem__(self, index):
        img_id = self.ids[index]
        img = Image.open(self._imgpath % img_id).convert("RGB")

        target = self.get_groundtruth(index)
        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, index

    def __len__(self):
        return len(self.ids)

    def get_groundtruth(self, index):
        img_id = self.ids[index]
        anno = ET.parse(self._annopath % img_id).getroot()
        anno = self._preprocess_annotation(anno)

        height, width = anno["im_info"]
        target = BoxList(anno["boxes"], (width, height), mode="xyxy")
        target.add_field("labels", anno["labels"])
        target.add_field("difficult", anno["difficult"])
        return target

    def _preprocess_annotation(self, target):
        boxes = []
        gt_classes = []
        difficult_boxes = []

        for obj in target.iter("object"):
            difficult = int(obj.find("difficult").text) == 1
            if not self.keep_difficult and difficult:
                continue
            name = obj.find("name").text.lower().strip()
            bb = obj.find("bndbox")
            bndbox = tuple(
                map(
                    int,
                    [
                        bb.find("xmin").text,
                        bb.find("ymin").text,
                        bb.find("xmax").text,
                        bb.find("ymax").text,
                    ],
                )
            )

            boxes.append(bndbox)
            gt_classes.append(self.class_to_ind[name])
            difficult_boxes.append(difficult)

        size = target.find("size")
        im_info = tuple(map(int, (size.find("height").text, size.find("width").text)))

        res = {
            "boxes": torch.tensor(boxes, dtype=torch.float32),
            "labels": torch.tensor(gt_classes),
            "difficult": torch.tensor(difficult_boxes),
            "im_info": im_info,
        }
        return res

    def get_img_info(self, index):
        img_id = self.ids[index]
        anno = ET.parse(self._annopath % img_id).getroot()
        size = anno.find("size")
        im_info = tuple(map(int, (size.find("height").text, size.find("width").text)))
        return {"height": im_info[0], "width": im_info[1]}

    def map_class_id_to_class_name(self, class_id):
        return PascalVOCDataset.CLASSES[class_id]
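A minimal usage sketch (assuming VOC2007 is symlinked under datasets/voc as in the README; the paths are not verified here):

```
# Hypothetical quick check of PascalVOCDataset.
dataset = PascalVOCDataset(
    data_dir="datasets/voc/VOC2007", split="train", use_difficult=False
)
print(len(dataset))                # number of images in the split
img, target, idx = dataset[0]      # PIL image, BoxList target, index
print(target.get_field("labels"))  # class ids; 0 is __background__
print(dataset.map_class_id_to_class_name(int(target.get_field("labels")[0])))
```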
@@ -6,4 +6,3 @@ from .transforms import ToTensor
from .transforms import Normalize
from .build import build_transforms
...
@@ -68,17 +68,18 @@ def main():
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
...
@@ -84,17 +84,18 @@ def test(cfg, model, distributed):
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
...