Commit 2362ea3b authored by Ross Girshick, committed by Facebook Github Bot

Quick configs for end-to-end integration testing

Reviewed By: ir413

Differential Revision: D9014510

fbshipit-source-id: a1320b1c221546ad795a94dce254e9520a0847fd
parent b5dcc0fe
...@@ -972,6 +972,9 @@ __C.EXPECTED_RESULTS = [] ...@@ -972,6 +972,9 @@ __C.EXPECTED_RESULTS = []
# Absolute and relative tolerance to use when comparing to EXPECTED_RESULTS # Absolute and relative tolerance to use when comparing to EXPECTED_RESULTS
__C.EXPECTED_RESULTS_RTOL = 0.1 __C.EXPECTED_RESULTS_RTOL = 0.1
__C.EXPECTED_RESULTS_ATOL = 0.005 __C.EXPECTED_RESULTS_ATOL = 0.005
# When the expected value specifies a mean and standard deviation, we check
# that the actual value is within mean +/- SIGMA_TOL * std
__C.EXPECTED_RESULTS_SIGMA_TOL = 4
# Set to send email in case of an EXPECTED_RESULTS failure # Set to send email in case of an EXPECTED_RESULTS failure
__C.EXPECTED_RESULTS_EMAIL = b'' __C.EXPECTED_RESULTS_EMAIL = b''
......
...@@ -195,6 +195,11 @@ def check_expected_results(results, atol=0.005, rtol=0.1): ...@@ -195,6 +195,11 @@ def check_expected_results(results, atol=0.005, rtol=0.1):
Expected results should take the form of a list of expectations, each Expected results should take the form of a list of expectations, each
specified by four elements: [dataset, task, metric, expected value]. For specified by four elements: [dataset, task, metric, expected value]. For
example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387], ...]. example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387], ...].
The expected value may also be formatted as a list [mean, std] providing
an empirical mean and standard deviation from which a valid range is computed
using cfg.EXPECTED_RESULTS_SIGMA_TOL. For example:
[['coco_2014_minival', 'box_proposal', 'AR@1000', [0.387, 0.001]], ...]
""" """
# cfg contains a reference set of results that we want to check against # cfg contains a reference set of results that we want to check against
if len(cfg.EXPECTED_RESULTS) == 0: if len(cfg.EXPECTED_RESULTS) == 0:
...@@ -206,13 +211,28 @@ def check_expected_results(results, atol=0.005, rtol=0.1): ...@@ -206,13 +211,28 @@ def check_expected_results(results, atol=0.005, rtol=0.1):
assert metric in results[dataset][task], \ assert metric in results[dataset][task], \
'Metric {} not in results'.format(metric) 'Metric {} not in results'.format(metric)
actual_val = results[dataset][task][metric] actual_val = results[dataset][task][metric]
err = abs(actual_val - expected_val) ok = False
tol = atol + rtol * abs(expected_val) if isinstance(expected_val, list):
msg = ( assert len(expected_val) == 2, (
'{} > {} > {} sanity check (actual vs. expected): ' 'Expected result must be in (mean, std) format'
'{:.3f} vs. {:.3f}, err={:.3f}, tol={:.3f}' )
).format(dataset, task, metric, actual_val, expected_val, err, tol) mean, std = expected_val
if err > tol: lo = mean - cfg.EXPECTED_RESULTS_SIGMA_TOL * std
hi = mean + cfg.EXPECTED_RESULTS_SIGMA_TOL * std
ok = (lo < actual_val) and (actual_val < hi)
msg = (
'{} > {} > {} sanity check (actual vs. expected): '
'{:.3f} vs. mean={:.4f}, std={:.4}, range=({:.4f}, {:.4f})'
).format(dataset, task, metric, actual_val, mean, std, lo, hi)
else:
err = abs(actual_val - expected_val)
tol = atol + rtol * abs(expected_val)
ok = (err > tol)
msg = (
'{} > {} > {} sanity check (actual vs. expected): '
'{:.3f} vs. {:.3f}, err={:.3f}, tol={:.3f}'
).format(dataset, task, metric, actual_val, expected_val, err, tol)
if not ok:
msg = 'FAIL: ' + msg msg = 'FAIL: ' + msg
logger.error(msg) logger.error(msg)
if cfg.EXPECTED_RESULTS_EMAIL != '': if cfg.EXPECTED_RESULTS_EMAIL != '':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment