Commit 9185b699 authored by Ashwin Bharambe's avatar Ashwin Bharambe Committed by Facebook Github Bot

Prepare for python3 compatibility [2]

Summary:
There were two "bugs" associated with loading pickle files:

The first is easy: these files should have been `open()`ed as binary, but
weren't.

The second is slightly nuanced. The default encoding used while unpickling
is 7-bit (ASCII.) However, the blobs are arbitrary 8-bit bytes which don't
agree. The absolute correct way to do this is to use `encoding="bytes"` and
then interpret the blob names either as ASCII, or better, as unicode utf-8. A
reasonable fix, however, is to treat the encoding as 8-bit latin1 (which
agrees with the first 256 characters of Unicode anyway.)

As part of this, I also centralized all pickling operations into
`detectron.utils.io`.

This /still/ does not change the build to Python3, but I believe it is ready
now.

Reviewed By: rbgirshick

Differential Revision: D9689294

fbshipit-source-id: add1f2d784fe196df27b20e65e35922536d11a3c
parent 88dd4b1d
......@@ -30,7 +30,6 @@ import logging
import numpy as np
import os
import scipy.sparse
from six.moves import cPickle as pickle
# Must happen before importing COCO API (which imports matplotlib)
import detectron.utils.env as envu
......@@ -43,6 +42,7 @@ from detectron.core.config import cfg
from detectron.utils.timer import Timer
import detectron.datasets.dataset_catalog as dataset_catalog
import detectron.utils.boxes as box_utils
from detectron.utils.io import load_object
import detectron.utils.segms as segm_utils
logger = logging.getLogger(__name__)
......@@ -251,8 +251,8 @@ class JsonDataset(object):
):
"""Add proposals from a proposals file to an roidb."""
logger.info('Loading proposals from: {}'.format(proposal_file))
with open(proposal_file, 'r') as f:
proposals = pickle.load(f)
proposals = load_object(proposal_file)
id_field = 'indexes' if 'indexes' in proposals else 'ids' # compat fix
_sort_proposals(proposals, id_field)
box_list = []
......
......@@ -26,7 +26,9 @@ import logging
import numpy as np
import os
import xml.etree.ElementTree as ET
from six.moves import cPickle
from detectron.utils.io import load_object
from detectron.utils.io import save_object
logger = logging.getLogger(__name__)
......@@ -136,12 +138,9 @@ def voc_eval(detpath,
i + 1, len(imagenames)))
# save
logger.info('Saving cached annotations to {:s}'.format(cachefile))
with open(cachefile, 'w') as f:
cPickle.dump(recs, f)
save_object(recs, cachefile)
else:
# load
with open(cachefile, 'r') as f:
recs = cPickle.load(f)
recs = load_object(cachefile)
# extract gt objects for this class
class_recs = {}
......
......@@ -24,6 +24,7 @@ import hashlib
import logging
import os
import re
import six
import sys
from six.moves import cPickle as pickle
from six.moves import urllib
......@@ -33,11 +34,32 @@ logger = logging.getLogger(__name__)
_DETECTRON_S3_BASE_URL = 'https://s3-us-west-2.amazonaws.com/detectron'
def save_object(obj, file_name, pickle_format=2):
    """Save a Python object by pickling it to `file_name`.

    Unless specifically overridden, we want to save the object in Pickle
    format=2, since this allows Python 2 executables to load the resulting
    file. When Python 2 backward-compatibility is no longer needed, this can
    be bumped to 3. Avoid pickle.HIGHEST_PROTOCOL whenever the resulting file
    is manifested or used outside the system, as the highest protocol may not
    be readable by older interpreters.
    """
    file_name = os.path.abspath(file_name)
    with open(file_name, 'wb') as f:
        pickle.dump(obj, f, pickle_format)
def load_object(file_name):
    """Load a pickled Python object from `file_name` (counterpart of save_object)."""
    with open(file_name, 'rb') as f:
        # The default encoding used while unpickling is 7-bit (ASCII.) However,
        # the blobs are arbitrary 8-bit bytes which don't agree. The absolute
        # correct way to do this is to use `encoding="bytes"` and then interpret
        # the blob names either as ASCII, or better, as unicode utf-8. A
        # reasonable fix, however, is to treat the encoding as 8-bit latin1
        # (which agrees with the first 256 characters of Unicode anyway.)
        if six.PY2:
            return pickle.load(f)
        else:
            return pickle.load(f, encoding='latin1')
def cache_url(url_or_file, cache_dir):
......
......@@ -26,13 +26,13 @@ import numpy as np
import os
import pprint
import yaml
from six.moves import cPickle as pickle
from caffe2.python import core
from caffe2.python import workspace
from detectron.core.config import cfg
from detectron.core.config import load_cfg
from detectron.utils.io import load_object
from detectron.utils.io import save_object
import detectron.utils.c2 as c2_utils
......@@ -59,8 +59,8 @@ def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
"""
logger.info('Loading weights from: {}'.format(weights_file))
ws_blobs = workspace.Blobs()
with open(weights_file, 'r') as f:
src_blobs = pickle.load(f)
src_blobs = load_object(weights_file)
if 'cfg' in src_blobs:
saved_cfg = load_cfg(src_blobs['cfg'])
configure_bbox_reg_weights(model, saved_cfg)
......
......@@ -27,10 +27,10 @@ import os
import yaml
import numpy as np
import subprocess
from six.moves import cPickle as pickle
from six.moves import shlex_quote
from detectron.core.config import cfg
from detectron.utils.io import load_object
import logging
logger = logging.getLogger(__name__)
......@@ -98,7 +98,7 @@ def process_in_parallel(
range_file = os.path.join(
output_dir, '%s_range_%s_%s.pkl' % (tag, start, end)
)
range_data = pickle.load(open(range_file))
range_data = load_object(range_file)
outputs.append(range_data)
return outputs
......
......@@ -12,9 +12,10 @@ import argparse
import numpy as np
import os
import sys
from six.moves import cPickle as pickle
import detectron.datasets.coco_to_cityscapes_id as cs
from detectron.utils.io import load_object
from detectron.utils.io import save_object
NUM_CS_CLS = 9
NUM_COCO_CLS = 81
......@@ -92,8 +93,7 @@ def remove_momentum(model_dict):
def load_and_convert_coco_model(args):
with open(args.coco_model_file_name, 'r') as f:
model_dict = pickle.load(f)
model_dict = load_object(args.coco_model_file_name)
remove_momentum(model_dict)
convert_coco_blobs_to_cityscape_blobs(model_dict)
return model_dict
......@@ -106,7 +106,6 @@ if __name__ == '__main__':
'Weights file does not exist'
weights = load_and_convert_coco_model(args)
with open(args.out_file_name, 'w') as f:
pickle.dump(weights, f, protocol=pickle.HIGHEST_PROTOCOL)
save_object(weights, args.out_file_name)
print('Wrote blobs to {}:'.format(args.out_file_name))
print(sorted(weights['blobs'].keys()))
......@@ -27,9 +27,10 @@ from __future__ import unicode_literals
import numpy as np
import scipy.io as sio
import sys
from six.moves import cPickle as pickle
from detectron.datasets.json_dataset import JsonDataset
from detectron.utils.io import save_object
if __name__ == '__main__':
dataset_name = sys.argv[1]
......@@ -53,8 +54,4 @@ if __name__ == '__main__':
scores.append(np.zeros((i_boxes.shape[0]), dtype=np.float32))
ids.append(roidb[i]['id'])
with open(file_out, 'wb') as f:
pickle.dump(
dict(boxes=boxes, scores=scores, indexes=ids), f,
pickle.HIGHEST_PROTOCOL
)
save_object(dict(boxes=boxes, scores=scores, indexes=ids), file_out)
......@@ -29,7 +29,6 @@ import argparse
import numpy as np
import os
import sys
from six.moves import cPickle as pickle
from caffe.proto import caffe_pb2
from caffe2.proto import caffe2_pb2
......@@ -37,6 +36,7 @@ from caffe2.python import caffe_translator
from caffe2.python import utils
from google.protobuf import text_format
from detectron.utils.io import save_object
def parse_args():
parser = argparse.ArgumentParser(
......@@ -93,8 +93,7 @@ def pickle_weights(out_file_name, weights):
normalize_resnet_name(blob.name): utils.Caffe2TensorToNumpyArray(blob)
for blob in weights.protos
}
with open(out_file_name, 'w') as f:
pickle.dump(blobs, f, protocol=pickle.HIGHEST_PROTOCOL)
save_object(blobs, out_file_name)
print('Wrote blobs:')
print(sorted(blobs.keys()))
......
......@@ -33,12 +33,11 @@ from __future__ import unicode_literals
import argparse
import os
import sys
import yaml
from six.moves import cPickle as pickle
from detectron.core.config import cfg
from detectron.datasets import task_evaluation
from detectron.datasets.json_dataset import JsonDataset
from detectron.utils.io import load_object
from detectron.utils.logging import setup_logging
import detectron.core.config as core_config
......@@ -85,8 +84,8 @@ def parse_args():
def do_reval(dataset_name, output_dir, args):
dataset = JsonDataset(dataset_name)
with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
dets = pickle.load(f)
dets = load_object(os.path.join(output_dir, 'detections.pkl'))
# Override config with the one saved in the detections file
if args.cfg_file is not None:
core_config.merge_cfg_from_cfg(core_config.load_cfg(dets['cfg']))
......
......@@ -26,9 +26,9 @@ import argparse
import cv2
import os
import sys
from six.moves import cPickle as pickle
from detectron.datasets.json_dataset import JsonDataset
from detectron.utils.io import load_object
import detectron.utils.vis as vis_utils
# OpenCL may be enabled by default in OpenCV3; disable it because it's not
......@@ -84,8 +84,7 @@ def vis(dataset, detections_pkl, thresh, output_dir, limit=0):
ds = JsonDataset(dataset)
roidb = ds.get_roidb()
with open(detections_pkl, 'r') as f:
dets = pickle.load(f)
dets = load_object(detections_pkl)
assert all(k in dets for k in ['all_boxes', 'all_segms', 'all_keyps']), \
'Expected detections pkl file in the format used by test_engine.py'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment