Commit 9185b699 authored by Ashwin Bharambe's avatar Ashwin Bharambe Committed by Facebook Github Bot

Prepare for python3 compatibility [2]

Summary:
There were two "bugs" associated with loading pickle files:

The first is easy: these files should have been `open()`ed as binary, but
weren't.

The second is slightly nuanced. The default encoding used while unpickling
is 7-bit (ASCII.) However, the blobs are arbitrary 8-bit bytes which don't
agree. The absolute correct way to do this is to use `encoding="bytes"` and
then interpret the blob names either as ASCII, or better, as unicode utf-8. A
reasonable fix, however, is to treat the encoding as 8-bit latin1 (which
agrees with the first 256 characters of Unicode anyway.)

As part of this, I also centralized all pickling operations into
`detectron.utils.io`.

This /still/ does not change the build to Python3, but I believe it is ready
now.

Reviewed By: rbgirshick

Differential Revision: D9689294

fbshipit-source-id: add1f2d784fe196df27b20e65e35922536d11a3c
parent 88dd4b1d
...@@ -30,7 +30,6 @@ import logging ...@@ -30,7 +30,6 @@ import logging
import numpy as np import numpy as np
import os import os
import scipy.sparse import scipy.sparse
from six.moves import cPickle as pickle
# Must happen before importing COCO API (which imports matplotlib) # Must happen before importing COCO API (which imports matplotlib)
import detectron.utils.env as envu import detectron.utils.env as envu
...@@ -43,6 +42,7 @@ from detectron.core.config import cfg ...@@ -43,6 +42,7 @@ from detectron.core.config import cfg
from detectron.utils.timer import Timer from detectron.utils.timer import Timer
import detectron.datasets.dataset_catalog as dataset_catalog import detectron.datasets.dataset_catalog as dataset_catalog
import detectron.utils.boxes as box_utils import detectron.utils.boxes as box_utils
from detectron.utils.io import load_object
import detectron.utils.segms as segm_utils import detectron.utils.segms as segm_utils
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -251,8 +251,8 @@ class JsonDataset(object): ...@@ -251,8 +251,8 @@ class JsonDataset(object):
): ):
"""Add proposals from a proposals file to an roidb.""" """Add proposals from a proposals file to an roidb."""
logger.info('Loading proposals from: {}'.format(proposal_file)) logger.info('Loading proposals from: {}'.format(proposal_file))
with open(proposal_file, 'r') as f: proposals = load_object(proposal_file)
proposals = pickle.load(f)
id_field = 'indexes' if 'indexes' in proposals else 'ids' # compat fix id_field = 'indexes' if 'indexes' in proposals else 'ids' # compat fix
_sort_proposals(proposals, id_field) _sort_proposals(proposals, id_field)
box_list = [] box_list = []
......
...@@ -26,7 +26,9 @@ import logging ...@@ -26,7 +26,9 @@ import logging
import numpy as np import numpy as np
import os import os
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from six.moves import cPickle
from detectron.utils.io import load_object
from detectron.utils.io import save_object
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -136,12 +138,9 @@ def voc_eval(detpath, ...@@ -136,12 +138,9 @@ def voc_eval(detpath,
i + 1, len(imagenames))) i + 1, len(imagenames)))
# save # save
logger.info('Saving cached annotations to {:s}'.format(cachefile)) logger.info('Saving cached annotations to {:s}'.format(cachefile))
with open(cachefile, 'w') as f: save_object(recs, cachefile)
cPickle.dump(recs, f)
else: else:
# load recs = load_object(cachefile)
with open(cachefile, 'r') as f:
recs = cPickle.load(f)
# extract gt objects for this class # extract gt objects for this class
class_recs = {} class_recs = {}
......
...@@ -24,6 +24,7 @@ import hashlib ...@@ -24,6 +24,7 @@ import hashlib
import logging import logging
import os import os
import re import re
import six
import sys import sys
from six.moves import cPickle as pickle from six.moves import cPickle as pickle
from six.moves import urllib from six.moves import urllib
...@@ -33,11 +34,32 @@ logger = logging.getLogger(__name__) ...@@ -33,11 +34,32 @@ logger = logging.getLogger(__name__)
_DETECTRON_S3_BASE_URL = 'https://s3-us-west-2.amazonaws.com/detectron' _DETECTRON_S3_BASE_URL = 'https://s3-us-west-2.amazonaws.com/detectron'
def save_object(obj, file_name): def save_object(obj, file_name, pickle_format=2):
"""Save a Python object by pickling it.""" """Save a Python object by pickling it.
Unless specifically overridden, we want to save it in Pickle format=2 since this
will allow other Python2 executables to load the resulting Pickle. When we want
to completely remove Python2 backward-compatibility, we can bump it up to 3. We
should never use pickle.HIGHEST_PROTOCOL as far as possible if the resulting
file is manifested or used, external to the system.
"""
file_name = os.path.abspath(file_name) file_name = os.path.abspath(file_name)
with open(file_name, 'wb') as f: with open(file_name, 'wb') as f:
pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) pickle.dump(obj, f, pickle_format)
def load_object(file_name):
with open(file_name, 'rb') as f:
# The default encoding used while unpickling is 7-bit (ASCII.) However,
# the blobs are arbitrary 8-bit bytes which don't agree. The absolute
# correct way to do this is to use `encoding="bytes"` and then interpret
# the blob names either as ASCII, or better, as unicode utf-8. A
# reasonable fix, however, is to treat the encoding as 8-bit latin1
# (which agrees with the first 256 characters of Unicode anyway.)
if six.PY2:
return pickle.load(f)
else:
return pickle.load(f, encoding='latin1')
def cache_url(url_or_file, cache_dir): def cache_url(url_or_file, cache_dir):
......
...@@ -26,13 +26,13 @@ import numpy as np ...@@ -26,13 +26,13 @@ import numpy as np
import os import os
import pprint import pprint
import yaml import yaml
from six.moves import cPickle as pickle
from caffe2.python import core from caffe2.python import core
from caffe2.python import workspace from caffe2.python import workspace
from detectron.core.config import cfg from detectron.core.config import cfg
from detectron.core.config import load_cfg from detectron.core.config import load_cfg
from detectron.utils.io import load_object
from detectron.utils.io import save_object from detectron.utils.io import save_object
import detectron.utils.c2 as c2_utils import detectron.utils.c2 as c2_utils
...@@ -59,8 +59,8 @@ def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0): ...@@ -59,8 +59,8 @@ def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
""" """
logger.info('Loading weights from: {}'.format(weights_file)) logger.info('Loading weights from: {}'.format(weights_file))
ws_blobs = workspace.Blobs() ws_blobs = workspace.Blobs()
with open(weights_file, 'r') as f: src_blobs = load_object(weights_file)
src_blobs = pickle.load(f)
if 'cfg' in src_blobs: if 'cfg' in src_blobs:
saved_cfg = load_cfg(src_blobs['cfg']) saved_cfg = load_cfg(src_blobs['cfg'])
configure_bbox_reg_weights(model, saved_cfg) configure_bbox_reg_weights(model, saved_cfg)
......
...@@ -27,10 +27,10 @@ import os ...@@ -27,10 +27,10 @@ import os
import yaml import yaml
import numpy as np import numpy as np
import subprocess import subprocess
from six.moves import cPickle as pickle
from six.moves import shlex_quote from six.moves import shlex_quote
from detectron.core.config import cfg from detectron.core.config import cfg
from detectron.utils.io import load_object
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -98,7 +98,7 @@ def process_in_parallel( ...@@ -98,7 +98,7 @@ def process_in_parallel(
range_file = os.path.join( range_file = os.path.join(
output_dir, '%s_range_%s_%s.pkl' % (tag, start, end) output_dir, '%s_range_%s_%s.pkl' % (tag, start, end)
) )
range_data = pickle.load(open(range_file)) range_data = load_object(range_file)
outputs.append(range_data) outputs.append(range_data)
return outputs return outputs
......
...@@ -12,9 +12,10 @@ import argparse ...@@ -12,9 +12,10 @@ import argparse
import numpy as np import numpy as np
import os import os
import sys import sys
from six.moves import cPickle as pickle
import detectron.datasets.coco_to_cityscapes_id as cs import detectron.datasets.coco_to_cityscapes_id as cs
from detectron.utils.io import load_object
from detectron.utils.io import save_object
NUM_CS_CLS = 9 NUM_CS_CLS = 9
NUM_COCO_CLS = 81 NUM_COCO_CLS = 81
...@@ -92,8 +93,7 @@ def remove_momentum(model_dict): ...@@ -92,8 +93,7 @@ def remove_momentum(model_dict):
def load_and_convert_coco_model(args): def load_and_convert_coco_model(args):
with open(args.coco_model_file_name, 'r') as f: model_dict = load_object(args.coco_model_file_name)
model_dict = pickle.load(f)
remove_momentum(model_dict) remove_momentum(model_dict)
convert_coco_blobs_to_cityscape_blobs(model_dict) convert_coco_blobs_to_cityscape_blobs(model_dict)
return model_dict return model_dict
...@@ -106,7 +106,6 @@ if __name__ == '__main__': ...@@ -106,7 +106,6 @@ if __name__ == '__main__':
'Weights file does not exist' 'Weights file does not exist'
weights = load_and_convert_coco_model(args) weights = load_and_convert_coco_model(args)
with open(args.out_file_name, 'w') as f: save_object(weights, args.out_file_name)
pickle.dump(weights, f, protocol=pickle.HIGHEST_PROTOCOL)
print('Wrote blobs to {}:'.format(args.out_file_name)) print('Wrote blobs to {}:'.format(args.out_file_name))
print(sorted(weights['blobs'].keys())) print(sorted(weights['blobs'].keys()))
...@@ -27,9 +27,10 @@ from __future__ import unicode_literals ...@@ -27,9 +27,10 @@ from __future__ import unicode_literals
import numpy as np import numpy as np
import scipy.io as sio import scipy.io as sio
import sys import sys
from six.moves import cPickle as pickle
from detectron.datasets.json_dataset import JsonDataset from detectron.datasets.json_dataset import JsonDataset
from detectron.utils.io import save_object
if __name__ == '__main__': if __name__ == '__main__':
dataset_name = sys.argv[1] dataset_name = sys.argv[1]
...@@ -53,8 +54,4 @@ if __name__ == '__main__': ...@@ -53,8 +54,4 @@ if __name__ == '__main__':
scores.append(np.zeros((i_boxes.shape[0]), dtype=np.float32)) scores.append(np.zeros((i_boxes.shape[0]), dtype=np.float32))
ids.append(roidb[i]['id']) ids.append(roidb[i]['id'])
with open(file_out, 'wb') as f: save_object(dict(boxes=boxes, scores=scores, indexes=ids), file_out)
pickle.dump(
dict(boxes=boxes, scores=scores, indexes=ids), f,
pickle.HIGHEST_PROTOCOL
)
...@@ -29,7 +29,6 @@ import argparse ...@@ -29,7 +29,6 @@ import argparse
import numpy as np import numpy as np
import os import os
import sys import sys
from six.moves import cPickle as pickle
from caffe.proto import caffe_pb2 from caffe.proto import caffe_pb2
from caffe2.proto import caffe2_pb2 from caffe2.proto import caffe2_pb2
...@@ -37,6 +36,7 @@ from caffe2.python import caffe_translator ...@@ -37,6 +36,7 @@ from caffe2.python import caffe_translator
from caffe2.python import utils from caffe2.python import utils
from google.protobuf import text_format from google.protobuf import text_format
from detectron.utils.io import save_object
def parse_args(): def parse_args():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
...@@ -93,8 +93,7 @@ def pickle_weights(out_file_name, weights): ...@@ -93,8 +93,7 @@ def pickle_weights(out_file_name, weights):
normalize_resnet_name(blob.name): utils.Caffe2TensorToNumpyArray(blob) normalize_resnet_name(blob.name): utils.Caffe2TensorToNumpyArray(blob)
for blob in weights.protos for blob in weights.protos
} }
with open(out_file_name, 'w') as f: save_object(blobs, out_file_name)
pickle.dump(blobs, f, protocol=pickle.HIGHEST_PROTOCOL)
print('Wrote blobs:') print('Wrote blobs:')
print(sorted(blobs.keys())) print(sorted(blobs.keys()))
......
...@@ -33,12 +33,11 @@ from __future__ import unicode_literals ...@@ -33,12 +33,11 @@ from __future__ import unicode_literals
import argparse import argparse
import os import os
import sys import sys
import yaml
from six.moves import cPickle as pickle
from detectron.core.config import cfg from detectron.core.config import cfg
from detectron.datasets import task_evaluation from detectron.datasets import task_evaluation
from detectron.datasets.json_dataset import JsonDataset from detectron.datasets.json_dataset import JsonDataset
from detectron.utils.io import load_object
from detectron.utils.logging import setup_logging from detectron.utils.logging import setup_logging
import detectron.core.config as core_config import detectron.core.config as core_config
...@@ -85,8 +84,8 @@ def parse_args(): ...@@ -85,8 +84,8 @@ def parse_args():
def do_reval(dataset_name, output_dir, args): def do_reval(dataset_name, output_dir, args):
dataset = JsonDataset(dataset_name) dataset = JsonDataset(dataset_name)
with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: dets = load_object(os.path.join(output_dir, 'detections.pkl'))
dets = pickle.load(f)
# Override config with the one saved in the detections file # Override config with the one saved in the detections file
if args.cfg_file is not None: if args.cfg_file is not None:
core_config.merge_cfg_from_cfg(core_config.load_cfg(dets['cfg'])) core_config.merge_cfg_from_cfg(core_config.load_cfg(dets['cfg']))
......
...@@ -26,9 +26,9 @@ import argparse ...@@ -26,9 +26,9 @@ import argparse
import cv2 import cv2
import os import os
import sys import sys
from six.moves import cPickle as pickle
from detectron.datasets.json_dataset import JsonDataset from detectron.datasets.json_dataset import JsonDataset
from detectron.utils.io import load_object
import detectron.utils.vis as vis_utils import detectron.utils.vis as vis_utils
# OpenCL may be enabled by default in OpenCV3; disable it because it's not # OpenCL may be enabled by default in OpenCV3; disable it because it's not
...@@ -84,8 +84,7 @@ def vis(dataset, detections_pkl, thresh, output_dir, limit=0): ...@@ -84,8 +84,7 @@ def vis(dataset, detections_pkl, thresh, output_dir, limit=0):
ds = JsonDataset(dataset) ds = JsonDataset(dataset)
roidb = ds.get_roidb() roidb = ds.get_roidb()
with open(detections_pkl, 'r') as f: dets = load_object(detections_pkl)
dets = pickle.load(f)
assert all(k in dets for k in ['all_boxes', 'all_segms', 'all_keyps']), \ assert all(k in dets for k in ['all_boxes', 'all_segms', 'all_keyps']), \
'Expected detections pkl file in the format used by test_engine.py' 'Expected detections pkl file in the format used by test_engine.py'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment