Commit e59c30bb authored by Ashwin Bharambe, committed by Ashwin Bharambe

Fix AffineChannel op param initialization

Summary:
The `AffineChannel` op wrapper did not use the officially sanctioned
mechanism for parameter creation, namely `ModelHelper.create_param()`.
Presumably this was because of the `share_with` option, which let you share
parameters with another op.

As a result, the parameters were never initialized! This probably doesn't
matter for the algorithm (those params get overwritten pretty quickly), but
it's a bug nevertheless. I decided to kill the `share_with` option, since it
isn't used anywhere anyway, and resort to the standard way of creating
weights and biases.
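
For reference, a minimal sketch of what `create_param()` buys you, using the
stock Caffe2 `ModelHelper` (the model and blob names here are made up for
illustration). Unlike a bare `core.ScopedBlobReference`, it both registers
the parameter with the model and emits a fill op into `param_init_net`, so
the blob has a well-defined starting value:

    from caffe2.python import model_helper
    from caffe2.python.modeling import initializers
    from caffe2.python.modeling.parameter_info import ParameterTags

    model = model_helper.ModelHelper(name='example')
    # Registers the blob as a weight AND adds a ConstantFill op to
    # model.param_init_net, so running param_init_net fills it with ones.
    scale = model.create_param(
        param_name='example_s',
        initializer=initializers.Initializer('ConstantFill', value=1.),
        tags=ParameterTags.WEIGHT,
        shape=[64],
    )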

Reviewed By: rbgirshick

Differential Revision: D6826914

fbshipit-source-id: 65628c59b085b9ab160006b003dd40dbefa2f7c1
parent 946ba8d0
@@ -92,7 +92,7 @@ def add_ResNet_convX_body(model, block_counts, freeze_at=2):
     X = 4 or 5)."""
     assert freeze_at in [0, 2, 3, 4, 5]
     p = model.Conv('data', 'conv1', 3, 64, 7, pad=3, stride=2, no_bias=1)
-    p = model.AffineChannel(p, 'res_conv1_bn', inplace=True)
+    p = model.AffineChannel(p, 'res_conv1_bn', dim=64, inplace=True)
     p = model.Relu(p, p)
     p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)
     dim_in = 64
@@ -204,7 +204,7 @@ def add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
         stride=stride,
         no_bias=1
     )
-    return model.AffineChannel(c, prefix + '_branch1_bn')
+    return model.AffineChannel(c, prefix + '_branch1_bn', dim=dim_out)

 # ------------------------------------------------------------------------------
...
@@ -26,6 +26,8 @@ import logging
 from caffe2.python import cnn
 from caffe2.python import core
 from caffe2.python import workspace
+from caffe2.python.modeling import initializers
+from caffe2.python.modeling.parameter_info import ParameterTags
 from core.config import cfg
 from ops.collect_and_distribute_fpn_rpn_proposals \
@@ -75,26 +77,27 @@ class DetectionModelHelper(cnn.CNNModelHelper):
                 str(p).find('gpu_{}'.format(gpu_id)) == 0)
             )]

-    def AffineChannel(self, blob_in, blob_out, share_with=None, inplace=False):
+    def AffineChannel(self, blob_in, blob_out, dim, inplace=False):
         """Affine transformation to replace BN in networks where BN cannot be
         used (e.g., because the minibatch size is too small).

-        The AffineChannel parameters may be shared with another AffineChannelOp
-        by specifying its blob name (excluding the '_{s,b}' suffix) in the
-        share_with argument. The operations can be done in place to save memory.
+        The operations can be done in place to save memory.
         """
         blob_out = blob_out or self.net.NextName()
-        is_not_sharing = share_with is None
-        param_prefix = blob_out if is_not_sharing else share_with
-        scale = core.ScopedBlobReference(
-            param_prefix + '_s', self.param_init_net)
-        bias = core.ScopedBlobReference(
-            param_prefix + '_b', self.param_init_net)
-        if is_not_sharing:
-            self.net.Proto().external_input.extend([str(scale), str(bias)])
-            self.params.extend([scale, bias])
-            self.weights.append(scale)
-            self.biases.append(bias)
+        param_prefix = blob_out
+        scale = self.create_param(
+            param_name=param_prefix + '_s',
+            initializer=initializers.Initializer("ConstantFill", value=1.),
+            tags=ParameterTags.WEIGHT,
+            shape=[dim, ],
+        )
+        bias = self.create_param(
+            param_name=param_prefix + '_b',
+            initializer=initializers.Initializer("ConstantFill", value=0.),
+            tags=ParameterTags.BIAS,
+            shape=[dim, ],
+        )
         if inplace:
             return self.net.AffineChannel([blob_in, scale, bias], blob_in)
         else:
             return self.net.AffineChannel([blob_in, scale, bias], blob_out)
@@ -403,7 +406,7 @@ class DetectionModelHelper(cnn.CNNModelHelper):
             no_bias=1
         )
         blob_out = self.AffineChannel(
-            conv_blob, prefix + suffix, inplace=inplace
+            conv_blob, prefix + suffix, dim=dim_out, inplace=inplace
         )
         return blob_out
...
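
For intuition: AffineChannel applies a per-channel scale and shift, so the
ConstantFill defaults above (scale = 1, bias = 0) make the op an identity
transform until training updates it. A NumPy sketch of the semantics
(illustrative only, not part of this diff):

    import numpy as np

    def affine_channel_ref(x, scale, bias):
        # x is NCHW; scale and bias hold one value per channel:
        # out[n, c, h, w] = scale[c] * x[n, c, h, w] + bias[c]
        return x * scale[None, :, None, None] + bias[None, :, None, None]

    x = np.random.randn(2, 64, 8, 8).astype(np.float32)
    scale = np.ones(64, dtype=np.float32)   # ConstantFill value=1.
    bias = np.zeros(64, dtype=np.float32)   # ConstantFill value=0.
    assert np.allclose(affine_channel_ref(x, scale, bias), x)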