Commit e59c30bb authored by Ashwin Bharambe, committed by Ashwin Bharambe

Fix AffineChannel op param initialization

Summary:
The `AffineChannel` op wrapper did not use the officially sanctioned
mechanism for parameter creation, namely `ModelHelper.create_param()`.
Presumably this was because of the `share_with` option, which allowed
parameters to be shared with another op.

As a result, the parameters were never initialized! This probably
doesn't matter for the algorithm (those params get overwritten pretty quickly
during training), but it's a bug nevertheless. I decided to kill the
`share_with` option, since it isn't used anywhere, and switch to the standard
way of creating weights and biases.
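
For reference, a minimal sketch of the `create_param()` pattern the diff below
adopts (the model and blob names here are illustrative, not taken from the
change):

    # create_param() registers the blob as a tagged parameter and emits an
    # initialization op into param_init_net, so the blob is never left
    # uninitialized.
    from caffe2.python import workspace
    from caffe2.python.model_helper import ModelHelper
    from caffe2.python.modeling import initializers
    from caffe2.python.modeling.parameter_info import ParameterTags

    model = ModelHelper(name='affine_channel_sketch')
    dim = 64  # channel count the affine transform operates over
    scale = model.create_param(
        param_name='bn_s',
        initializer=initializers.Initializer('ConstantFill', value=1.),
        tags=ParameterTags.WEIGHT,
        shape=[dim],
    )
    bias = model.create_param(
        param_name='bn_b',
        initializer=initializers.Initializer('ConstantFill', value=0.),
        tags=ParameterTags.BIAS,
        shape=[dim],
    )
    # Running param_init_net fills scale with ones and bias with zeros.
    workspace.RunNetOnce(model.param_init_net)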

Reviewed By: rbgirshick

Differential Revision: D6826914

fbshipit-source-id: 65628c59b085b9ab160006b003dd40dbefa2f7c1
parent 946ba8d0
@@ -92,7 +92,7 @@ def add_ResNet_convX_body(model, block_counts, freeze_at=2):
     X = 4 or 5)."""
     assert freeze_at in [0, 2, 3, 4, 5]
     p = model.Conv('data', 'conv1', 3, 64, 7, pad=3, stride=2, no_bias=1)
-    p = model.AffineChannel(p, 'res_conv1_bn', inplace=True)
+    p = model.AffineChannel(p, 'res_conv1_bn', dim=64, inplace=True)
     p = model.Relu(p, p)
     p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)
     dim_in = 64
@@ -204,7 +204,7 @@ def add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
         stride=stride,
         no_bias=1
     )
-    return model.AffineChannel(c, prefix + '_branch1_bn')
+    return model.AffineChannel(c, prefix + '_branch1_bn', dim=dim_out)

 # ------------------------------------------------------------------------------
@@ -26,6 +26,8 @@ import logging
 from caffe2.python import cnn
 from caffe2.python import core
 from caffe2.python import workspace
+from caffe2.python.modeling import initializers
+from caffe2.python.modeling.parameter_info import ParameterTags
 from core.config import cfg
 from ops.collect_and_distribute_fpn_rpn_proposals \
@@ -75,26 +77,27 @@ class DetectionModelHelper(cnn.CNNModelHelper):
                  str(p).find('gpu_{}'.format(gpu_id)) == 0)
             )]

-    def AffineChannel(self, blob_in, blob_out, share_with=None, inplace=False):
+    def AffineChannel(self, blob_in, blob_out, dim, inplace=False):
         """Affine transformation to replace BN in networks where BN cannot be
         used (e.g., because the minibatch size is too small).
-        The AffineChannel parameters may be shared with another AffineChannelOp
-        by specifying its blob name (excluding the '_{s,b}' suffix) in the
-        share_with argument. The operations can be done in place to save memory.
+        The operations can be done in place to save memory.
         """
         blob_out = blob_out or self.net.NextName()
-        is_not_sharing = share_with is None
-        param_prefix = blob_out if is_not_sharing else share_with
-        scale = core.ScopedBlobReference(
-            param_prefix + '_s', self.param_init_net)
-        bias = core.ScopedBlobReference(
-            param_prefix + '_b', self.param_init_net)
-        if is_not_sharing:
-            self.net.Proto().external_input.extend([str(scale), str(bias)])
-            self.params.extend([scale, bias])
-            self.weights.append(scale)
-            self.biases.append(bias)
+        param_prefix = blob_out
+        scale = self.create_param(
+            param_name=param_prefix + '_s',
+            initializer=initializers.Initializer("ConstantFill", value=1.),
+            tags=ParameterTags.WEIGHT,
+            shape=[dim, ],
+        )
+        bias = self.create_param(
+            param_name=param_prefix + '_b',
+            initializer=initializers.Initializer("ConstantFill", value=0.),
+            tags=ParameterTags.BIAS,
+            shape=[dim, ],
+        )
         if inplace:
             return self.net.AffineChannel([blob_in, scale, bias], blob_in)
         else:
@@ -403,7 +406,7 @@ class DetectionModelHelper(cnn.CNNModelHelper):
             no_bias=1
         )
         blob_out = self.AffineChannel(
-            conv_blob, prefix + suffix, inplace=inplace
+            conv_blob, prefix + suffix, dim=dim_out, inplace=inplace
         )
         return blob_out
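
With this change, call sites must pass the channel dimension explicitly. A
minimal usage sketch (assuming `model` is a `DetectionModelHelper` and `p` is
the output blob of the 64-channel conv from the first hunk):

    # dim must equal the channel count of the incoming blob; with the
    # ConstantFill initializers above (scale = 1, bias = 0) the op starts
    # out as the identity: out[c] = 1. * in[c] + 0.
    p = model.AffineChannel(p, 'res_conv1_bn', dim=64, inplace=True)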