Commit e59c30bb authored by Ashwin Bharambe, committed by Ashwin Bharambe

Fix AffineChannel op param initialization

Summary:
The `AffineChannel` op wrapper did not use the officially sanctioned
mechanism for parameter creation, namely `ModelHelper.create_param()`.
Presumably this was because of the `share_with` option, which let you share
parameters with another op.

As a result, the parameters were never initialized! This probably doesn't
matter for the algorithm (those params get overwritten pretty quickly), but
it's a bug nevertheless. I decided to kill the `share_with` option, since it
isn't used anywhere anyway, and resort to the standard way of creating
weights and biases.
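
For reference, a minimal sketch of what `create_param()` buys you, using the
stock Caffe2 `ModelHelper` (the model and blob names here are made up for
illustration). Unlike a bare `core.ScopedBlobReference`, it both registers
the parameter with the model and emits a fill op into `param_init_net`, so
the blob has a well-defined starting value:

    from caffe2.python import model_helper
    from caffe2.python.modeling import initializers
    from caffe2.python.modeling.parameter_info import ParameterTags

    model = model_helper.ModelHelper(name='example')
    # Registers the blob as a weight AND adds a ConstantFill op to
    # model.param_init_net, so running param_init_net fills it with ones.
    scale = model.create_param(
        param_name='example_s',
        initializer=initializers.Initializer('ConstantFill', value=1.),
        tags=ParameterTags.WEIGHT,
        shape=[64],
    )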

Reviewed By: rbgirshick

Differential Revision: D6826914

fbshipit-source-id: 65628c59b085b9ab160006b003dd40dbefa2f7c1
parent 946ba8d0
@@ -92,7 +92,7 @@ def add_ResNet_convX_body(model, block_counts, freeze_at=2):
     X = 4 or 5)."""
     assert freeze_at in [0, 2, 3, 4, 5]
     p = model.Conv('data', 'conv1', 3, 64, 7, pad=3, stride=2, no_bias=1)
-    p = model.AffineChannel(p, 'res_conv1_bn', inplace=True)
+    p = model.AffineChannel(p, 'res_conv1_bn', dim=64, inplace=True)
     p = model.Relu(p, p)
     p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)
     dim_in = 64
@@ -204,7 +204,7 @@ def add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
         stride=stride,
         no_bias=1
     )
-    return model.AffineChannel(c, prefix + '_branch1_bn')
+    return model.AffineChannel(c, prefix + '_branch1_bn', dim=dim_out)

 # ------------------------------------------------------------------------------
...
@@ -26,6 +26,8 @@ import logging
 from caffe2.python import cnn
 from caffe2.python import core
 from caffe2.python import workspace
+from caffe2.python.modeling import initializers
+from caffe2.python.modeling.parameter_info import ParameterTags
 from core.config import cfg
 from ops.collect_and_distribute_fpn_rpn_proposals \
@@ -75,26 +77,27 @@ class DetectionModelHelper(cnn.CNNModelHelper):
                 str(p).find('gpu_{}'.format(gpu_id)) == 0)
             )]

-    def AffineChannel(self, blob_in, blob_out, share_with=None, inplace=False):
+    def AffineChannel(self, blob_in, blob_out, dim, inplace=False):
         """Affine transformation to replace BN in networks where BN cannot be
         used (e.g., because the minibatch size is too small).

-        The AffineChannel parameters may be shared with another AffineChannelOp
-        by specifying its blob name (excluding the '_{s,b}' suffix) in the
-        share_with argument. The operations can be done in place to save memory.
+        The operations can be done in place to save memory.
         """
         blob_out = blob_out or self.net.NextName()
-        is_not_sharing = share_with is None
-        param_prefix = blob_out if is_not_sharing else share_with
-        scale = core.ScopedBlobReference(
-            param_prefix + '_s', self.param_init_net)
-        bias = core.ScopedBlobReference(
-            param_prefix + '_b', self.param_init_net)
-        if is_not_sharing:
-            self.net.Proto().external_input.extend([str(scale), str(bias)])
-            self.params.extend([scale, bias])
-            self.weights.append(scale)
-            self.biases.append(bias)
+        param_prefix = blob_out
+        scale = self.create_param(
+            param_name=param_prefix + '_s',
+            initializer=initializers.Initializer("ConstantFill", value=1.),
+            tags=ParameterTags.WEIGHT,
+            shape=[dim, ],
+        )
+        bias = self.create_param(
+            param_name=param_prefix + '_b',
+            initializer=initializers.Initializer("ConstantFill", value=0.),
+            tags=ParameterTags.BIAS,
+            shape=[dim, ],
+        )
         if inplace:
             return self.net.AffineChannel([blob_in, scale, bias], blob_in)
         else:
             return self.net.AffineChannel([blob_in, scale, bias], blob_out)
@@ -403,7 +406,7 @@ class DetectionModelHelper(cnn.CNNModelHelper):
             no_bias=1
         )
         blob_out = self.AffineChannel(
-            conv_blob, prefix + suffix, inplace=inplace
+            conv_blob, prefix + suffix, dim=dim_out, inplace=inplace
         )
         return blob_out
...
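
For intuition: AffineChannel applies a per-channel scale and shift, so the
ConstantFill defaults above (scale = 1, bias = 0) make the op an identity
transform until training updates it. A NumPy sketch of the semantics
(illustrative only, not part of this diff):

    import numpy as np

    def affine_channel_ref(x, scale, bias):
        # x is NCHW; scale and bias hold one value per channel:
        # out[n, c, h, w] = scale[c] * x[n, c, h, w] + bias[c]
        return x * scale[None, :, None, None] + bias[None, :, None, None]

    x = np.random.randn(2, 64, 8, 8).astype(np.float32)
    scale = np.ones(64, dtype=np.float32)   # ConstantFill value=1.
    bias = np.zeros(64, dtype=np.float32)   # ConstantFill value=0.
    assert np.allclose(affine_channel_ref(x, scale, bias), x)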