The PyTorch source takes a 3-D input and uses the BatchNorm1d operator. MindSpore's BatchNorm1d only accepts 2-D input, so I used MindSpore's BatchNorm2d operator (which takes 4-D input) instead: the input is first raised to 4-D, passed through BatchNorm2d, and the result is squeezed back down. The MindSpore model converted from PyTorch gives the same output in training mode, but in evaluation mode the output of the BatchNorm2d operator (which stands in for PyTorch's BatchNorm1d and reuses the bn1d weights) is different.
```python
# MindSpore
import mindspore.nn as nn
import mindspore.ops as ops


class PFNLayer(nn.Cell):
    def __init__(self, in_channels, out_channels, norm_cfg=None, last_layer=False):
        super(PFNLayer, self).__init__()

        self.last_vfe = last_layer
        if not self.last_vfe:
            out_channels = out_channels // 2
        self.units = out_channels

        self.linear = nn.Dense(in_channels, self.units, has_bias=False)
        if norm_cfg is None:
            # use_batch_statistics=True makes the layer normalize with batch
            # statistics even in inference mode
            self.norm = nn.BatchNorm2d(self.units, eps=1e-3, momentum=0.99, use_batch_statistics=True)

        self.transpose = ops.Transpose()
        self.tile = ops.Tile()
        self.concat = ops.Concat(axis=2)
        self.expand_dims = ops.ExpandDims()
        self.argmax_w_value = ops.ArgMaxWithValue(axis=1, keep_dims=True)

    def construct(self, inputs):
        """forward graph"""
        x = self.linear(inputs)
        x = self.expand_dims(x, 0)
        # raise to 4-D for MindSpore BatchNorm2d, then restore the original layout
        x = self.norm(x.transpose((0, 3, 1, 2))).transpose((0, 2, 3, 1)).squeeze(axis=0)
        x = ops.ReLU()(x)
        x_max = self.argmax_w_value(x)[1]
        if self.last_vfe:
            return x_max
        x_repeat = self.tile(x_max, (1, inputs.shape[1], 1))
        x_concatenated = self.concat([x, x_repeat])
        return x_concatenated
```
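The dim-raise/lower trick above can be checked in isolation. Below is a minimal shape-flow sketch, assuming a synthetic (pillars, points, channels) input; the sizes and variable names are placeholders, not part of the original model.

```python
import numpy as np
import mindspore as ms
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import context

context.set_context(mode=context.PYNATIVE_MODE)  # run eagerly for this quick check

pillars, points, channels = 8, 32, 64            # placeholder sizes
x = ms.Tensor(np.random.randn(pillars, points, channels).astype(np.float32))

bn2d = nn.BatchNorm2d(channels, eps=1e-3, momentum=0.99)
expand_dims = ops.ExpandDims()

x4d = expand_dims(x, 0)                           # (1, pillars, points, channels)
x4d = x4d.transpose((0, 3, 1, 2))                 # (1, channels, pillars, points), NCHW for BatchNorm2d
y = bn2d(x4d)                                     # per-channel stats over 1 * pillars * points positions
y = y.transpose((0, 2, 3, 1)).squeeze(axis=0)     # back to (pillars, points, channels)
print(x.shape, y.shape)                           # shapes match
```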
```python
# norm layer config and builder used by the PyTorch PFNLayer below
import torch.nn as nn

norm_cfg = {
    # format: layer_type: (abbreviation, module)
    "BN": ("bn", nn.BatchNorm2d),
    "BN1d": ("bn1d", nn.BatchNorm1d),
    "GN": ("gn", nn.GroupNorm),
}


def build_norm_layer(cfg, num_features, postfix=""):
    """Build normalization layer"""
    assert isinstance(cfg, dict) and "type" in cfg
    cfg_ = cfg.copy()

    layer_type = cfg_.pop("type")
    if layer_type not in norm_cfg:
        raise KeyError("Unrecognized norm type {}".format(layer_type))
    else:
        abbr, norm_layer = norm_cfg[layer_type]
        if norm_layer is None:
            raise NotImplementedError

    assert isinstance(postfix, (int, str))
    name = abbr + str(postfix)

    requires_grad = cfg_.pop("requires_grad", True)
    cfg_.setdefault("eps", 1e-5)
    if layer_type != "GN":
        layer = norm_layer(num_features, **cfg_)
        # if layer_type == 'SyncBN':
        #     layer._specify_ddp_gpu_num(1)
    else:
        assert "num_groups" in cfg_
        layer = norm_layer(num_channels=num_features, **cfg_)

    for param in layer.parameters():
        param.requires_grad = requires_grad

    return name, layer
```
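For reference, a hypothetical call to build_norm_layer with the default config that the PyTorch PFNLayer below passes in; the channel count 64 is a placeholder.

```python
name, norm = build_norm_layer(dict(type="BN1d", eps=1e-3, momentum=0.01), 64)
print(name)  # "bn1d"
print(norm)  # BatchNorm1d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
```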
```python
# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class PFNLayer(nn.Module):
    def __init__(self, in_channels, out_channels, norm_cfg=None, last_layer=False):
        super().__init__()
        self.name = "PFNLayer"
        self.last_vfe = last_layer
        if not self.last_vfe:
            out_channels = out_channels // 2
        self.units = out_channels

        if norm_cfg is None:
            norm_cfg = dict(type="BN1d", eps=1e-3, momentum=0.01)
        self.norm_cfg = norm_cfg

        self.linear = nn.Linear(in_channels, self.units, bias=False)
        self.norm = build_norm_layer(self.norm_cfg, self.units)[1]

    def forward(self, inputs):
        x = self.linear(inputs)
        torch.backends.cudnn.enabled = False
        # PyTorch BatchNorm1d on a 3-D input: permute to (B, C, L), normalize, permute back
        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 1).contiguous()
        torch.backends.cudnn.enabled = True
        x = F.relu(x)

        x_max = torch.max(x, dim=1, keepdim=True)[0]

        if self.last_vfe:
            return x_max
        else:
            x_repeat = x_max.repeat(1, inputs.shape[1], 1)
            x_concatenated = torch.cat([x, x_repeat], dim=2)
            return x_concatenated
```
****************************************************Answer*****************************************************
Hello. For the comparison to be meaningful, both sides should use the same operator, i.e. BatchNorm1d on both or BatchNorm2d on both. Also, in this example the batch size is effectively 1, which makes batch normalization lose its meaning anyway.
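A minimal sketch of such a like-for-like check, assuming a plain 2-D input, a batch size larger than 1, the same BatchNorm1d operator on both sides, and PyTorch's weights and running statistics copied into the MindSpore layer before comparing eval-mode outputs; the shapes, seed, and values are illustrative assumptions, not taken from the model above.

```python
import numpy as np
import torch
import mindspore as ms
import mindspore.nn as msnn

np.random.seed(0)
batch, channels = 16, 64                          # placeholder sizes, batch > 1
x = np.random.randn(batch, channels).astype(np.float32)

# PyTorch BatchNorm1d in eval mode
pt_bn = torch.nn.BatchNorm1d(channels, eps=1e-3, momentum=0.01).eval()

# MindSpore BatchNorm1d (2-D input) in eval mode; MindSpore's momentum is defined
# as 1 - PyTorch's momentum, and use_batch_statistics is left at its default so
# inference uses the moving statistics
ms_bn = msnn.BatchNorm1d(channels, eps=1e-3, momentum=0.99)
ms_bn.set_train(False)

# copy gamma/beta and the moving statistics from PyTorch into MindSpore
ms_bn.gamma.set_data(ms.Tensor(pt_bn.weight.detach().numpy()))
ms_bn.beta.set_data(ms.Tensor(pt_bn.bias.detach().numpy()))
ms_bn.moving_mean.set_data(ms.Tensor(pt_bn.running_mean.numpy()))
ms_bn.moving_variance.set_data(ms.Tensor(pt_bn.running_var.numpy()))

pt_out = pt_bn(torch.from_numpy(x)).detach().numpy()
ms_out = ms_bn(ms.Tensor(x)).asnumpy()
print(np.max(np.abs(pt_out - ms_out)))            # expected to be ~0 when settings are aligned
```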