Caffe To Caffe2 Python脚本caffe_translator.py详细注释

陆俊捷
2023-12-01

将Caffe的模型转换成Caffe2模型,官方提供了一个caffe_translator.py脚本。在此我做了详细的注释。

## @package caffe_translator
# Module caffe2.python.caffe_translator
#!/usr/bin/env python2

import argparse
import copy
import logging
import re
import numpy as np  # noqa

from caffe2.proto import caffe2_pb2, caffe2_legacy_pb2
from caffe.proto import caffe_pb2
from caffe2.python import core, utils, workspace
from google.protobuf import text_format

# Configure the root logger with the library defaults so that messages are
# emitted even when the embedding program has not set up logging itself.
logging.basicConfig()
# Module-level logger; the translator reports per-layer progress through it.
log = logging.getLogger("caffe_translator")
log.setLevel(logging.INFO)


def _StateMeetsRule(state, rule):
    """Tell whether a net state satisfies a single Caffe NetStateRule.

    Reproduces Caffe's StateMeetsRule check.

    Args:
        state: object exposing ``phase``, ``level`` and an iterable ``stage``.
        rule: object exposing ``HasField`` together with ``phase``,
            ``min_level``, ``max_level``, ``stage`` and ``not_stage``.

    Returns:
        True when none of the rule's constraints is violated, else False.
    """
    # The phase only constrains the match when the rule sets it explicitly.
    if rule.HasField('phase') and state.phase != rule.phase:
        return False

    # The level has to lie inside the optional [min_level, max_level] window.
    too_low = rule.HasField('min_level') and rule.min_level > state.level
    too_high = rule.HasField('max_level') and rule.max_level < state.level
    if too_low or too_high:
        return False

    active_stages = frozenset(state.stage)
    # Every stage required by the rule must be active.
    if not all(stage in active_stages for stage in rule.stage):
        return False
    # No stage forbidden by the rule may be active.
    return not any(stage in active_stages for stage in rule.not_stage)

# 判断该layer是否属于给定的网络状态
def _ShouldInclude(net_state, layer):
    """Decide whether ``layer`` takes part in the net for ``net_state``.

    Reproduces Caffe's include/exclude semantics:

    * when the layer lists ``include`` rules, it is kept if and only if at
      least one of them matches the state;
    * otherwise it is kept unless one of its ``exclude`` rules matches.

    Args:
        net_state: state object handed to ``_StateMeetsRule``.
        layer: object exposing the rule sequences ``include`` and ``exclude``.

    Returns:
        True when the layer should be translated, False otherwise.
    """
    if len(layer.include):
        # With include rules present the exclude rules cannot turn the
        # result back on, so only the include rules decide.
        return any(
            _StateMeetsRule(net_state, rule) for rule in layer.include)
    return not any(
        _StateMeetsRule(net_state, rule) for rule in layer.exclude)

# dummy_input 一个假设的输入,随机值
def _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops):
    """Run ``net`` once and record output shapes of selected operators.

    A private workspace is filled with every tensor of ``net_params`` and
    with ``dummy_input`` (stored under the first input name of the first
    operator); the operators are then executed one by one, in order.

    Args:
        net: net definition whose ``op`` entries are executed.
        net_params: container whose ``protos`` hold the parameter tensors.
        dummy_input: array fed to the network input blob.
        legacy_pad_ops: indices (into ``net.op``) of the operators whose
            output shape should be recorded.

    Returns:
        dict mapping each listed operator index to the shape of that
        operator's first output.
    """
    scratch_ws = workspace.C.Workspace()
    for tensor in net_params.protos:
        values = utils.Caffe2TensorToNumpyArray(tensor)
        scratch_ws.create_blob(tensor.name).feed(values)

    input_name = net.op[0].input[0]
    scratch_ws.create_blob(input_name).feed(dummy_input)

    legacy_shapes = {}
    for index, op_def in enumerate(net.op):
        # Every operator has to run so that later ones find their inputs.
        scratch_ws._run_operator(op_def.SerializeToString())
        if index not in legacy_pad_ops:
            continue
        first_output = op_def.output[0]
        legacy_shapes[index] = scratch_ws.fetch_blob(first_output).shape
    return legacy_shapes


def _GetLegacyPadArgs(op_def, arg_map):
    """Collect the four per-side padding values of an operator.

    Args:
        op_def: the operator definition (not consulted here; kept for the
            call signature).
        arg_map: dict mapping argument name to an argument object whose
            integer value is stored in ``.i``.

    Returns:
        dict with the keys ``pad_l``, ``pad_t``, ``pad_r`` and ``pad_b``.
        A single ``pad`` argument, when present, supplies all four values;
        otherwise the four per-side arguments are read individually.

    Raises:
        KeyError: if neither ``pad`` nor all four per-side arguments exist.
    """
    sides = ['pad_l', 'pad_t', 'pad_r', 'pad_b']
    if 'pad' in arg_map:
        # One symmetric value applies to every side.
        return dict((side, arg_map['pad'].i) for side in sides)
    return dict((side, arg_map[side].i) for side in sides)

# pads:保存的是pad_l,pad_t,pad_r,pad_b及其值
# arg_map: 一个字典。key=arg的name,value=arg
# dim1: legacy_pad移除前的shape
# dim2:legacy_pad移除后的shape
# 是否带legacy_pad的区别在于输出尺寸的取整方式:带legacy_pad时(Caffe方式)向上取整,移除后(Caffe2方式)向下取整
def _AdjustDims(op_def, arg_map, pads, dim1, dim2):
    """Grow bottom/right padding so both runs give the same output shape.

    Args:
        op_def: operator definition whose ``arg`` list may be rewritten.
        arg_map: dict mapping argument name to the argument object of
            ``op_def``.
        pads: dict with ``pad_l``/``pad_t``/``pad_r``/``pad_b`` values;
            updated in place.
        dim1: 4-element output shape obtained with ``legacy_pad``.
        dim2: 4-element output shape obtained without ``legacy_pad``.

    Raises:
        AssertionError: if the first two dimensions of the shapes differ.
        Exception: if height or width differ by anything other than the
            legacy output being exactly one larger.
    """
    n1, c1, h1, w1 = dim1
    n2, c2, h2, w2 = dim2
    # Padding only influences the two trailing (spatial) dimensions.
    assert(n1 == n2)
    assert(c1 == c2)

    if h1 == h2 and w1 == w2:
        # Shapes already agree: leave the operator untouched.
        return

    # One extra output row/column is compensated by one extra unit of
    # padding on the bottom/right side; any other gap is unexpected.
    checks = (
        (h1, h2, 'pad_b', "Unexpected dimensions for height:"),
        (w1, w2, 'pad_r', "Unexpected dimensions for width:"),
    )
    for legacy, plain, pad_key, message in checks:
        if legacy == plain + 1:
            pads[pad_key] += 1
        elif legacy != plain:
            raise Exception(message, legacy, plain)

    if 'pad' not in arg_map:
        # The operator already uses per-side arguments: update them.
        for name in pads.keys():
            arg_map[name].i = pads[name]
        return

    # A single symmetric ``pad`` cannot express the asymmetric result, so
    # it is replaced by four explicit per-side arguments.
    op_def.arg.remove(arg_map['pad'])
    replacements = []
    for name in pads.keys():
        side_arg = caffe2_pb2.Argument()
        side_arg.name = name
        side_arg.i = pads[name]
        replacements.append(side_arg)
    op_def.arg.extend(replacements)

# net: 只保存了Op的信息,没有可学习的参数
# net_params: 保存了可学习的参数
def _RemoveLegacyPad(net, net_params, input_dims):
    """Drop ``legacy_pad`` arguments from ``net``, fixing padding as needed.

    The network is executed twice on the same random input: once unchanged
    (see ``_GetLegacyDims``) and once with ``legacy_pad`` removed from each
    affected operator.  Where the two output shapes differ, the operator's
    padding arguments are adjusted through ``_AdjustDims``.

    Args:
        net: net definition holding the operators; modified in place.
        net_params: container whose ``protos`` hold the parameter tensors.
        input_dims: four values ``(n, c, h, w)`` giving the shape of the
            random input fed to the first input of the first operator.

    Returns:
        The same ``net`` object.
    """
    # Only convolution and pooling style operators carry padding arguments.
    legacy_pad_ops = []
    for i, op_def in enumerate(net.op):
        if not re.match(r'^(Conv|ConvTranspose|MaxPool|AveragePool)(\dD)?$',
                        op_def.type):
            continue
        if any(arg.name == 'legacy_pad' for arg in op_def.arg):
            legacy_pad_ops.append(i)

    if not legacy_pad_ops:
        return net

    n, c, h, w = input_dims
    dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
    # Output shapes produced while ``legacy_pad`` is still in place.
    dim_map = _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops)

    # Second pass: run with the legacy pad argument removed, compare the
    # dimensions and adjust the pad arguments when necessary.
    ws = workspace.C.Workspace()

    external_input = net.op[0].input[0]
    # NOTE(review): this used to call ``.feed_blob`` on the created blob,
    # whereas the other helpers in this file call ``.feed`` on the same
    # kind of object; unified on ``.feed`` - confirm against the Caffe2
    # workspace API in use.
    ws.create_blob(external_input).feed(dummy_input)
    for param in net_params.protos:
        ws.create_blob(param.name) \
          .feed(utils.Caffe2TensorToNumpyArray(param))

    for i, op_def in enumerate(net.op):
        if i in legacy_pad_ops:
            arg_map = dict((arg.name, arg) for arg in op_def.arg)
            pads = _GetLegacyPadArgs(op_def, arg_map)
            # Remove the legacy pad argument (at most one is removed).
            for j, arg in enumerate(op_def.arg):
                if arg.name == 'legacy_pad':
                    del op_def.arg[j]
                    break
            output = op_def.output[0]
            # Use a temporary output name so that an in-place operator
            # does not overwrite its own input during the trial run.
            nonlegacy_output = output + '_nonlegacy'
            op_def.output[0] = nonlegacy_output
            ws._run_operator(op_def.SerializeToString())
            blob_nonlegacy = ws.fetch_blob(nonlegacy_output)
            # Restore the real output name.
            op_def.output[0] = output

            dim1 = dim_map[i]            # shape with legacy_pad
            dim2 = blob_nonlegacy.shape  # shape without legacy_pad
            _AdjustDims(op_def, arg_map, pads, dim1, dim2)

        # Run the (possibly adjusted) operator under its real output name
        # so that the following operators find their inputs.
        ws._run_operator(op_def.SerializeToString())
    return net


def _GetBlobDimMap(net, net_params, dummy_input):
    """Run ``net`` once and record the shape of every operator output.

    Args:
        net: net definition whose ``op`` entries are executed in order.
        net_params: container whose ``protos`` hold the parameter tensors.
        dummy_input: array fed to the blob named by the first input of the
            first operator.

    Returns:
        dict mapping output blob name to the shape fetched right after the
        producing operator ran (a later producer of the same name wins).
    """
    scratch_ws = workspace.C.Workspace()
    for tensor in net_params.protos:
        values = utils.Caffe2TensorToNumpyArray(tensor)
        scratch_ws.create_blob(tensor.name).feed(values)

    input_name = net.op[0].input[0]
    scratch_ws.create_blob(input_name).feed(dummy_input)

    shapes = {}
    for op_def in net.op:
        scratch_ws._run_operator(op_def.SerializeToString())
        for blob_name in op_def.output:
            shapes[blob_name] = scratch_ws.fetch_blob(blob_name).shape
    return shapes

# 通过deploy.prototxt获得输入blob的维度
def _GetInputDims(caffe_net):
    """Read the input blob dimensions declared by a Caffe network.

    The sources are consulted in this order, and the first non-empty one
    wins:

    1. ``caffe_net.input_dim``
    2. ``caffe_net.input_shape[0].dim``
    3. ``caffe_net.layer[0].input_param.shape[0].dim``

    Args:
        caffe_net: Caffe network definition.

    Returns:
        The dimension sequence that was found, or an empty list when the
        network declares none (including a network without any layer).
    """
    if caffe_net.input_dim:
        return caffe_net.input_dim
    if caffe_net.input_shape:
        return caffe_net.input_shape[0].dim
    # Guard against a network without layers: indexing ``layer[0]`` on an
    # empty sequence would raise IndexError instead of reporting "no dims".
    if caffe_net.layer and caffe_net.layer[0].input_param.shape:
        # Fall back to the input dimensions declared by the first layer.
        return caffe_net.layer[0].input_param.shape[0].dim
    return []


class TranslatorRegistry(object):
    """Registry mapping Caffe layer type names to translator functions."""

    # Class-level table shared by all users: layer type -> translator.
    registry_ = {}

    @classmethod
    def Register(cls, op_name):
        """Return a decorator registering a translator for ``op_name``.

        The decorated function is stored in ``registry_`` under
        ``op_name`` and returned unchanged, e.g.::

            @TranslatorRegistry.Register("ReLU")
            def TranslateRelu(layer, pretrained_blobs, is_test, **kwargs):
                ...
        """

        def Wrapper(func):
            cls.registry_[op_name] = func
            return func

        return Wrapper

    @classmethod
    def TranslateLayer(cls, layer, pretrained_blobs, is_test, **kwargs):
        """Translate one Caffe layer with its registered translator.

        Args:
            layer: Caffe layer; ``layer.type`` selects the translator.
            pretrained_blobs: parameter arrays belonging to the layer.
            is_test: flag forwarded to the translator.
            **kwargs: extra context forwarded to the translator.

        Returns:
            ``(caffe_ops, params)`` where ``caffe_ops`` is always a list
            (``None`` becomes ``[]``, a single operator is wrapped).

        Raises:
            KeyError: if no translator is registered for ``layer.type``.
                A KeyError raised inside the translator itself is
                propagated unchanged.
        """
        # Only the registry lookup is guarded, so that a KeyError coming
        # from the translator body is not misreported as a missing
        # translator.
        try:
            translator = cls.registry_[layer.type]
        except KeyError:
            raise KeyError('No translator registered for layer: %s yet.' %
                           str(layer))
        caffe_ops, params = translator(
            layer, pretrained_blobs, is_test, **kwargs)
        if caffe_ops is None:
            caffe_ops = []
        if not isinstance(caffe_ops, list):
            caffe_ops = [caffe_ops]
        return caffe_ops, params

    @classmethod
    def TranslateModel(
        cls,
        caffe_net,
        pretrained_net,
        is_test=False,
        net_state=None,
        remove_legacy_pad=False,
        input_dims=None
    ):
        """Translate a whole Caffe network into Caffe2 protobufs.

        Note the two protobuf modules involved: ``caffe_pb2`` describes
        Caffe messages, ``caffe2_pb2`` describes Caffe2 messages.

        Args:
            caffe_net: Caffe network definition (the deploy prototxt).
            pretrained_net: Caffe network carrying the trained blobs (the
                caffemodel); both its ``layer`` and ``layers`` fields are
                searched by layer name.
            is_test: flag forwarded to every layer translator.
            net_state: Caffe net state used for include/exclude rules; a
                default ``caffe_pb2.NetState()`` is used when ``None``.
            remove_legacy_pad: when true, ``_RemoveLegacyPad`` is applied
                to the translated net.
            input_dims: input dimensions; read from ``caffe_net`` through
                ``_GetInputDims`` when empty or ``None``.

        Returns:
            ``(net, net_params)``: a ``caffe2_pb2.NetDef`` with the
            translated operators and a ``caffe2_pb2.TensorProtos`` with
            the tensors returned by the layer translators.

        Raises:
            ValueError: if ``caffe_net`` uses the old ``layers`` field, or
                if several pretrained layers share one name.
            AssertionError: if ``remove_legacy_pad`` is requested but no
                input dimensions are available.
        """
        net_state = caffe_pb2.NetState() if net_state is None else net_state
        net = caffe2_pb2.NetDef()
        net.name = caffe_net.name
        net_params = caffe2_pb2.TensorProtos()
        # Old-style (V1) layer definitions are rejected.
        if len(caffe_net.layers) > 0:
            raise ValueError(
                'I think something is wrong. This translation script '
                'only accepts new style layers that are stored in the '
                'layer field.'
            )
        if not input_dims:
            input_dims = _GetInputDims(caffe_net)
        for layer in caffe_net.layer:
            if not _ShouldInclude(net_state, layer):
                log.info('Current net state does not need layer {}'
                            .format(layer.name))
                continue
            log.info('Translate layer {}'.format(layer.name))
            # Find the pretrained layer with the same name.  Only the
            # network definition is restricted to new-style layers, so
            # the pretrained net is searched in both of its layer fields.
            pretrained_layers = [
                candidate for candidate in pretrained_net.layer
                if candidate.name == layer.name
            ] + [
                candidate for candidate in pretrained_net.layers
                if candidate.name == layer.name
            ]
            if len(pretrained_layers) > 1:
                raise ValueError(
                    'huh? more than one pretrained layer of one name?')
            elif len(pretrained_layers) == 1:
                # A layer may own several blobs (e.g. weight and bias).
                pretrained_blobs = [
                    utils.CaffeBlobToNumpyArray(blob)
                    for blob in pretrained_layers[0].blobs
                ]
            else:
                # No pretrained layer for the given layer name. We'll
                # just pass no parameter blobs.
                pretrained_blobs = []
            operators, params = cls.TranslateLayer(
                layer, pretrained_blobs, is_test, net=net,
                net_params=net_params, input_dims=input_dims)

            net.op.extend(operators)
            net_params.protos.extend(params)

        # All layers are translated; legacy padding is handled in one go.
        if remove_legacy_pad:
            assert input_dims, \
                   'Please specify input_dims to remove legacy_pad'
            net = _RemoveLegacyPad(net, net_params, input_dims)
        return net, net_params


def TranslateModel(*args, **kwargs):
    """Module-level shortcut for ``TranslatorRegistry.TranslateModel``.

    All positional and keyword arguments are forwarded unchanged and the
    result of the class method is returned as is.
    """
    translate = TranslatorRegistry.TranslateModel
    return translate(*args, **kwargs)

# net_params: 包含了可学习的参数
# input_name: external_input = net.op[0].input[0],网络的输入blob的名字
def ConvertTensorProtosToInitNet(net_params, input_name):
    """Wrap translated parameters into an init net of fill operators.

    Takes the ``net_params`` returned from ``TranslateModel`` and emits
    one ``GivenTensorFill`` operator per tensor, followed by a
    ``ConstantFill`` operator (``shape=[1]``) that creates the blob named
    ``input_name``.

    This is a very simple feature that only works with float tensors, and
    is only intended to be used in an environment where you want a single
    initialization file - for more complex cases, use a db to store the
    parameters.

    Args:
        net_params: container whose ``protos`` hold the parameter tensors.
        input_name: name of the network input blob.

    Returns:
        A ``caffe2_pb2.NetDef`` containing the fill operators.

    Raises:
        RuntimeError: if a tensor carries no ``float_data``.
    """
    init_net = caffe2_pb2.NetDef()
    for tensor in net_params.protos:
        if not len(tensor.float_data):
            raise RuntimeError(
                "Only float tensors are supported in this util.")
        # The fill operator outputs a blob named after the tensor and
        # carries the tensor's shape and values as arguments.
        fill_args = [
            utils.MakeArgument("shape", list(tensor.dims)),
            utils.MakeArgument("values", tensor.float_data),
        ]
        fill_op = core.CreateOperator(
            "GivenTensorFill", [], [tensor.name], arg=fill_args)
        init_net.op.extend([fill_op])

    # Placeholder for the network input blob.
    input_fill = core.CreateOperator(
        "ConstantFill", [], [input_name], shape=[1])
    init_net.op.extend([input_fill])
    return init_net


def BaseTranslate(layer, caffe2_type):
    """Create a Caffe2 operator mirroring a layer's inputs and outputs.

    Args:
        layer: Caffe layer; its ``bottom`` names become the operator
            inputs and its ``top`` names become the operator outputs.
        caffe2_type: type string assigned to the new operator.

    Returns:
        A new ``caffe2_pb2.OperatorDef``.
    """
    translated = caffe2_pb2.OperatorDef()
    translated.input.extend(layer.bottom)
    translated.output.extend(layer.top)
    translated.type = caffe2_type
    return translated


def AddArgument(op, key, value):
    """Append a named argument to an operator.

    The argument is built by ``utils.MakeArgument``, which makes an
    argument based on the value type.  In Caffe2 the non-learnable
    settings of a layer (stride, pad, kernel, ...) are stored as entries
    of the operator's repeated ``arg`` field, so one operator usually
    carries several of them.  Unlike Caffe, where such settings are fixed
    fields declared in caffe.proto, any name can be attached this way.

    For reference, the ``Argument`` message holds a ``name`` plus either a
    single value (``f``, ``i``, ``s``, ``t``, ``n``) or a repeated one
    (``floats``, ``ints``, ``strings``, ``tensors``, ``nets``).

    Args:
        op: operator whose ``arg`` list is extended.
        key: argument name, e.g. ``"stride"`` or ``"pad"``.
        value: argument value.
    """
    new_argument = utils.MakeArgument(key, value)
    op.arg.extend([new_argument])

################################################################################
# Common translators for layers.
################################################################################


@TranslatorRegistry.Register("Input")
def TranslateInput(layer, pretrained_blobs, is_test, **kwargs):
    """Translate an ``Input`` layer: no operators and no parameters."""
    operators, params = [], []
    return operators, params


@TranslatorRegistry.Register("VideoData")
def TranslateVideoData(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``VideoData`` layer: no operators and no parameters."""
    operators, params = [], []
    return operators, params


@TranslatorRegistry.Register("Data")
def TranslateData(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``Data`` layer: no operators and no parameters."""
    operators, params = [], []
    return operators, params


# A function used in convolution, pooling and deconvolution to deal with
# conv pool specific parameters.
# 用在conv,pooling,deconv层,因为这些层有特殊的参数Stride、Pad和Kernel,需要特别处理
# 参数param:caffe中对应层的参数(deploy.prototxt),caffe_op:caffe2中的Op
# 转换层参数,这里指的是stride,pad,kernel进入caffe2的Op中
def _TranslateStridePadKernelHelper(param, caffe_op):
    """Copy stride, pad and kernel settings from ``param`` to ``caffe_op``.

    Shared by the convolution, deconvolution and pooling translators.

    Args:
        param: Caffe layer parameter exposing ``stride``, ``pad`` and
            ``kernel_size`` (either repeated or plain values) as well as
            the optional ``*_h`` / ``*_w`` variants.
        caffe_op: Caffe2 operator receiving the arguments.

    Raises:
        NotImplementedError: if a repeated ``stride``, ``kernel_size`` or
            ``pad`` holds more than one value.
    """
    try:
        # Repeated fields (as in ConvolutionParameter) support len() and
        # indexing; at most one value per field is supported.
        sizes = (len(param.stride), len(param.kernel_size), len(param.pad))
        if max(sizes) > 1:
            raise NotImplementedError(
                "Translator currently does not support non-conventional "
                "pad/kernel/stride settings."
            )
        # Fall back to the defaults when a field is not given.
        stride = param.stride[0] if sizes[0] else 1
        pad = param.pad[0] if sizes[2] else 0
        kernel = param.kernel_size[0] if sizes[1] else 0
    except TypeError:
        # This catches the case of a PoolingParameter, in which case we
        # are having non-repeating pad, stride and kernel: len() fails on
        # them, so the plain values are used directly.
        stride = param.stride
        pad = param.pad
        kernel = param.kernel_size

    # Stride: explicit per-axis values win over the single value.
    if not (param.HasField("stride_h") or param.HasField("stride_w")):
        AddArgument(caffe_op, "stride", stride)
    else:
        AddArgument(caffe_op, "stride_h", param.stride_h)
        AddArgument(caffe_op, "stride_w", param.stride_w)

    # Pad: equal per-axis values collapse into one ``pad`` argument,
    # different ones are expanded to the four sides.
    if not (param.HasField("pad_h") or param.HasField("pad_w")):
        AddArgument(caffe_op, "pad", pad)
    elif param.pad_h == param.pad_w:
        AddArgument(caffe_op, "pad", param.pad_h)
    else:
        per_side = (
            ("pad_t", param.pad_h),
            ("pad_b", param.pad_h),
            ("pad_l", param.pad_w),
            ("pad_r", param.pad_w),
        )
        for side_name, side_value in per_side:
            AddArgument(caffe_op, side_name, side_value)

    # Kernel: explicit per-axis values win over the single value.
    if not (param.HasField("kernel_h") or param.HasField("kernel_w")):
        AddArgument(caffe_op, "kernel", kernel)
    else:
        AddArgument(caffe_op, "kernel_h", param.kernel_h)
        AddArgument(caffe_op, "kernel_w", param.kernel_w)

#具体每个层的转换,注意装饰器的使用
@TranslatorRegistry.Register("Convolution3D")
def TranslateConvNd(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``Convolution3D`` layer into a Caffe2 ``Conv`` operator.

    Kernel, stride and pad are emitted as the list arguments ``kernels``,
    ``strides`` and ``pads``, with the temporal value first and the
    spatial value repeated twice.

    Args:
        layer: Caffe layer carrying ``convolution3d_param``.
        pretrained_blobs: ``[weight]`` or ``[weight, bias]`` arrays.
        is_test: unused here.
        **kwargs: unused here.

    Returns:
        ``(caffe_op, params)`` where ``params`` holds the weight tensor
        and, if exactly two blobs are given, the flattened bias tensor.
    """
    param = layer.convolution3d_param
    caffe_op = BaseTranslate(layer, "Conv")
    output = caffe_op.output[0]
    # Parameter blobs are named after the first output of the operator.
    weight_name = output + '_w'
    caffe_op.input.append(weight_name)

    kernels = [param.kernel_depth, param.kernel_size, param.kernel_size]
    AddArgument(caffe_op, "kernels", kernels)
    strides = [param.temporal_stride, param.stride, param.stride]
    AddArgument(caffe_op, "strides", strides)

    # Padding defaults to zero when the parameter lacks the attribute.
    temporal_pad = param.temporal_pad if hasattr(param, 'temporal_pad') else 0
    spatial_pad = param.pad if hasattr(param, 'pad') else 0
    # The same three values are listed twice.
    AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)

    params = [
        utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], weight_name)]
    if len(pretrained_blobs) == 2:
        bias_name = output + '_b'
        caffe_op.input.append(bias_name)
        params.append(
            utils.NumpyArrayToCaffe2Tensor(
                pretrained_blobs[1].flatten(), bias_name))
    return caffe_op, params

# 详细看看卷积层
@TranslatorRegistry.Register("Convolution")
def TranslateConv(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``Convolution`` layer into a Caffe2 ``Conv`` operator.

    In Caffe2 the learnable parameters are ordinary operator inputs; they
    are named ``<first output>_w`` and ``<first output>_b`` here.

    Args:
        layer: Caffe layer carrying ``convolution_param``.
        pretrained_blobs: ``[weight]`` or ``[weight, bias]`` arrays.
        is_test: unused here.
        **kwargs: unused here.

    Returns:
        ``(caffe_op, params)``: the operator (type, blob names and
        arguments) and the list of parameter tensors.
    """
    param = layer.convolution_param
    caffe_op = BaseTranslate(layer, "Conv")
    output = caffe_op.output[0]
    weight_name = output + '_w'
    caffe_op.input.append(weight_name)
    _TranslateStridePadKernelHelper(param, caffe_op)

    # The weight is always expected as the first pretrained blob.
    params = [
        utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], weight_name)]
    # The bias is only added when exactly two blobs are supplied.
    if len(pretrained_blobs) == 2:
        bias_name = output + '_b'
        caffe_op.input.append(bias_name)
        params.append(
            utils.NumpyArrayToCaffe2Tensor(
                pretrained_blobs[1].flatten(), bias_name))

    # Group convolution option.
    if param.group != 1:
        AddArgument(caffe_op, "group", param.group)

    # Dilation: one value maps to ``dilation``, two values map to
    # ``dilation_h`` / ``dilation_w``; any other count adds nothing.
    # The original authors marked this path as not tested.
    dilation_count = len(param.dilation)
    if dilation_count == 1:
        AddArgument(caffe_op, "dilation", param.dilation[0])
    elif dilation_count == 2:
        AddArgument(caffe_op, "dilation_h", param.dilation[0])
        AddArgument(caffe_op, "dilation_w", param.dilation[1])
    return caffe_op, params


@TranslatorRegistry.Register("Deconvolution")
def TranslateDeconv(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``Deconvolution`` layer into ``ConvTranspose``.

    Args:
        layer: Caffe layer carrying ``convolution_param``.
        pretrained_blobs: weight array, followed by the bias array when
            ``bias_term`` is set.
        is_test: unused here.
        **kwargs: unused here.

    Returns:
        ``(caffe_op, params)`` with ``params`` being ``[weight]`` or
        ``[weight, bias]``.

    Raises:
        NotImplementedError: if the layer uses more than one group.
    """
    param = layer.convolution_param
    if param.group > 1:
        raise NotImplementedError(
            "Translator currently does not support group deconvolution."
        )
    caffe_op = BaseTranslate(layer, "ConvTranspose")
    output = caffe_op.output[0]
    weight_name, bias_name = output + '_w', output + '_b'
    _TranslateStridePadKernelHelper(param, caffe_op)
    caffe_op.input.extend([weight_name])
    AddArgument(caffe_op, "order", "NCHW")

    params = [
        utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], weight_name)]
    if param.bias_term:
        bias = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[1].flatten(), bias_name
        )
        caffe_op.input.extend([bias_name])
        params.append(bias)
    return caffe_op, params


@TranslatorRegistry.Register("Crop")
def TranslateCrop(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``Crop`` layer into a Caffe2 ``Slice`` operator.

    The part of the net translated so far is executed on a random input
    (``_GetBlobDimMap``) to learn the shape of the layer's second bottom
    blob; that shape and the single crop offset determine the ``starts``
    and ``ends`` arguments.  The emitted operator consumes only the first
    bottom blob.

    Args:
        layer: Caffe layer carrying ``crop_param``.
        pretrained_blobs: unused here.
        is_test: unused here.
        **kwargs: must provide ``net``, ``net_params`` and ``input_dims``
            (four values ``n, c, h, w``).

    Returns:
        ``(op, [])``: the slice operator and no parameters.

    Raises:
        AssertionError: if the layer does not have exactly one offset.
    """
    net = kwargs['net']
    net_params = kwargs['net_params']
    input_dims = kwargs['input_dims']
    n, c, h, w = input_dims
    dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
    dim_map = _GetBlobDimMap(net, net_params, dummy_input)

    param = layer.crop_param
    axis, offsets = param.axis, param.offset
    caffe_op = BaseTranslate(layer, "Slice")
    # The second bottom blob only serves as the size reference.
    reference_name = caffe_op.input[1]
    reference_dim = dim_map[reference_name]
    rank = len(dim_map[reference_name])
    assert len(offsets) == 1, 'Caffe Translator for Crop only works for offset \
    of 1 for now'

    # Axes before ``axis`` get start 0 and end -1.
    starts = [0] * len(range(axis))
    ends = [-1] * len(range(axis))
    # From ``axis`` on, the slice starts at the offset and ends at
    # offset + reference size.
    cropped_ends = [
        int(offsets[0] + reference_dim[i]) for i in range(axis, rank)]
    ends.extend(cropped_ends)
    starts.extend([offsets[0]] * len(cropped_ends))

    op = caffe2_pb2.OperatorDef()
    op.type = caffe_op.type
    op.input.extend([caffe_op.input[0]])
    op.output.extend(caffe_op.output)
    op.arg.extend(caffe_op.arg)
    AddArgument(op, "starts", starts)
    AddArgument(op, "ends", ends)
    return op, []

@TranslatorRegistry.Register("ReLU")
def TranslateRelu(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``ReLU`` layer into a Caffe2 ``Relu`` operator.

    Returns:
        ``(caffe_op, [])``: the operator and no parameters.
    """
    caffe_op = BaseTranslate(layer, "Relu")
    return caffe_op, []


@TranslatorRegistry.Register("Pooling")
def TranslatePool(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``Pooling`` layer into ``MaxPool`` or ``AveragePool``.

    Args:
        layer: Caffe layer carrying ``pooling_param``.
        pretrained_blobs: unused here.
        is_test: unused here.
        **kwargs: unused here.

    Returns:
        ``(caffe_op, [])``: the pooling operator and no parameters.

    Raises:
        NotImplementedError: if the pooling method is neither MAX nor AVE.
    """
    param = layer.pooling_param
    if param.pool == caffe_pb2.PoolingParameter.MAX:
        caffe_op = BaseTranslate(layer, "MaxPool")
    elif param.pool == caffe_pb2.PoolingParameter.AVE:
        caffe_op = BaseTranslate(layer, "AveragePool")
    else:
        # Without this branch ``caffe_op`` would stay unbound and the
        # function would fail below with an unrelated UnboundLocalError.
        raise NotImplementedError(
            "Translator currently does not support pooling method %s."
            % str(param.pool))
    _TranslateStridePadKernelHelper(param, caffe_op)
    AddArgument(caffe_op, "order", "NCHW")
    try:
        # In the Facebook port of Caffe, a torch_pooling field was added to
        # map the pooling computation of Torch. Essentially, it uses
        #   floor((height + 2 * padding - kernel) / stride) + 1
        # instead of
        #   ceil((height + 2 * padding - kernel) / stride) + 1
        # which is Caffe's version.
        # Torch pooling is actually the same as Caffe2 pooling, so we don't
        # need to do anything.
        is_torch_pooling = param.torch_pooling
    except AttributeError:
        # Caffe builds without the field use Caffe's own computation.
        is_torch_pooling = False
    if not is_torch_pooling:
        AddArgument(caffe_op, "legacy_pad",
                    caffe2_legacy_pb2.CAFFE_LEGACY_POOLING)
    if param.global_pooling:
        AddArgument(caffe_op, "global_pooling", 1)
    return caffe_op, []


@TranslatorRegistry.Register("Pooling3D")
def TranslatePool3D(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``Pooling3D`` layer into ``MaxPool`` or ``AveragePool``.

    Kernel, stride and pad are emitted as the list arguments ``kernels``,
    ``strides`` and ``pads``, with the temporal value first and the
    spatial value repeated twice.

    Args:
        layer: Caffe layer carrying ``pooling3d_param``.
        pretrained_blobs: unused here.
        is_test: unused here.
        **kwargs: unused here.

    Returns:
        ``(caffe_op, [])``: the pooling operator and no parameters.

    Raises:
        NotImplementedError: if the pooling method is neither MAX nor AVE.
    """
    param = layer.pooling3d_param
    if param.pool == caffe_pb2.Pooling3DParameter.MAX:
        caffe_op = BaseTranslate(layer, "MaxPool")
    elif param.pool == caffe_pb2.Pooling3DParameter.AVE:
        caffe_op = BaseTranslate(layer, "AveragePool")
    else:
        # Without this branch ``caffe_op`` would stay unbound and the
        # function would fail below with an unrelated UnboundLocalError.
        raise NotImplementedError(
            "Translator currently does not support pooling method %s."
            % str(param.pool))
    AddArgument(caffe_op, "order", "NCHW")
    AddArgument(
        caffe_op,
        "kernels",
        [param.kernel_depth, param.kernel_size, param.kernel_size])
    AddArgument(
        caffe_op,
        "strides",
        [param.temporal_stride, param.stride, param.stride])
    # Padding defaults to zero when the parameter lacks the attribute.
    temporal_pad = param.temporal_pad if hasattr(param, 'temporal_pad') else 0
    spatial_pad = param.pad if hasattr(param, 'pad') else 0
    # The same three values are listed twice.
    AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)
    return caffe_op, []


@TranslatorRegistry.Register("LRN")
def TranslateLRN(layer, pretrained_blobs, is_test, **kwargs):
    """Translate an ``LRN`` layer into a Caffe2 ``LRN`` operator.

    The operator gets a second output named ``_<first output>_scale``.

    Args:
        layer: Caffe layer carrying ``lrn_param``.
        pretrained_blobs: unused here.
        is_test: unused here.
        **kwargs: unused here.

    Returns:
        ``(caffe_op, [])``: the operator and no parameters.

    Raises:
        ValueError: if the norm region is not ACROSS_CHANNELS.
    """
    param = layer.lrn_param
    # Reject unsupported settings before building the operator.
    if param.norm_region != caffe_pb2.LRNParameter.ACROSS_CHANNELS:
        raise ValueError(
            "Does not support norm region other than across channels.")

    caffe_op = BaseTranslate(layer, "LRN")
    scale_name = '_' + caffe_op.output[0] + '_scale'
    caffe_op.output.extend([scale_name])

    AddArgument(caffe_op, "size", int(param.local_size))
    AddArgument(caffe_op, "alpha", float(param.alpha))
    AddArgument(caffe_op, "beta", float(param.beta))
    # Caffe's ``k`` is passed on under the name ``bias``.
    AddArgument(caffe_op, "bias", float(param.k))
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, []


@TranslatorRegistry.Register("InnerProduct")
def TranslateInnerProduct(layer, pretrained_blobs, is_test, **kwargs):
    """Translate an ``InnerProduct`` layer into a Caffe2 ``FC`` operator.

    Args:
        layer: Caffe layer carrying ``inner_product_param``.
        pretrained_blobs: ``[weight, bias]`` arrays; the weight must have
            2 dimensions, or 4 with the first two equal to 1 (old-style
            Caffe).
        is_test: unused here.
        **kwargs: unused here.

    Returns:
        ``(caffe_op, [weight, bias])`` with the weight reshaped to two
        dimensions and the bias flattened.

    Raises:
        ValueError: for a non-default ``axis`` or ``transpose`` setting,
            or for an unexpected weight shape.
    """
    param = layer.inner_product_param
    try:
        unsupported = param.axis != 1 or param.transpose
        if unsupported:
            raise ValueError(
                "We don't have testing case for non-default axis and transpose "
                "cases yet so we are disabling it for now. If you have a model "
                "with this, please do send us your model for us to update this "
                "support, and you are more than welcome to send a PR for this.")
    except AttributeError:
        # We might be using an historic Caffe protobuf that does not have
        # axis and transpose arguments, so we will silently pass.
        pass

    caffe_op = BaseTranslate(layer, "FC")
    output = caffe_op.output[0]
    weight_name, bias_name = output + '_w', output + '_b'
    caffe_op.input.extend([weight_name, bias_name])

    # To provide the old-style 4-dimensional blob
    # (1, 1, dim_output, dim_input) case, we always explicitly reshape the
    # pretrained blob.
    raw_weight = pretrained_blobs[0]
    if raw_weight.ndim not in [2, 4]:
        raise ValueError("Unexpected weight ndim.")
    if raw_weight.ndim == 4 and list(raw_weight.shape[:2]) != [1, 1]:
        raise ValueError(
            "If pretrained blob has 4 dims (old-style Caffe), the first two "
            "should be of value 1, but I got " + str(raw_weight.shape))

    weight = utils.NumpyArrayToCaffe2Tensor(
        raw_weight.reshape(-1, raw_weight.shape[-1]), weight_name)
    bias = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[1].flatten(), bias_name)
    return caffe_op, [weight, bias]


@TranslatorRegistry.Register("Dropout")
def TranslateDropout(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``Dropout`` layer into a Caffe2 ``Dropout`` operator.

    The operator gets a second output named ``_<first output>_mask`` and
    the ``ratio`` argument; ``is_test=1`` is added only when ``is_test``
    is truthy.

    Returns:
        ``(caffe_op, [])``: the operator and no parameters.
    """
    caffe_op = BaseTranslate(layer, "Dropout")
    mask_name = '_' + caffe_op.output[0] + '_mask'
    caffe_op.output.extend([mask_name])
    AddArgument(caffe_op, "ratio", layer.dropout_param.dropout_ratio)
    if is_test:
        AddArgument(caffe_op, "is_test", 1)
    return caffe_op, []


@TranslatorRegistry.Register("Softmax")
def TranslateSoftmax(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a ``Softmax`` layer into a Caffe2 ``Softmax`` operator.

    Returns:
        ``(caffe_op, [])``: the operator and no parameters.
    """
    return BaseTranslate(layer, "Softmax"), []


@TranslatorRegistry.Register("SoftmaxWithLoss")
def TranslateSoftmaxWithLoss(layer, pretrained_blobs, is_test, **kwargs):
    """Translate ``SoftmaxWithLoss`` into a chain of three operators.

    ``Softmax`` on the first bottom blob feeds ``LabelCrossEntropy``
    (together with the second bottom blob), whose output feeds
    ``AveragedLoss`` writing to the layer's first top blob.  The two
    intermediate blobs are named after the first bottom blob with the
    suffixes ``_translator_autogen_softmax`` and
    ``_translator_autogen_xent``.

    Returns:
        ``([softmax_op, xent_op, loss_op], [])``.
    """
    bottom_name = layer.bottom[0]
    softmax_op = core.CreateOperator(
        "Softmax", [bottom_name],
        bottom_name + "_translator_autogen_softmax")
    xent_inputs = [softmax_op.output[0], layer.bottom[1]]
    xent_op = core.CreateOperator(
        "LabelCrossEntropy",
        xent_inputs,
        bottom_name + "_translator_autogen_xent")
    loss_op = core.CreateOperator(
        "AveragedLoss",
        xent_op.output[0],
        layer.top[0])
    return [softmax_op, xent_op, loss_op], []


@TranslatorRegistry.Register("Accuracy")
def TranslateAccuracy(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe Accuracy layer into an "Accuracy" operator.

    The ``top_k`` argument is only attached when
    ``layer.accuracy_param.top_k`` differs from 1.

    Returns:
        A ``(operator, [])`` tuple; no pretrained tensors are produced.
    """
    accuracy_op = BaseTranslate(layer, "Accuracy")
    top_k = layer.accuracy_param.top_k
    if top_k != 1:
        AddArgument(accuracy_op, "top_k", top_k)
    return accuracy_op, []


@TranslatorRegistry.Register("Concat")
def TranslateConcat(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe Concat layer into a "Concat" operator.

    An extra output blob named ``_<first output>_dims`` is appended to the
    operator's outputs and the ``order`` argument is fixed to "NCHW".

    Returns:
        A ``(operator, [])`` tuple; no pretrained tensors are produced.
    """
    # NOTE(review): no layer-specific parameter is read here, only "order"
    # is set -- confirm this is sufficient for layers that concatenate along
    # a non-default axis.
    concat_op = BaseTranslate(layer, "Concat")
    dims_blob = '_' + concat_op.output[0] + '_dims'
    concat_op.output.extend([dims_blob])
    AddArgument(concat_op, "order", "NCHW")
    return concat_op, []


@TranslatorRegistry.Register("TanH")
def TranslateTanH(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe TanH layer into a "Tanh" operator.

    No arguments are added and no pretrained tensors are produced.
    """
    return BaseTranslate(layer, "Tanh"), []


@TranslatorRegistry.Register("InstanceNorm")
def TranslateInstanceNorm(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe InstanceNorm layer into an "InstanceNorm" operator.

    The first two pretrained blobs are flattened and exported as tensors
    named ``<output>_w`` and ``<output>_b``; both names are also appended to
    the operator's inputs. The ``order`` argument is fixed to "NCHW".

    Returns:
        A ``(operator, [weight, bias])`` tuple.
    """
    norm_op = BaseTranslate(layer, "InstanceNorm")
    prefix = norm_op.output[0]
    weight_name = prefix + '_w'
    bias_name = prefix + '_b'

    weight = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0].flatten(), weight_name)
    bias = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[1].flatten(), bias_name)

    norm_op.input.extend([weight_name, bias_name])
    AddArgument(norm_op, "order", "NCHW")
    return norm_op, [weight, bias]


@TranslatorRegistry.Register("BatchNorm")
def TranslateBatchNorm(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe BatchNorm layer into a "SpatialBN" operator.

    The operator receives four extra inputs named ``<output>_scale``,
    ``<output>_bias``, ``<output>_mean`` and ``<output>_var``. When
    ``is_test`` is falsy, four extra outputs (``_mean``, ``_var``,
    ``_saved_mean``, ``_saved_var``) are registered as well.

    Args:
        layer: the Caffe layer; ``layer.batch_norm_param.eps`` is forwarded
            as the ``epsilon`` argument.
        pretrained_blobs: list of arrays. Blob 0 and blob 1 are divided by
            the factor stored in ``pretrained_blobs[2][0]`` to obtain the
            exported mean and variance. If more than three blobs are given,
            blobs 3 and 4 are used as scale and bias; otherwise a scale of
            ones and a bias of zeros are generated.
        is_test: forwarded as the ``is_test`` argument of the operator.

    Returns:
        A ``(operator, [scale, bias, mean, var])`` tuple.

    Raises:
        RuntimeError: if ``pretrained_blobs[2][0]`` is zero.
    """
    caffe_op = BaseTranslate(layer, "SpatialBN")
    output = caffe_op.output[0]
    param = layer.batch_norm_param
    AddArgument(caffe_op, "is_test", is_test)
    AddArgument(caffe_op, "epsilon", param.eps)
    AddArgument(caffe_op, "order", "NCHW")

    caffe_op.input.extend(
        [output + "_scale",
         output + "_bias",
         output + "_mean",
         output + "_var"])
    if not is_test:
        caffe_op.output.extend(
            [output + "_mean",
             output + "_var",
             output + "_saved_mean",
             output + "_saved_var"])

    n_channels = pretrained_blobs[0].shape[0]
    scale_factor = pretrained_blobs[2][0]
    if scale_factor == 0:
        raise RuntimeError("scalar is zero.")
    mean = utils.NumpyArrayToCaffe2Tensor(
        (1. / scale_factor) * pretrained_blobs[0],
        output + '_mean')
    var = utils.NumpyArrayToCaffe2Tensor(
        (1. / scale_factor) * pretrained_blobs[1],
        output + '_var')

    if len(pretrained_blobs) > 3:
        # IntelCaffe and NVCaffe uses fused BN+Scale,
        # three blobs for BN and two blobs for Scale,
        # so that the total number of blobs becomes five (including scale and bias).
        scale = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[3].flatten(),
            output + '_scale')
        bias = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[4].flatten(),
            output + '_bias')
    else:
        # Build the unit scale on a private copy so that neither the caller's
        # list nor the array stored in it is modified.
        unit_scale = np.array(pretrained_blobs[2])
        unit_scale[0] = 1
        unit_scale = np.tile(unit_scale, (n_channels, ))
        scale = utils.NumpyArrayToCaffe2Tensor(
            unit_scale,
            output + '_scale')
        bias = utils.NumpyArrayToCaffe2Tensor(
            np.zeros_like(unit_scale),
            output + '_bias')

    return caffe_op, [scale, bias, mean, var]


@TranslatorRegistry.Register("Eltwise")
def TranslateElementWise(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe Eltwise layer into a "Sum" operator.

    Only layers without coefficients and whose ``operation`` field equals 1
    are accepted.

    Returns:
        A ``(operator, [])`` tuple; no pretrained tensors are produced.

    Raises:
        RuntimeError: if coefficients are present or ``operation`` is not 1.
    """
    eltwise = layer.eltwise_param
    # TODO(jiayq): if we have a protobuf that uses this, lift this constraint
    # and verify that we can correctly translate.
    # NOTE(review): the literal 1 presumably corresponds to the SUM
    # operation of the Eltwise parameter -- confirm against caffe.proto.
    has_coeffs = len(eltwise.coeff) > 0
    if has_coeffs or eltwise.operation != 1:
        raise RuntimeError("This eltwise layer is not yet supported.")
    return BaseTranslate(layer, "Sum"), []


@TranslatorRegistry.Register("Scale")
def TranslateScale(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe Scale layer into a "Mul" (and optional "Add") op.

    Only the single-input form is handled, where the scale factors come from
    the pretrained blobs. With one pretrained blob a single "Mul" operator is
    emitted. With two blobs the "Mul" output is redirected to
    ``<output>_internal`` and a following "Add" operator (a deep copy of the
    "Mul" operator, so it carries the same arguments) adds the bias and
    writes the original output.

    Returns:
        A ``(operators, weights)`` tuple of two equally long lists.

    Raises:
        RuntimeError: for two-input layers, any other unexpected number of
            inputs, ``num_axes != 1``, or more than two pretrained blobs.
    """
    mul_op = BaseTranslate(layer, "Mul")
    scale_param = layer.scale_param
    AddArgument(mul_op, "axis", scale_param.axis)
    AddArgument(mul_op, "broadcast", True)

    input_count = len(mul_op.input)
    if input_count == 2:
        # TODO(jiayq): find a protobuf that uses this and verify.
        raise RuntimeError("This path has not been verified yet.")
    if input_count != 1:
        raise RuntimeError("Unexpected number of inputs.")

    # From here on: the scale parameter is in pretrained blobs.
    if scale_param.num_axes != 1:
        raise RuntimeError("This path has not been verified yet.")

    output = mul_op.output[0]
    scale_name = output + 'scale_w'
    mul_op.input.append(scale_name)
    weights = [utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0].flatten(), scale_name)]
    caffe_ops = [mul_op]

    blob_count = len(pretrained_blobs)
    if blob_count == 2:
        # Caffe Scale layer supports a bias term such that it computes
        # (scale_param * X + bias), whereas Caffe2 Mul op doesn't.
        # Include a separate Add op for the bias followed by Mul.
        add_op = copy.deepcopy(mul_op)
        add_op.type = "Add"
        bias_name = output + 'scale_b'
        internal_blob = output + "_internal"
        # Mul now writes the intermediate blob ...
        del mul_op.output[:]
        mul_op.output.append(internal_blob)
        # ... and Add consumes it together with the bias.
        del add_op.input[:]
        add_op.input.extend([internal_blob, bias_name])
        weights.append(utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[1].flatten(), bias_name))
        caffe_ops.append(add_op)
    elif blob_count != 1:
        raise RuntimeError("Unexpected number of pretrained blobs in Scale")
    # blob_count == 1: no bias term in the Scale layer, nothing more to do.

    assert len(caffe_ops) == len(weights)
    return caffe_ops, weights


@TranslatorRegistry.Register("Reshape")
def TranslateReshape(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe Reshape layer into a "Reshape" operator.

    An extra output blob named ``_<first input>_dims`` is appended to the
    operator's outputs, and ``layer.reshape_param.shape.dim`` is forwarded as
    the ``shape`` argument.

    Returns:
        A ``(operator, [])`` tuple; no pretrained tensors are produced.
    """
    reshape_op = BaseTranslate(layer, "Reshape")
    dims_blob = "_" + reshape_op.input[0] + "_dims"
    reshape_op.output.append(dims_blob)
    AddArgument(reshape_op, 'shape', layer.reshape_param.shape.dim)
    return reshape_op, []


@TranslatorRegistry.Register("Flatten")
def TranslateFlatten(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe Flatten layer into a Caffe2 flatten operator.

    ``axis == 0`` maps to "FlattenToVec" and ``axis == 1`` maps to "Flatten".

    Returns:
        A ``(operator, [])`` tuple; no pretrained tensors are produced.

    Raises:
        NotImplementedError: if ``end_axis`` is not -1, or ``axis`` is
            neither 0 nor 1.
    """
    flatten = layer.flatten_param
    if flatten.end_axis != -1:
        raise NotImplementedError("flatten_param.end_axis not supported yet.")

    op_type_by_axis = {0: "FlattenToVec", 1: "Flatten"}
    op_type = op_type_by_axis.get(flatten.axis)
    if op_type is None:
        # This could be a Reshape op, but dim size is not known here.
        raise NotImplementedError(
            "Not supported yet for flatten_param.axis {}.".format(flatten.axis))

    return BaseTranslate(layer, op_type), []


@TranslatorRegistry.Register("Sigmoid")
def TranslateSigmoid(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe Sigmoid layer into a "Sigmoid" operator.

    No arguments are added and no pretrained tensors are produced.
    """
    return BaseTranslate(layer, "Sigmoid"), []


@TranslatorRegistry.Register("ROIPooling")
def TranslateROIPooling(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe ROIPooling layer into a "RoIPool" operator.

    The ``order`` argument is fixed to "NCHW". When ``is_test`` is truthy it
    is forwarded as the ``is_test`` argument; otherwise an extra output blob
    ``<first output>_argmaxes`` is appended. ``pooled_h``, ``pooled_w`` and
    ``spatial_scale`` are copied from ``layer.roi_pooling_param`` only when
    they are explicitly set.

    Returns:
        A ``(operator, [])`` tuple; no pretrained tensors are produced.
    """
    roi_op = BaseTranslate(layer, "RoIPool")
    AddArgument(roi_op, "order", "NCHW")

    if not is_test:
        # Only used for gradient computation
        roi_op.output.append(roi_op.output[0] + '_argmaxes')
    else:
        AddArgument(roi_op, "is_test", is_test)

    roi_param = layer.roi_pooling_param
    for field in ('pooled_h', 'pooled_w', 'spatial_scale'):
        if roi_param.HasField(field):
            AddArgument(roi_op, field, getattr(roi_param, field))

    return roi_op, []


@TranslatorRegistry.Register("PReLU")
def TranslatePRelu(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe PReLU layer into a "PRelu" operator.

    The first pretrained blob is exported, without flattening, as a tensor
    named ``<first output>_Slope``; that name is also appended to the
    operator's inputs.

    Returns:
        A ``(operator, [slope])`` tuple.
    """
    prelu_op = BaseTranslate(layer, "PRelu")
    slope_name = prelu_op.output[0] + '_Slope'
    prelu_op.input.extend([slope_name])
    slope = utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], slope_name)
    return prelu_op, [slope]


@TranslatorRegistry.Register("Reduction")
def TranslateReduction(layer, pretrained_blobs, is_test, **kwargs):
    """Translate a Caffe Reduction layer into a back-reduction operator.

    SUM maps to "ReduceBackSum" and MEAN maps to "ReduceBackMean". The
    negated ``axis`` of ``layer.reduction_param`` is passed as the
    ``num_reduce_dim`` argument.

    Returns:
        A ``(operator, [])`` tuple; no pretrained tensors are produced.

    Raises:
        NotImplementedError: for any other operation, or a positive axis.
    """
    reduction = layer.reduction_param
    operation = reduction.operation
    if operation == caffe_pb2.ReductionParameter.SUM:
        op_type = "ReduceBackSum"
    elif operation == caffe_pb2.ReductionParameter.MEAN:
        op_type = "ReduceBackMean"
    else:
        raise NotImplementedError("Not yet supported")
    reduce_op = BaseTranslate(layer, op_type)

    axis = reduction.axis
    if axis > 0:
        # We can't figure out the number of dims to reduce from positive axis
        # for back reduction since the shape info is not known here.
        raise NotImplementedError("Not yet supported")
    # NOTE(review): axis == 0 passes the check above and produces
    # num_reduce_dim == 0 -- confirm that this is the intended translation.
    AddArgument(reduce_op, "num_reduce_dim", -axis)

    return reduce_op, []


if __name__ == '__main__':
    # Command-line entry point: read a Caffe prototxt and caffemodel,
    # translate them, and write the Caffe2 predict net and init net to disk.

    # Step 1: parse the command-line arguments.
    parser = argparse.ArgumentParser(
        description="Utilitity to convert pretrained caffe models to Caffe2 models.")
    parser.add_argument("prototext", help="Caffe prototext.")
    parser.add_argument("caffemodel", help="Caffe trained model.")
    parser.add_argument("--init_net", help="Caffe2 initialization net.",
                        default="init_net.pb")
    parser.add_argument("--predict_net", help="Caffe2 prediction net.",
                        default="predict_net.pb")
    # Remove the legacy padding scheme (Caffe pads the legacy way). When the
    # flag is absent, legacy padding is kept by default.
    parser.add_argument("--remove_legacy_pad", help="Remove legacy pad \
                        (Only works for nets with one input blob)",
                        action="store_true",
                        default=False)
    # Dimensions of the input blob.
    parser.add_argument("--input_dims", help="Dimension of input blob", nargs='+',
                        type=int, default=[])
    args = parser.parse_args()

    # Step 2: initialise variables.
    # Step 2.1: two Caffe NetParameter messages.
    caffenet = caffe_pb2.NetParameter()  # holds the deploy.prototxt content
    caffenet_pretrained = caffe_pb2.NetParameter()  # holds the caffemodel
    # Step 2.2: the inputs are the prototxt and the caffemodel.
    input_proto = args.prototext
    input_caffemodel = args.caffemodel
    # Step 2.3: the outputs are init_net and predict_net.
    output_init_net = args.init_net
    output_predict_net = args.predict_net

    # Step 3: fill caffenet and caffenet_pretrained with data.
    # The files are opened in ``with`` blocks so the handles are closed
    # deterministically instead of being left to the garbage collector.
    # The prototxt is text and is merged directly into caffenet.
    with open(input_proto, 'r') as f:
        text_format.Merge(f.read(), caffenet)
    # The caffemodel is binary and is loaded via ParseFromString.
    with open(input_caffemodel, 'rb') as f:
        caffenet_pretrained.ParseFromString(f.read())

    # Step 4: the key step.
    # net: holds the information of every op in the network together with
    #     the non-learnable parameters such as pad and stride.
    # pretrained_params: holds the learnable parameters of every op, such as
    #     weight and bias.
    net, pretrained_params = TranslateModel(
        caffenet, caffenet_pretrained, is_test=True,
        remove_legacy_pad=args.remove_legacy_pad,
        input_dims=args.input_dims
    )

    # Step 5: register the external inputs, outputs and parameters on net.
    # Assume there is one input and one output
    # net.external_input holds the network input plus the names of all
    # learnable parameters, which can be seen as additional inputs of net.
    # net.external_output holds the network output.
    external_input = net.op[0].input[0]
    external_output = net.op[-1].output[0]

    net.external_input.extend([external_input])
    net.external_input.extend([param.name for param in pretrained_params.protos])
    net.external_output.extend([external_output])
    # Note: the local external_input is passed here, not net.external_input.
    init_net = ConvertTensorProtosToInitNet(pretrained_params, external_input)

    # Step 6: write init_net and predict_net to files.
    # net only contains op information, no learnable parameters; it plays
    # the role of deploy.prototxt.
    with open(output_predict_net, 'wb') as f:
        f.write(net.SerializeToString())
    # A human-readable text dump goes to "<predict_net>txt".
    with open(output_predict_net + 'txt', 'w') as f:
        f.write(str(net))
    # init_net contains the learnable parameters; it plays the role of the
    # caffemodel.
    with open(output_init_net, 'wb') as f:
        f.write(init_net.SerializeToString())
``
 类似资料: