tf.contrib.slim.learning.train
_USE_DEFAULT = 0  # sentinel value: fall back to the library default for this argument

def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          session_wrapper=None,
          trace_every_n_steps=None,
          ignore_live_threads=False)
Runs a training loop using a TensorFlow Supervisor. If sync_optimizer is supplied, gradient updates are applied synchronously; otherwise they are applied asynchronously.

train_op: a Tensor that, when executed, applies the gradients and returns the loss value.

# Load the data / create the model
images, labels = LoadData(...)
predictions = MyModel(images)
# Define the loss function:
slim.losses.log_loss(predictions, labels)
total_loss = slim.losses.get_total_loss()
# Define the optimizer:
optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)
# Create the train_op
train_op = slim.learning.create_train_op(total_loss, optimizer)
# Run training.
slim.learning.train(train_op, my_log_dir)
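For the synchronous case mentioned above, a minimal sketch (assuming a replicated setup where FLAGS.num_replicas and FLAGS.task are hypothetical flags defined elsewhere) wraps the base optimizer in tf.train.SyncReplicasOptimizer and passes it through sync_optimizer:

# Sketch of synchronous training; FLAGS.num_replicas and FLAGS.task are
# hypothetical flags for the replica count and this worker's index.
optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)
# Aggregate gradients from all replicas before applying a single update.
sync_optimizer = tf.train.SyncReplicasOptimizer(
    optimizer,
    replicas_to_aggregate=FLAGS.num_replicas,
    total_num_replicas=FLAGS.num_replicas)
train_op = slim.learning.create_train_op(total_loss, sync_optimizer)
# Passing the wrapped optimizer makes train() run its synchronization hooks.
slim.learning.train(
    train_op,
    my_log_dir,
    sync_optimizer=sync_optimizer,
    is_chief=(FLAGS.task == 0))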
The model can be initialized from a checkpoint by specifying the init_fn argument (a function that takes a session and restores variables):
# Create the train_op
train_op = slim.learning.create_train_op(total_loss, optimizer)
# Create the initialization op from a checkpoint:
checkpoint_path = '/path/to/checkpoint'
# Restore all model variables:
variables_to_restore = slim.get_model_variables()
init_fn = slim.assign_from_checkpoint_fn(checkpoint_path, variables_to_restore)
# Run training
slim.learning.train(train_op, my_log_dir, init_fn=init_fn)
...
# Restore the variable named 'v2' from the checkpoint:
variables_to_restore = slim.get_variables_by_name("v2")
# or restore all variables whose names end with '2':
variables_to_restore = slim.get_variables_by_suffix("2")
# or restore all variables in the scope 'nested':
variables_to_restore = slim.get_variables(scope="nested")
# or restore only the variables in the scopes 'fc6', 'fc7' and 'fc8':
variables_to_restore = slim.get_variables_to_restore(include=['fc6', 'fc7', 'fc8'])
# or restore all variables except those in the scopes 'fc6', 'fc7' and 'fc8':
variables_to_restore = slim.get_variables_to_restore(exclude=['fc6', 'fc7', 'fc8'])
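Whichever selector is used, the resulting variables feed into the same init_fn pattern shown above; a minimal sketch:

# Turn the selected variables into an init_fn, as in the earlier example.
init_fn = slim.assign_from_checkpoint_fn(checkpoint_path, variables_to_restore)
slim.learning.train(train_op, my_log_dir, init_fn=init_fn)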
...
# Restore 'conv1/weights' from 'vgg16/conv1/weights' in the checkpoint:
def name_in_checkpoint(var):
  return 'vgg16/' + var.op.name

# Restore 'conv1/weights' and 'conv1/bias' from 'conv1/params1' and
# 'conv1/params2' in the checkpoint:
def name_in_checkpoint(var):
  if "weights" in var.op.name:
    return var.op.name.replace("weights", "params1")
  if "bias" in var.op.name:
    return var.op.name.replace("bias", "params2")

variables_to_restore = slim.get_model_variables()
variables_to_restore = {name_in_checkpoint(var): var for var in variables_to_restore}
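The resulting name-to-variable mapping can then be passed to slim.assign_from_checkpoint_fn in place of a plain variable list; a minimal sketch:

# A dict mapping checkpoint names to variables is accepted directly.
init_fn = slim.assign_from_checkpoint_fn(checkpoint_path, variables_to_restore)
slim.learning.train(train_op, my_log_dir, init_fn=init_fn)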
...
# Create the train_op
train_op = slim.learning.create_train_op(total_loss, optimizer)

# Create the mapping from variable names to values:
var0_initial_value = ReadFromDisk(...)
var1_initial_value = ReadFromDisk(...)
var_names_to_values = {
    'var0': var0_initial_value,
    'var1': var1_initial_value,
}
init_assign_op, init_feed_dict = slim.assign_from_values(var_names_to_values)

# Create the initialization assignment function:
def InitAssignFn(sess):
  sess.run(init_assign_op, init_feed_dict)
# Equivalently: init_fn = slim.assign_from_values_fn(var_names_to_values)

# Run training
slim.learning.train(train_op, my_log_dir, init_fn=InitAssignFn)
Customizing the train_op
# slim.learning.create_train_op
def create_train_op(total_loss,
                    optimizer,
                    global_step=_USE_GLOBAL_STEP,
                    update_ops=None,
                    variables_to_train=None,
                    clip_gradient_norm=0,
                    summarize_gradients=False,
                    gate_gradients=tf_optimizer.Optimizer.GATE_OP,
                    aggregation_method=None,
                    colocate_gradients_with_ops=False,
                    gradient_multipliers=None,
                    check_numerics=True)
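As a sketch of the optional arguments, gradients can be clipped and summarized when the train_op is created (the clipping norm here is illustrative):

# Clip gradients to a global norm of 4 and write gradient summaries.
train_op = slim.learning.create_train_op(
    total_loss,
    optimizer,
    clip_gradient_norm=4,
    summarize_gradients=True)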
# Train only the 'vgg16' variables, excluding the final layer 'vgg16/fc8':
variables_to_train = slim.get_trainable_variables(scope='vgg16')
variables_to_exclude = slim.get_trainable_variables(scope='vgg16/fc8')
variables_to_train = [var for var in variables_to_train
                      if var not in variables_to_exclude]
train_op = slim.learning.create_train_op(
    total_loss, optimizer, variables_to_train=variables_to_train)
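Relatedly, gradient_multipliers scales the gradients of individual variables, a common way to give a freshly initialized final layer a larger effective learning rate; a minimal sketch (the variable names are illustrative):

# Give the final layer 10x the learning rate of the rest of the network.
gradient_multipliers = {'vgg16/fc8/weights': 10.0,
                        'vgg16/fc8/biases': 10.0}
train_op = slim.learning.create_train_op(
    total_loss, optimizer, gradient_multipliers=gradient_multipliers)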
...
Some ops, such as the moving mean and moving variance maintained by batch normalization, are updated outside of the gradient computation. create_train_op controls these through the update_ops argument, in one of three ways:
# Option 1:
# Force TF-Slim NOT to use ANY update_ops:
train_op = slim.learning.create_train_op(
    total_loss,
    optimizer,
    update_ops=[])

# Option 2:
# Replace the set of update ops:
train_op = slim.learning.create_train_op(
    total_loss,
    optimizer,
    update_ops=my_other_update_ops)

# Option 3:
# Add extra update ops to the default set:
tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, my_update1)
train_op = slim.learning.create_train_op(
    total_loss,
    optimizer)
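For context, slim.batch_norm registers its moving mean/variance updates in the tf.GraphKeys.UPDATE_OPS collection, so the default behaviour (no update_ops argument) already runs them before each training step; a minimal sketch:

# slim.batch_norm adds its moving-average updates to tf.GraphKeys.UPDATE_OPS;
# create_train_op runs that collection by default before each gradient step.
net = slim.conv2d(images, 64, [3, 3],
                  normalizer_fn=slim.batch_norm,
                  normalizer_params={'is_training': True})
predictions = slim.fully_connected(slim.flatten(net), 10)
slim.losses.log_loss(predictions, labels)
total_loss = slim.losses.get_total_loss()
train_op = slim.learning.create_train_op(total_loss, optimizer)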