// Operator registration, schema and c10 export for the Caffe2 LearningRate op.
#include "caffe2/sgd/learning_rate_op.h"
|
|
|
|
namespace caffe2 {
|
|
REGISTER_CPU_OPERATOR(LearningRate, LearningRateOp<float, CPUContext>);
|
|
|
|
OPERATOR_SCHEMA(LearningRate)
|
|
.NumInputs(1)
|
|
.NumOutputs(1)
|
|
.TensorInferenceFunction([](const OperatorDef&,
|
|
const vector<TensorShape>& in) {
|
|
vector<TensorShape> out(1);
|
|
out[0] = in[0];
|
|
return out;
|
|
})
|
|
.SetDoc(R"DOC(
|
|
Learning rate is a decreasing function of time. With low learning rates the
|
|
improvements will be linear. With high learning rates they will start to look
|
|
more exponential. Learning rate is controlled by the following arguments:
|
|
|
|
|
|
Required:
|
|
`iterations`
|
|
`base_lr`: base learning rate
|
|
`policy`: this controls how the learning rate is applied, options are:
|
|
`fixed`
|
|
`step`: uses `stepsize`, `gamma`
|
|
`exp`: uses `gamma`
|
|
`gate`: uses 'multiplier_1', 'multiplier_2', `num_iter``
|
|
`inv`: uses `gamma`, `power`
|
|
`linearWarmup`: uses `start_multiplier`, `num_iter`
|
|
`constantWarmup`: uses `multiplier`, `num_iter`
|
|
`alter`: uses `active_first`, `active_period`, `inactive_period`
|
|
`hill`: uses those in both `linearWarmup` and `inv`, plus `end_multiplier`
|
|
`composite`: uses `sub_policy_num_iters` and additional args with format
|
|
`cyclic`: uses `max_lr`, `stepsize`
|
|
`cosine`: uses `min_lr`, `max_lr`, `period`, `t_mult`, `lr_shrink`
|
|
`constantThenLinearWarmup`: uses `start_warmup_multiplier`, `constant_warmup_num_iter`, `linear_warmup_num_iter`
|
|
`compositeCyclical`: uses `start_warmup_multiplier`, `constant_warmup_num_iter`, `linear_warmup_num_iter`, `cyclical_max_lr`, `cyclical_step_size`, `cyclical_decay`
|
|
`compositeCosine`: uses `start_warmup_multiplier`, `constant_warmup_num_iter`, `linear_warmup_num_iter`, `cosine_max_lr`, `cosine_period`, `cosine_t_mult`, `cosine_lr_shrink`
|
|
sub_policy_{sub_policy_index}_{sub_policy_arg}, for example:
|
|
sub_policy_0_policy: "exp", sub_policy_0_gamma: 0.99,
|
|
sub_policy_0_lr_scale: 1.2
|
|
sub_policy_0_policy: "fixed", sub_policy_0_lr_scale: 1.0
|
|
sub_policy_num_iters: [1000, 1000]
|
|
|
|
Optional:
|
|
`stepsize`: defaults to 0
|
|
`max_lr`: defaults to 0.005
|
|
`gamma`: defaults to 0
|
|
`power`: defaults to 0
|
|
`num_iter`: defaults to 0
|
|
`start_multiplier`: defaults to 0
|
|
`multiplier`: defaults to 0.5
|
|
`multiplier_1`: defaults to 1
|
|
`multiplier_2`: defaults to 1
|
|
`m1`: defaults to 0.5, the first piece lr of piece warmup
|
|
`n1`: defaults to 0, iter threshold of the first piece lr
|
|
`m2`: defaults to 0.5, the second piece lr of piece warmup
|
|
`n2`: defaults to 0, iter threshold of the second piece lr
|
|
`m3`: defaults to 0.5, the third piece lr of piece warmup
|
|
`start_warmup_multiplier`: defaults to 0.1, part of constantThenLinearWarmup
|
|
`constant_warmup_num_iter`: defaults to 10000000, part of constantThenLinearWarmup and constantThenLinearWarmup
|
|
`linear_warmup_num_iter`: defaults to 10000000, part of constantThenLinearWarmup, CompositeCyclicalLRPolicy, CompositeCosineLRPolicy
|
|
`cyclical_max_lr`: defaults to 0.05, part of CompositeCyclicalLRPolicy
|
|
`cyclical_step_size`: defaults to 1000000, part of CompositeCyclicalLRPolicy
|
|
`cyclical_decay`: defaults to 1.0, part of CompositeCyclicalLRPolicy
|
|
`cosine_min_lr`:defaults to 0.01, part of CompositeCosineLRPolicy
|
|
`cosine_max_lr`:defaults to 0.05, part of CompositeCosineLRPolicy
|
|
`cosine_period`:defaults to 50, part of CompositeCosineLRPolicy
|
|
`cosine_t_mult`:defaults to 1.0, part of CompositeCosineLRPolicy
|
|
`cosine_lr_shrink`:defaults to 0.99, part of CompositeCosineLRPolicy
|
|
|
|
Usage:
|
|
train_net.LearningRate(*iterations*, "*label*", base_lr=*float*,
|
|
policy="policy_name", stepsize=*int*, gamma=*float*)
|
|
|
|
|
|
Example usage:
|
|
train_net.LearningRate(200, "LR", base_lr=-0.1,
|
|
policy="step", stepsize=20, gamma=0.9)
|
|
)DOC")
|
|
.Arg("base_lr", "(float, required) base learning rate")
|
|
.Arg("policy", "(float, default 1.0) strategy for gamma enforcement")
|
|
.Arg("power", "(float, default 1.0) used only for inv policy type")
|
|
.Arg("gamma", "(float, default 1.0) momentum of change")
|
|
.Arg("stepsize", "(float, default 1.0) sampling rate on iterations")
|
|
.Arg("max_lr", "(float, default 0.005) max learning rate")
|
|
.Arg("active_first", "(boolean, default True) in alter policy")
|
|
.Arg("active_period", "(int64_t, required) in alter policy")
|
|
.Arg("inactive_period", "(int64_t, required) in alter policy")
|
|
.Arg(
|
|
"max_iter",
|
|
"(int, default -1) maximum iterations in this training run")
|
|
.Arg(
|
|
"num_iter",
|
|
"(int, default 0) number of iterations over which to warmup lr")
|
|
.Arg(
|
|
"start_multiplier",
|
|
"(float, default 0) starting multiplier for learning rate")
|
|
.Arg(
|
|
"end_multiplier",
|
|
"(float, default 0) end multiplier for learning rate")
|
|
.Arg(
|
|
"multiplier",
|
|
"(float, default 0.5) constant multiplier for learning rate")
|
|
.Arg(
|
|
"multiplier_1",
|
|
"(float, default 1) start multiplier for learning rate")
|
|
.Arg("multiplier_2", "(float, default 1) end multiplier for learning rate")
|
|
.Arg(
|
|
"sub_policy_num_iters",
|
|
"(int array, default empty) number of iterations for each sub learning rate policy in composite policy")
|
|
.Arg("m1", "")
|
|
.Arg("n1", "")
|
|
.Arg("m2", "")
|
|
.Arg("n2", "")
|
|
.Arg("m3", "")
|
|
.Arg("start_warmup_multiplier", "defaults to 0.1")
|
|
.Arg("constant_warmup_num_iter", "defaults to 10000000")
|
|
.Arg("linear_warmup_num_iter", "defaults to 10000000")
|
|
.Arg(
|
|
"cyclical_max_lr",
|
|
"defaults to 0.05, part of CompositeCyclicalLRPolicy")
|
|
.Arg(
|
|
"cyclical_step_size",
|
|
"defaults to 1000000, part of CompositeCyclicalLRPolicy")
|
|
.Arg(
|
|
"cyclical_decay",
|
|
"defaults to 0.999, part of CompositeCyclicalLRPolicy")
|
|
.Arg("cosine_min_lr", "defaults to 0.01, part of CompositeCosineLRPolicy")
|
|
.Arg("cosine_max_lr", "defaults to 0.05, part of CompositeCosineLRPolicy")
|
|
.Arg("cosine_period", "defaults to 50, part of CompositeCosineLRPolicy")
|
|
.Arg("cosine_t_mult", "defaults to 1,0, part of CompositeCosineLRPolicy")
|
|
.Arg(
|
|
"cosine_lr_shrink",
|
|
"defaults to 0.99, part of CompositeCosineLRPolicy")
|
|
.Arg(
|
|
"num_iter_1",
|
|
"(int, default 0) number of iterations over which to warmup for slope policy")
|
|
.Arg(
|
|
"num_iter_2",
|
|
"(int, default 0) number of iterations over which to gradually gate for slope policy")
|
|
.Input(0, "input", "description needed")
|
|
.Output(0, "output", "description needed")
|
|
.DeviceInferenceFunction([](const OperatorDef& def) {
|
|
return std::make_pair(
|
|
std::vector<DeviceOption>{DeviceOption()},
|
|
std::vector<DeviceOption>{def.device_option()});
|
|
});
|
|
|
|
NO_GRADIENT(LearningRate);
|
|
} // namespace caffe2
|
|
|
|
using LearningRateOpFloatCPU =
|
|
caffe2::LearningRateOp<float, caffe2::CPUContext>;
|
|
|
|
C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
|
|
LearningRate,
|
|
"_caffe2::LearningRate("
|
|
"Tensor iterations, "
|
|
"float base_lr,"
|
|
"str policy, "
|
|
"float? power = 1.0, "
|
|
"float? gamma = 1.0, "
|
|
"int? stepsize = 1, "
|
|
"float? max_lr = 0.005, "
|
|
"bool? active_first = True, "
|
|
"int? active_period = -1, "
|
|
"int? inactive_period = -1, "
|
|
"int? max_iter = -1, "
|
|
"int? num_iter = 0, "
|
|
"float? start_multiplier = 0, "
|
|
"float? end_multiplier = 0, "
|
|
"float? multiplier = 0.5, "
|
|
"float? multiplier_1 = 1.0, "
|
|
"float? multiplier_2 = 1.0, "
|
|
"int[]? sub_policy_num_iters = None, "
|
|
"float? m1 = 0.5, "
|
|
"float? n1 = 0, "
|
|
"float? m2 = 0.5, "
|
|
"float? n2 = 0, "
|
|
"float? m3 = 0.5, "
|
|
"float? start_warmup_multiplier = 0.1, "
|
|
"int? constant_warmup_num_iter = 10000000, "
|
|
"int? linear_warmup_num_iter = 10000000, "
|
|
"float? cyclical_max_lr = 0.05, "
|
|
"int? cyclical_step_size = 1000000, "
|
|
"float? cyclical_decay = 0.999, "
|
|
"float? cosine_min_lr = 0.01, "
|
|
"float? cosine_max_lr = 0.05, "
|
|
"int? cosine_period = 50, "
|
|
"float? cosine_t_mult = 1.0, "
|
|
"float? cosine_lr_shrink = 0.99, "
|
|
"float? decay = 1.0, "
|
|
"int? num_iter_1 = 0, "
|
|
"int? num_iter_2 = 0) -> Tensor output",
|
|
LearningRateOpFloatCPU);
|