#include "caffe2/operators/tt_linear_op.h"

namespace caffe2 {
|
|
namespace {
|
|
|
|
REGISTER_CPU_OPERATOR(TT, TTLinearOp<float, CPUContext>);
|
|
REGISTER_CPU_OPERATOR(TTLinearGradient, TTLinearGradientOp<float, CPUContext>);
|
|
|
|
// The TT-layer serves as a low-rank decomposition of a fully connected layer.
|
|
// The inputs are the same as to an FC layer, but the number of the parameters
|
|
// are greatly reduced.
|
|
OPERATOR_SCHEMA(TT)
|
|
.NumInputs(3)
|
|
.NumOutputs(1)
|
|
.SetDoc(R"DOC(
|
|
The TT-layer serves as a low-rank decomposition of a fully connected layer. The
|
|
inputs are the same as to a fully connected layer, but the number of parameters
|
|
are greatly reduced and forward computation time can be drastically reduced
|
|
especially for layers with large weight matrices. The multiplication is computed
|
|
as a product of the input vector with each of the cores that make up the TT
|
|
layer. Given the input sizes (inp_sizes), output sizes(out_sizes), and the ranks
|
|
of each of the cores (tt_ranks), the ith core will have size:
|
|
|
|
inp_sizes[i] * tt_ranks[i] * tt_ranks[i + 1] * out_sizes[i].
|
|
|
|
The complexity of the computation is dictated by the sizes of inp_sizes,
|
|
out_sizes, and tt_ranks, where there is the trade off between accuracy of the
|
|
low-rank decomposition and the speed of the computation.
|
|
)DOC")
|
|
.Arg(
|
|
"inp_sizes",
|
|
"(int[]) Input sizes of cores. Indicates the input size of "
|
|
"the individual cores; the size of the input vector X must match the "
|
|
"product of the inp_sizes array.")
|
|
.Arg(
|
|
"out_sizes",
|
|
"(int[]) Output sizes of cores. Indicates the output size "
|
|
"of the individual cores; the size of the output vector Y must match "
|
|
"the product of the out_sizes array.")
|
|
.Arg(
|
|
"tt_ranks",
|
|
"(int[]) Ranks of cores. Indicates the ranks of the "
|
|
"individual cores; lower rank means larger compression, faster "
|
|
"computation but reduce accuracy.")
|
|
.Input(
|
|
0,
|
|
"X",
|
|
"Input tensor from previous layer with size (M x K), where "
|
|
"M is the batch size and K is the input size.")
|
|
.Input(1, "b", "1D blob containing the bias vector")
|
|
.Input(
|
|
2,
|
|
"cores",
|
|
"1D blob containing each individual cores with sizes "
|
|
"specified above.")
|
|
.Output(
|
|
0,
|
|
"Y",
|
|
"Output tensor from previous layer with size (M x N), "
|
|
"where M is the batch size and N is the output size.");
|
|
|
|
OPERATOR_SCHEMA(TTLinearGradient);
|
|
|
|
GRADIENT_NOT_IMPLEMENTED_YET(TT);
|
|
} // namespace
|
|
} // namespace caffe2
|