pytorch/caffe2/operators/tt_linear_op.cc

#include "caffe2/operators/tt_linear_op.h"

namespace caffe2 {
namespace {

REGISTER_CPU_OPERATOR(TT, TTLinearOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(TTLinearGradient, TTLinearGradientOp<float, CPUContext>);

// The TT-layer serves as a low-rank decomposition of a fully connected layer.
// The inputs are the same as to an FC layer, but the number of the parameters
// are greatly reduced.
OPERATOR_SCHEMA(TT)
    .NumInputs(3)
    .NumOutputs(1)
    .SetDoc(R"DOC(
The TT-layer serves as a low-rank decomposition of a fully connected layer. The
inputs are the same as to a fully connected layer, but the number of parameters
are greatly reduced and forward computation time can be drastically reduced
especially for layers with large weight matrices. The multiplication is computed
as a product of the input vector with each of the cores that make up the TT
layer. Given the input sizes (inp_sizes), output sizes(out_sizes), and the ranks
of each of the cores (tt_ranks), the ith core will have size:

    inp_sizes[i] * tt_ranks[i] * tt_ranks[i + 1] * out_sizes[i].

The complexity of the computation is dictated by the sizes of inp_sizes,
out_sizes, and tt_ranks, where there is the trade off between accuracy of the
low-rank decomposition and the speed of the computation.
)DOC")
    .Arg(
        "inp_sizes",
        "(int[]) Input sizes of cores. Indicates the input size of "
        "the individual cores; the size of the input vector X must match the "
        "product of the inp_sizes array.")
    .Arg(
        "out_sizes",
        "(int[]) Output sizes of cores. Indicates the output size "
        "of the individual cores; the size of the output vector Y must match "
        "the product of the out_sizes array.")
    .Arg(
        "tt_ranks",
        "(int[]) Ranks of cores. Indicates the ranks of the "
        "individual cores; lower rank means larger compression, faster "
        "computation but reduce accuracy.")
    .Input(
        0,
        "X",
        "Input tensor from previous layer with size (M x K), where "
        "M is the batch size and K is the input size.")
    .Input(1, "b", "1D blob containing the bias vector")
    .Input(
        2,
        "cores",
        "1D blob containing each individual cores with sizes "
        "specified above.")
    .Output(
        0,
        "Y",
        "Output tensor from previous layer with size (M x N), "
        "where M is the batch size and N is the output size.");

OPERATOR_SCHEMA(TTLinearGradient);

GRADIENT_NOT_IMPLEMENTED_YET(TT);
} // namespace
} // namespace caffe2