#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/Config.h>
#include <ATen/Dispatch.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/native/sparse/ParamUtils.h>
#include <ATen/native/SparseTensorUtils.h>
#include <ATen/Parallel.h>
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/CPUFunctions.h>
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_log_softmax_backward_data_cpu_dispatch.h>
#include <ATen/ops/_log_softmax_cpu_dispatch.h>
#include <ATen/ops/_softmax_backward_data_cpu_dispatch.h>
#include <ATen/ops/_softmax_cpu_dispatch.h>
#include <ATen/ops/_sparse_log_softmax.h>
#include <ATen/ops/_sparse_log_softmax_backward_data_native.h>
#include <ATen/ops/_sparse_log_softmax_native.h>
#include <ATen/ops/_sparse_softmax.h>
#include <ATen/ops/_sparse_softmax_backward_data_native.h>
#include <ATen/ops/_sparse_softmax_native.h>
#endif

// std facilities used below (std::lower_bound, std::exp/std::log,
// std::numeric_limits, std::vector)
#include <algorithm>
#include <cmath>
#include <limits>
#include <map>
#include <vector>
namespace at::native {
namespace {

int64_t get_nvalues(const IntArrayRef& sizes, int64_t sparse_dim) {
  /* Return the number of entries in the dense part of a sparse tensor.

     `sizes` is a vector of sparse tensor dimensions.
     `sparse_dim` is the dimension of the sparse part of a sparse tensor.
  */
  return c10::multiply_integers(sizes.begin() + sparse_dim, sizes.end());
}
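
// Example: for sizes = (2, 5, 3, 4) and sparse_dim = 2, each specified
// entry carries a 3 x 4 dense block, so get_nvalues returns 3 * 4 = 12;
// with a 0-D dense part (sparse_dim == ndim) the product is empty and the
// result is 1.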

std::vector<int64_t> get_offsets(const Tensor& indices, const IntArrayRef& sizes, const int64_t dim) {
  /*
    Given the indices of a sparse tensor, return a vector of offsets
    for the entries in the equivalent dense tensor:

      If
        offsets = get_offsets(A._indices(), A.sizes(), -1)
        data = A.to_dense().flatten()
      then
        data[offsets[n]] == A._values()[n]

    `indices` must be a contiguous 2-d tensor with int64_t entries.
    `sizes` must be a vector with at least ndim entries.

    `dim` is an integer. When >= 0 and < ndim, the indices of all
    entries in the given dimension will be mapped to the index of the
    first entry before computing the offset. Otherwise, the value is
    ignored.

    For example, consider a sparse tensor

      11 ** ** 14 15
      ** 22 ** 24 **

    with

      indices = [[0, 0, 0, 1, 1],
                 [0, 3, 4, 1, 3]]

    then

      get_offsets(indices, (2, 5), -1) -> [0, 3, 4, 6, 8]
      get_offsets(indices, (2, 5), 0) -> [0, 3, 4, 1, 3]
      get_offsets(indices, (2, 5), 1) -> [0, 0, 0, 5, 5]
  */
  auto ndim = indices.size(0);
  auto nnz = indices.size(1);
  std::vector<int64_t> offsets(nnz);
  std::vector<int64_t> strides(ndim, 1);
  auto indices_accessor = indices.accessor<int64_t, 2>();

  if (ndim > 1) {
    for (int64_t i = ndim - 2; i >= 0; i--) {
      strides[i] = strides[i + 1] * sizes[i + 1];
    }
  }

  for (const auto i : c10::irange(nnz)) {
    int64_t acc = 0;
    for (const auto j : c10::irange(ndim)) {
      auto indices_row = indices_accessor[j];
      auto stride = strides[j];
      if (j != dim) {
        acc += stride * indices_row[i];
      }
    }
    offsets[i] = acc;
  }

  return offsets;
}
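
// In the dim == -1 case above, the offsets are row-major ravel indices:
// for sizes (2, 5) the strides are [5, 1], so offset = 5 * row + col.
// Passing dim == 0 drops the row term (offset = col) and dim == 1 drops
// the column term (offset = 5 * row), reproducing the three example
// results in the docstring.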

std::vector<std::vector<int64_t>> get_pools(const Tensor& indices, const IntArrayRef& sizes, const int64_t dim) {
  /*
    Return pools of indices that align with the given dimension.

    Parameters:
      `indices` - sparse tensor indices
      `sizes` - sparse tensor dimensions
      `dim` - given dimension

    Returns:
      `pools` - a ragged array of indices

    A pool is defined as a list of indices (of sparse tensor values)
    that participate in the same softmax computation:

    - pools[i] intersection with pools[j] is empty iff i != j
    - union of all pools is set(range(nnz))
    - X.values[k], k in pools[i], does not affect the result of
      softmax(X)[n], n in pools[j], iff i != j
  */
  std::vector<std::vector<int64_t>> pools;

  auto ndim = indices.size(0);
  auto nnz = indices.size(1);
  std::vector<int64_t> strides(ndim, 1);
  auto indices_accessor = indices.accessor<int64_t, 2>();

  if (ndim > 1) {
    for (int64_t i = ndim - 2; i >= 0; i--) {
      strides[i] = strides[i + 1] * (i + 1 == dim ? 1 : sizes[i + 1]);
    }
  }

  for (const auto i : c10::irange(nnz)) {
    int64_t pool_index = 0;
    for (const auto j : c10::irange(ndim)) {
      if (j != dim) {
        const auto indices_row = indices_accessor[j];
        const auto stride = strides[j];
        pool_index += stride * indices_row[i];
      }
    }
    if (static_cast<int64_t>(pools.size()) <= pool_index) {
      pools.resize(pool_index + 1);
    }
    pools.at(pool_index).push_back(i);
  }

  return pools;
}
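
// Worked example: for the tensor used in get_offsets above, with
//
//   indices = [[0, 0, 0, 1, 1],
//              [0, 3, 4, 1, 3]]
//
// get_pools(indices, (2, 5), 1) groups entries by row:
//
//   pools == [[0, 1, 2], [3, 4]]
//
// while get_pools(indices, (2, 5), 0) groups entries by column; here the
// pool index is the column itself, so the ragged array can contain empty
// pools:
//
//   pools == [[0], [3], [], [1, 4], [2]]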

template <typename scalar_t, bool LogSoftMax>
void cpu_sparse_coo_softmax(Tensor output, const Tensor& input, const int64_t dim) {
  /*
    See test/test_sparse.py:test_softmax:sparse_softmax for the Python
    prototype of the sparse softmax algorithm that this implementation
    is based on.

    Derivation of the sparse softmax algorithm with an example
    ----------------------------------------------------------

    Consider the following 2-D sparse tensor with 0-D dense part as an
    example, denote it by X:

      11 ** ** 14 15
      ** 22 ** 24 **

    where `**` represent unspecified entries. The COO sparse tensor
    representation of X is:

      indices = [[0, 1, 0, 1, 0],
                 [0, 1, 3, 3, 4]]
      values = [11, 22, 14, 24, 15]

    that after coalescing becomes

      indices = [[0, 0, 0, 1, 1],
                 [0, 3, 4, 1, 3]]
      values = [11, 14, 15, 22, 24]

    The softmax of X along the given dimension d is defined as

      S_d[i, j] = exp(X[i, j]) / sum(exp(X[I_d[k]]), k=0..X.shape[d]-1)

    where the index tuple I_d[k] is defined as

      I_0[k] = (k, j)
      I_1[k] = (i, k)

    For sparse tensors, the unspecified entries are skipped in the
    softmax sum of exponents so that the result will be a sparse tensor
    with the same indices as the input. Mathematically, this
    corresponds to the case where the unspecified entries are
    interpreted as negative infinities rather than zeros.

    To minimize the defects from numerical evaluation of exponents
    with very large or small arguments, the softmax implementation
    uses the following numerically stable definition:

      S_d[i, j] = exp(X[i, j] - maxX_d) / sum(exp(X[I_d[k]] - maxX_d), k=0..X.shape[d]-1)

    where

      maxX_d = max(X[I_d[k]], k=0..X.shape[d]-1)

    is the maximum tensor along the direction d (it has dimensionality
    `maxX_d.ndim = X.ndim - 1`).

    For the example sparse tensor X, we have:

      S_0._indices() == S_1._indices() == X._indices()

      maxX_0 = [11, 22, -inf, 24, 15]
      maxX_1 = [15, 24]

      S_0._values() = [exp(11 - maxX_0[0]) / exp(11 - maxX_0[0]),
                       exp(14 - maxX_0[3]) / (exp(14 - maxX_0[3]) + exp(24 - maxX_0[3])),
                       exp(15 - maxX_0[4]) / exp(15 - maxX_0[4]),
                       exp(22 - maxX_0[1]) / exp(22 - maxX_0[1]),
                       exp(24 - maxX_0[3]) / (exp(14 - maxX_0[3]) + exp(24 - maxX_0[3]))]
                    = [1, exp(-10)/(exp(-10) + 1), 1, 1, 1/(exp(-10) + 1)]

      (note that `maxX_0[2] == -inf` is not used to obtain S_0)

      S_1._values() = [exp(11 - maxX_1[0]) / (exp(11 - maxX_1[0]) + exp(14 - maxX_1[0]) + exp(15 - maxX_1[0])),
                       exp(14 - maxX_1[0]) / (exp(11 - maxX_1[0]) + exp(14 - maxX_1[0]) + exp(15 - maxX_1[0])),
                       exp(15 - maxX_1[0]) / (exp(11 - maxX_1[0]) + exp(14 - maxX_1[0]) + exp(15 - maxX_1[0])),
                       exp(22 - maxX_1[1]) / (exp(22 - maxX_1[1]) + exp(24 - maxX_1[1])),
                       exp(24 - maxX_1[1]) / (exp(22 - maxX_1[1]) + exp(24 - maxX_1[1]))]
                    = [exp(-4) / (exp(-4) + exp(-1) + 1),
                       exp(-1) / (exp(-4) + exp(-1) + 1),
                       1 / (exp(-4) + exp(-1) + 1),
                       exp(-2) / (exp(-2) + 1),
                       1 / (exp(-2) + 1)]

    To obtain the above via a for-loop over
    `nnz(=len(X._values()))`, we introduce the indices mapping `pool_d`
    that satisfies:

      indices = X._indices()
      for i in range(nnz):
          for j in range(nnz):
              if all(indices[k, i] == indices[k, j] for k in range(X.ndim) if k != d):
                  assert pool_d[i] == pool_d[j]
              else:
                  assert pool_d[i] != pool_d[j]

    that is, the entries with values indices i and j are in the same
    pool iff their locations in the grid of tensor indices align with
    the direction along which the softmax is calculated. The `pool`
    mapping maps the X._values() indices to the corresponding pool
    index.

    To save memory and processor resources, we pre-compute the entries
    of the maxX tensor and the sums of exponents as follows:

      mx_d = [max(values[i] for i in range(nnz) if pool_d[i] == k) for k in range(max(pool_d) + 1)]
      exp_sum_d = [sum(exp(values[i] - mx_d[k]) for i in range(nnz) if pool_d[i] == k) for k in range(max(pool_d) + 1)]

    For example, if

      pool_0 = [0, 1, 2, 3, 1]
      pool_1 = [0, 0, 0, 1, 1]

    then

      mx_0 = [11, 24, 15, 22]
      mx_1 = [15, 24]
      exp_sum_0 = [1, (exp(-10) + 1), 1, 1]
      exp_sum_1 = [(exp(-4) + exp(-1) + 1), (exp(-2) + 1)]

    and

      S_0._values() = [exp(11 - mx_0[pool_0[0]]) / exp_sum_0[pool_0[0]],
                       exp(14 - mx_0[pool_0[1]]) / exp_sum_0[pool_0[1]],
                       exp(15 - mx_0[pool_0[2]]) / exp_sum_0[pool_0[2]],
                       exp(22 - mx_0[pool_0[3]]) / exp_sum_0[pool_0[3]],
                       exp(24 - mx_0[pool_0[4]]) / exp_sum_0[pool_0[4]]]

    or in general,

      S_d._values() = [exp(values[i] - mx_d[pool_d[i]]) / exp_sum_d[pool_d[i]] for i in range(nnz)]

    The above algorithm can be easily extended for cases with a
    non-scalar dense part of the sparse tensor, where all scalar
    operations become element-wise tensor operations.

    The implementation below includes further optimizations: it
    collects pool indices to enable concurrency, minimizes the calls
    to the exp function, and reuses the softmax machinery for
    log_softmax.
  */
  auto sparse_dim = input.sparse_dim();
  auto indices = input._indices().contiguous();
  auto values = input._values().contiguous();
  auto out_values = output._values();
  auto out_indices = output._indices();
  out_values.resize_as_(values);
  out_indices.resize_as_(indices);
  out_indices.copy_(indices);

  if (dim >= sparse_dim) {
    if (LogSoftMax) {
      auto new_values =
          at::cpu::_log_softmax(values, dim - sparse_dim + 1, false);
      out_values.set_(new_values);
    } else {
      auto new_values = at::cpu::_softmax(values, dim - sparse_dim + 1, false);
      out_values.set_(new_values);
    }
    return;
  }

  auto nnz = values.size(0);
  auto sizes = input.sizes();
  auto nvalues = get_nvalues(sizes, sparse_dim);

  /* Prepare accessors */
  auto values_2 = values.view({nnz, nvalues});
  auto values_accessor = values_2.accessor<scalar_t, 2>();

  auto out_values_2 = out_values.view({nnz, nvalues});
  auto out_values_accessor = out_values_2.accessor<scalar_t, 2>();

  /* Compute independent pools of indices */
  auto pools = get_pools(indices, sizes, dim);

  int64_t grain_size = 1;
  parallel_for(0, pools.size(), grain_size, [&](int64_t begin, int64_t end) {
    for (const auto p : c10::irange(begin, end)) {
      auto pool_indices = pools[p];

      // Skip empty pools
      if (pool_indices.empty())
        continue;

      /* Prepare scratch space */
      std::vector<scalar_t> mx_row(nvalues, -std::numeric_limits<scalar_t>::infinity());
      std::vector<scalar_t> exp_sums_row(nvalues, 0);

      /* Compute mx */
      for (int64_t i : pool_indices) {
        auto values_row = values_accessor[i];
        for (const auto j : c10::irange(nvalues)) {
          mx_row[j] = std::max(mx_row[j], values_row[j]);
        }
      }

      /* Apply exp to (v - mx) and sum the results */
      for (int64_t i : pool_indices) {
        auto values_row = values_accessor[i];
        auto out_values_row = out_values_accessor[i];
        for (const auto j : c10::irange(nvalues)) {
          auto v = std::exp(values_row[j] - mx_row[j]);
          if (!LogSoftMax) {
            out_values_row[j] = v;
          }
          exp_sums_row[j] += v;
        }
      }

      for (const auto j : c10::irange(nvalues)) {
        if (LogSoftMax) {
          mx_row[j] += std::log(exp_sums_row[j]);
        } else {
          exp_sums_row[j] = 1.0 / exp_sums_row[j];
        }
      }

      /* Normalize with the sum of exponents */
      for (int64_t i : pool_indices) {
        auto values_row = values_accessor[i];
        auto out_values_row = out_values_accessor[i];
        for (const auto j : c10::irange(nvalues)) {
          if (LogSoftMax) {
            out_values_row[j] = values_row[j] - mx_row[j];
          } else {
            out_values_row[j] *= exp_sums_row[j];
          }
        }
      }
    }
  });
}
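
// Note on the LogSoftMax branch above: after the exp-and-sum pass,
// mx_row[j] + log(exp_sums_row[j]) equals
// logsumexp_j = max_j + log(sum_i exp(v[i][j] - max_j)), so the final
// pass writes v[i][j] - logsumexp_j directly; no second round of exp
// calls is needed. This is the "minimizes the calls to the exp function"
// optimization mentioned in the derivation above.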

template <typename scalar_t, bool LogSoftMax>
void cpu_sparse_coo_softmax_backward(const Tensor& grad_input, const Tensor& grad, const Tensor& output, const int64_t dim, ScalarType input_dtype) {
  /*
    If LogSoftMax == false, then

      gI_i = sum_j d<output_j>/d<input_i> * grad_j = sum_j output_i * (1[i==j] - output_j) * grad_j
           = output_i * (grad_i - sum_j output_j * grad_j)

    else

      gI_i = (1 - exp(output_i)) * grad_i - sum_{j} 1[i!=j] * exp(output_i) * grad_j
           = grad_i - exp(output_i) * sum_j grad_j

    where

      i, j in range(shape[dim])
      x_i = x[..., i, ...] (i indexes along the dimension `dim`)
      output.sparse_dim() == grad.sparse_dim()
  */
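
  /* Sanity check of the softmax branch: with output = [0.5, 0.5] and
     grad = [1, 0], sum_j output_j * grad_j = 0.5, so
     gI = output * (grad - 0.5) = [0.25, -0.25]. The entries sum to zero,
     as expected since softmax is invariant under a constant shift of its
     input. */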
  auto sparse_dim = output.sparse_dim();
  auto sizes = output.sizes().vec();
  auto grad_indices = grad._indices().contiguous();
  auto grad_values = grad._values().contiguous();
  auto out_indices = output._indices().contiguous();
  auto out_values = output._values().contiguous();
  auto values = grad_input._values();
  auto indices = grad_input._indices();
  auto out_nnz = out_values.size(0);
  auto grad_nnz = grad_values.size(0);

  values.resize_as_(out_values);
  values.zero_();
  indices.resize_as_(out_indices);
  indices.copy_(out_indices);

  auto out_offsets = get_offsets(out_indices, sizes, -1);
  auto grad_offsets = get_offsets(grad_indices, sizes, -1);

  if (dim >= sparse_dim) {
    if (out_offsets == grad_offsets) {
      if (LogSoftMax) {
        auto r = at::cpu::_log_softmax_backward_data(
            grad_values, out_values, dim - sparse_dim + 1, input_dtype);
        values.set_(r);
      } else {
        auto r = at::cpu::_softmax_backward_data(grad_values, out_values, dim - sparse_dim + 1, input_dtype);
        values.set_(r);
      }
    } else {
      for (const auto i : c10::irange(out_nnz)) {
        auto low = std::lower_bound(grad_offsets.begin(), grad_offsets.end(), out_offsets[i]);
        auto j = low - grad_offsets.begin();
        if (j < grad_nnz && out_offsets[i] == grad_offsets[j]) {
          if (LogSoftMax) {
            auto r = at::cpu::_log_softmax_backward_data(
                grad_values[j], out_values[i], dim - sparse_dim, input_dtype);
            values[i].copy_(r);
          } else {
            auto r = at::cpu::_softmax_backward_data(grad_values[j], out_values[i], dim - sparse_dim, input_dtype);
            values[i].copy_(r);
          }
        }
      }
    }
    return;
  }

  auto nnz = values.size(0);
  auto nvalues = get_nvalues(sizes, sparse_dim);

  auto values_2 = values.view({nnz, nvalues});
  auto values_accessor = values_2.accessor<scalar_t, 2>();

  auto out_values_2 = out_values.view({out_nnz, nvalues});
  auto out_values_accessor = out_values_2.accessor<scalar_t, 2>();

  auto grad_values_2 = grad_values.view({grad_nnz, nvalues});
  auto grad_values_accessor = grad_values_2.accessor<scalar_t, 2>();

  /* Compute independent pools of indices */
  auto pools = get_pools(out_indices, sizes, dim);

  int64_t grain_size = 1;
  parallel_for(0, pools.size(), grain_size, [&](int64_t begin, int64_t end) {
    for (const auto p : c10::irange(begin, end)) {
      auto pool_indices = pools[p];

      // Skip empty pools
      if (pool_indices.empty())
        continue;

      std::vector<scalar_t> tmp_row(nvalues, 0);

      /* Compute tmp = - sum_j output_j * grad_j */
      for (int64_t i : pool_indices) {
        auto out_values_row = out_values_accessor[i];
        auto low = std::lower_bound(grad_offsets.begin(), grad_offsets.end(), out_offsets[i]);
        auto j = low - grad_offsets.begin();

        if (j < grad_nnz && (out_offsets[i] == grad_offsets[j])) {
          auto grad_values_row = grad_values_accessor[j];
          for (const auto k : c10::irange(nvalues)) {
            if (LogSoftMax) {
              tmp_row[k] -= grad_values_row[k];
            } else {
              tmp_row[k] -= out_values_row[k] * grad_values_row[k];
            }
          }
        }
      }

      /* Compute grad_input = output * (grad + tmp) */
      for (int64_t i : pool_indices) {
        auto out_values_row = out_values_accessor[i];
        auto values_row = values_accessor[i];
        auto low = std::lower_bound(grad_offsets.begin(), grad_offsets.end(), out_offsets[i]);
        auto j = low - grad_offsets.begin();

        if (j < grad_nnz && (out_offsets[i] == grad_offsets[j])) {
          auto grad_values_row = grad_values_accessor[j];
          for (const auto k : c10::irange(nvalues)) {
            if (LogSoftMax) {
              values_row[k] = grad_values_row[k] + std::exp(out_values_row[k]) * tmp_row[k];
            } else {
              values_row[k] = out_values_row[k] * (grad_values_row[k] + tmp_row[k]);
            }
          }
        } else {
          for (const auto k : c10::irange(nvalues)) {
            if (LogSoftMax) {
              values_row[k] = std::exp(out_values_row[k]) * tmp_row[k];
            } else {
              values_row[k] = out_values_row[k] * tmp_row[k];
            }
          }
        }
      }
    }
  });
}
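
// The binary searches above (std::lower_bound over grad_offsets) assume
// that grad_offsets is sorted. This holds for coalesced inputs: coalesced
// COO indices are lexicographically ordered, and get_offsets with
// dim == -1 maps them monotonically to flat (row-major) offsets. The
// search lets `output` and `grad` have different sparsity patterns while
// matching entries in O(log grad_nnz) time each.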

} // anonymous namespace

Tensor softmax_sparse_cpu(
    const Tensor& input_,
    const int64_t dim_,
    const bool half_to_float) {
  Tensor input, output;
  int64_t dim;
  std::tie(input, output, dim) = softmax_sparse_input_preprocessing(
      input_, dim_, half_to_float, "softmax");
  if (input.numel() == 0) {
    return output;
  }
  AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "softmax", [&] {
    cpu_sparse_coo_softmax<scalar_t, false>(output, input, dim);
  });
  return output;
}
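
// A minimal usage sketch (assuming the standard ATen tensor factories,
// which live outside this file): for CPU COO tensors, at::_sparse_softmax
// dispatches to softmax_sparse_cpu above.
//
//   Tensor x = at::randn({2, 5}).to_sparse();
//   Tensor y = at::_sparse_softmax(x, 1, false);  // dim = 1, half_to_float = false
//
// y has the same indices as x; along dim 1 the specified values of each
// row sum to 1.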

Tensor log_softmax_sparse_cpu(
    const Tensor& input_,
    const int64_t dim_,
    const bool half_to_float) {
  Tensor input, output;
  int64_t dim;
  std::tie(input, output, dim) = softmax_sparse_input_preprocessing(
      input_, dim_, half_to_float, "log_softmax");
  if (input.numel() == 0) {
    return output;
  }
  AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "log_softmax", [&] {
    cpu_sparse_coo_softmax<scalar_t, true>(output, input, dim);
  });
  return output;
}

Tensor softmax_backward_sparse_cpu(
    const Tensor& grad_,
    const Tensor& output_,
    int64_t dim_,
    const Tensor& input_) {
  Tensor grad_input, grad, output;
  int64_t dim;
  std::tie(grad_input, grad, output, dim) =
      softmax_backward_sparse_input_preprocessing(
          grad_, output_, dim_, input_, "softmax_backward");
  if (output.numel() == 0) {
    return grad_input;
  }
  AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "softmax_backward", [&] {
    cpu_sparse_coo_softmax_backward<scalar_t, false>(
        grad_input, grad, output, dim_, input_.scalar_type());
  });
  return grad_input;
}

Tensor log_softmax_backward_sparse_cpu(
    const Tensor& grad_,
    const Tensor& output_,
    int64_t dim_,
    const Tensor& input_) {
  Tensor grad_input, grad, output;
  int64_t dim;
  std::tie(grad_input, grad, output, dim) =
      softmax_backward_sparse_input_preprocessing(
          grad_, output_, dim_, input_, "log_softmax_backward");
  if (output.numel() == 0) {
    return grad_input;
  }
  AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "log_softmax_backward", [&] {
    cpu_sparse_coo_softmax_backward<scalar_t, true>(
        grad_input, grad, output, dim_, input_.scalar_type());
  });
  return grad_input;
}

static Tensor _sparse_softmax(const Tensor& input_, const int64_t dim_) {
  auto result = [&]() {
    NoNamesGuard guard;
    return at::_sparse_softmax(input_, dim_, false);
  }();
  namedinference::propagate_names(result, input_);
  return result;
}

Tensor _sparse_softmax(const Tensor& input_, const int64_t dim_, c10::optional<ScalarType> dtype) {
  auto result = [&]() {
    NoNamesGuard guard;
    if (input_.is_cuda() && input_.scalar_type() == ScalarType::Half && dtype == ScalarType::Float) {
      return at::_sparse_softmax(input_, dim_, true);
    } else {
      Tensor converted = dtype.has_value() ? input_.toType(dtype.value()) : input_;
      return at::_sparse_softmax(converted, dim_, false);
    }
  }();
  namedinference::propagate_names(result, input_);
  return result;
}

Tensor _sparse_softmax(const Tensor& self, Dimname dim, optional<ScalarType> dtype) {
  return at::_sparse_softmax(self, dimname_to_position(self, dim), dtype);
}

static Tensor _sparse_log_softmax(const Tensor& input_, const int64_t dim_) {
  auto result = [&]() {
    NoNamesGuard guard;
    return at::_sparse_log_softmax(input_, dim_, false);
  }();
  namedinference::propagate_names(result, input_);
  return result;
}

Tensor _sparse_log_softmax(const Tensor& input_, const int64_t dim_, c10::optional<ScalarType> dtype) {
  auto result = [&]() {
    NoNamesGuard guard;
    if (input_.is_cuda() && input_.scalar_type() == ScalarType::Half && dtype == ScalarType::Float) {
      return at::_sparse_log_softmax(input_, dim_, true);
    } else {
      Tensor converted = dtype.has_value() ? input_.toType(dtype.value()) : input_;
      return at::_sparse_log_softmax(converted, dim_, false);
    }
  }();
  namedinference::propagate_names(result, input_);
  return result;
}

Tensor _sparse_log_softmax(const Tensor& self, Dimname dim, optional<ScalarType> dtype) {
  return at::_sparse_log_softmax(self, dimname_to_position(self, dim), dtype);
}
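
// For reference: on the specified entries, _sparse_log_softmax agrees
// mathematically (up to rounding) with taking _sparse_softmax and then
// log, since log(exp(v - logsumexp)) == v - logsumexp, but it is computed
// directly in the stable form; see the LogSoftMax branches in
// cpu_sparse_coo_softmax above.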

} // namespace at::native