pytorch/caffe2/operators/unsafe_coalesce.h

71 lines
2.5 KiB
C++

#ifndef CAFFE2_OPERATORS_UNSAFE_COALESCE_OP_H_
#define CAFFE2_OPERATORS_UNSAFE_COALESCE_OP_H_
#include "caffe2/core/context.h"
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include <c10/util/irange.h>
#include "caffe2/core/operator.h"
namespace caffe2 {
/**
 * Packs every input tensor into one contiguous float buffer.
 *
 * The last output (index OutputSize() - 1) is sized to hold the elements of
 * all inputs laid out back to back. For each input i whose data is not
 * already located at its slot inside that buffer, the data is copied into the
 * slot and Output(i) is replaced by an alias of the coalesced tensor placed at
 * the slot's offset and resized to the input's original sizes. Inputs whose
 * data already sits at their slot are left untouched.
 *
 * Only float inputs are accepted; any other dtype fails an enforce.
 */
template <class Context>
class UnsafeCoalesceOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  using Operator<Context>::Operator;

  /**
   * Runs the coalescing pass described on the class.
   *
   * @return true on success; failures surface through CAFFE_ENFORCE.
   */
  bool RunOnDevice() override {
    const auto input_count = InputSize();

    // Single pass: reject non-float inputs (only float is supported for now)
    // and total up how many elements the coalesced buffer has to hold.
    size_t total_elems = 0;
    for (const auto idx : c10::irange(input_count)) {
      CAFFE_ENFORCE(
          Input(idx).dtype().template Match<float>(),
          "Must only coalesce float type, error at input: ",
          idx);
      total_elems += Input(idx).numel();
    }

    // The coalesced buffer always lives in the last output.
    const auto packed_idx = OutputSize() - 1;
    auto* packed = Output(packed_idx, total_elems, at::dtype<float>());
    auto packed_data = packed->template mutable_data<float>();

    // Running position (in elements) of the next slot inside the buffer.
    size_t next_slot = 0;
    for (const auto idx : c10::irange(input_count)) {
      const auto elem_count = Input(idx).numel();
      // Remember the shape now; Output(idx) is rebuilt from the alias below.
      auto original_sizes = Input(idx).sizes().vec();
      auto src = Input(idx).template data<float>();

      const auto slot_start = next_slot;
      auto dst = packed_data + slot_start;
      next_slot += elem_count;

      // Nothing to do when the input already points at its slot.
      if (src == dst) {
        continue;
      }

      // Compare the two storage base addresses (data pointer minus storage
      // offset): an input that shares storage with the coalesced buffer but
      // is not at its own slot is rejected.
      const auto src_base =
          src - Input(idx).unsafeGetTensorImpl()->storage_offset();
      const auto packed_base = packed_data -
          Output(packed_idx)->unsafeGetTensorImpl()->storage_offset();
      CAFFE_ENFORCE_NE(
          src_base,
          packed_base,
          "Tensors used in UnsafeCoalesce operator cannot share storage, unless it's inplace operation");

      context_.CopyItemsSameDevice(Input(idx).dtype(), elem_count, src, dst);

      // Note (from the original author): replacing Output(idx) may cause
      // Input(idx) to free its data when the two alias each other. This is
      // stated to be safe on a GPU because the copy happens-before the free.
      OperatorBase::SetOutputTensor(idx, packed->Alias());
      auto* slice = Output(idx);
      slice->unsafeGetTensorImpl()->set_storage_offset(slot_start);
      slice->Resize(original_sizes);
    }
    return true;
  }
};
} // namespace caffe2
#endif /* CAFFE2_OPERATORS_UNSAFE_COALESCE_OP_H_ */