170 lines
5.3 KiB
C++
170 lines
5.3 KiB
C++
/**
|
|
* Copyright (c) 2016-present, Facebook, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
#pragma once
|
|
|
|
#include <algorithm>
#include <fstream>
#include <iterator>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <c10/util/irange.h>

#include "c10/util/string_utils.h"
#include "caffe2/core/blob_serialization.h"
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/string_utils.h"
|
|
|
|
using std::map;
|
|
using std::shared_ptr;
|
|
using std::string;
|
|
using std::vector;
|
|
|
|
template <typename ContextType, typename TensorType>
|
|
void writeTextOutput(
|
|
TensorType* tensor,
|
|
const string& output_prefix,
|
|
const string& name,
|
|
int index,
|
|
int num_blobs) {
|
|
if (index >= num_blobs) {
|
|
return;
|
|
}
|
|
string filename = name;
|
|
std::replace(filename.begin(), filename.end(), '/', '_');
|
|
string output_name = output_prefix + "/" + filename + ".txt";
|
|
caffe2::TensorSerializer ser;
|
|
caffe2::BlobProto blob_proto;
|
|
|
|
ser.Serialize(
|
|
*tensor, output_name, blob_proto.mutable_tensor(), 0, tensor->numel());
|
|
blob_proto.set_name(output_name);
|
|
blob_proto.set_type("Tensor");
|
|
CAFFE_ENFORCE(blob_proto.has_tensor());
|
|
caffe2::TensorProto tensor_proto = blob_proto.tensor();
|
|
int dims_size = tensor_proto.dims_size();
|
|
long long elem_dim_size =
|
|
dims_size > 1 ? tensor_proto.dims(1) : tensor_proto.dims(0);
|
|
for (const auto i : c10::irange(2, dims_size)) {
|
|
elem_dim_size *= tensor_proto.dims(i);
|
|
}
|
|
std::vector<std::string> lines;
|
|
std::string dims;
|
|
for (const auto i : c10::irange(dims_size)) {
|
|
int dim = tensor_proto.dims(i);
|
|
if (i > 0) {
|
|
dims += ", ";
|
|
}
|
|
dims += c10::to_string(dim);
|
|
}
|
|
lines.push_back(dims);
|
|
std::stringstream line;
|
|
if (tensor_proto.data_type() == caffe2::TensorProto::FLOAT) {
|
|
auto start = tensor_proto.float_data().begin();
|
|
auto end = tensor_proto.float_data().end();
|
|
copy(start, end, std::ostream_iterator<float>(line, ","));
|
|
} else if (tensor_proto.data_type() == caffe2::TensorProto::INT32) {
|
|
auto start = tensor_proto.int32_data().begin();
|
|
auto end = tensor_proto.int32_data().end();
|
|
copy(start, end, std::ostream_iterator<int>(line, ","));
|
|
} else {
|
|
CAFFE_THROW("Unimplemented Blob type.");
|
|
}
|
|
// remove the last ,
|
|
string str = line.str();
|
|
if(str.length() != 0) {
|
|
str.pop_back();
|
|
}
|
|
lines.push_back(str);
|
|
|
|
// static casts are workaround for MSVC build
|
|
auto flags = static_cast<std::ios_base::openmode>(std::ios::out);
|
|
if (index != 0) {
|
|
flags |= static_cast<std::ios_base::openmode>(std::ios::app);
|
|
} else {
|
|
flags |= static_cast<std::ios_base::openmode>(std::ios::trunc);
|
|
}
|
|
std::ofstream output_file(output_name, flags);
|
|
std::ostream_iterator<std::string> output_iterator(output_file, "\n");
|
|
std::copy(lines.begin(), lines.end(), output_iterator);
|
|
}
|
|
|
|
void observerConfig();
|
|
bool backendCudaSet(const string&);
|
|
void setDeviceType(caffe2::NetDef*, caffe2::DeviceType&);
|
|
void setOperatorEngine(caffe2::NetDef*, const string&);
|
|
int loadInput(
|
|
shared_ptr<caffe2::Workspace> workspace,
|
|
const bool run_on_gpu,
|
|
map<string, caffe2::TensorProtos>& tensor_protos_map,
|
|
const string& input,
|
|
const string& input_file,
|
|
const string& input_dims,
|
|
const string& input_type);
|
|
void fillInputBlob(
|
|
shared_ptr<caffe2::Workspace> workspace,
|
|
map<string, caffe2::TensorProtos>& tensor_protos_map,
|
|
int iteration);
|
|
void writeOutput(
|
|
shared_ptr<caffe2::Workspace> workspace,
|
|
const bool run_on_gpu,
|
|
const string& output,
|
|
const string& output_folder,
|
|
const bool text_output,
|
|
const int index,
|
|
const int num_blobs);
|
|
void logBenchmarkResult(
|
|
const std::string& type,
|
|
const std::string& metric,
|
|
const std::string& unit,
|
|
const int value);
|
|
long getVirtualMemoryIfOptionEnabled(bool FLAGS_measure_memory);
|
|
void runNetwork(
|
|
shared_ptr<caffe2::Workspace> workspace,
|
|
caffe2::NetBase* net,
|
|
map<string, caffe2::TensorProtos>& tensor_protos_map,
|
|
const bool wipe_cache,
|
|
const bool run_individual,
|
|
const bool run_on_gpu,
|
|
const bool text_output,
|
|
const int warmup,
|
|
const int iter,
|
|
const int num_blobs,
|
|
const int sleep_before_run,
|
|
const int sleep_between_iteration,
|
|
const int sleep_between_net_and_operator,
|
|
const std::string& output,
|
|
const std::string& output_folder);
|
|
int benchmark(
|
|
int argc,
|
|
char* argv[],
|
|
const string& FLAGS_backend,
|
|
const string& FLAGS_init_net,
|
|
const string& FLAGS_input,
|
|
const string& FLAGS_input_dims,
|
|
const string& FLAGS_input_file,
|
|
const string& FLAGS_input_type,
|
|
int FLAGS_iter,
|
|
bool FLAGS_measure_memory,
|
|
const string& FLAGS_net,
|
|
const string& FLAGS_output,
|
|
const string& FLAGS_output_folder,
|
|
bool FLAGS_run_individual,
|
|
int FLAGS_sleep_before_run,
|
|
int FLAGS_sleep_between_iteration,
|
|
int FLAGS_sleep_between_net_and_operator,
|
|
bool FLAGS_text_output,
|
|
int FLAGS_warmup,
|
|
bool FLAGS_wipe_cache);
|