pytorch/binaries/benchmark_helper.h

/**
* Copyright (c) 2016-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <algorithm>
#include <fstream>
#include <iterator>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <c10/util/irange.h>
#include <c10/util/string_utils.h>
#include "caffe2/core/blob_serialization.h"
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/string_utils.h"
using std::map;
using std::shared_ptr;
using std::string;
using std::vector;
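
// Writes one tensor to "<output_prefix>/<name>.txt" (slashes in the blob
// name are replaced with underscores): the first line holds the
// comma-separated dimensions, the second the comma-separated values.
// The file is truncated when index == 0 and appended to afterwards, so
// successive calls for the same blob accumulate in a single file; calls
// with index >= num_blobs are ignored.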
template <typename ContextType, typename TensorType>
void writeTextOutput(
    TensorType* tensor,
    const string& output_prefix,
    const string& name,
    int index,
    int num_blobs) {
  if (index >= num_blobs) {
    return;
  }
  // Map the blob name to a flat filename under output_prefix.
  string filename = name;
  std::replace(filename.begin(), filename.end(), '/', '_');
  string output_name = output_prefix + "/" + filename + ".txt";

  // Serialize the tensor into a BlobProto so the data can be read back
  // uniformly regardless of which device the tensor lives on.
  caffe2::TensorSerializer ser;
  caffe2::BlobProto blob_proto;
  ser.Serialize(
      *tensor, output_name, blob_proto.mutable_tensor(), 0, tensor->numel());
  blob_proto.set_name(output_name);
  blob_proto.set_type("Tensor");
  CAFFE_ENFORCE(blob_proto.has_tensor());
  caffe2::TensorProto tensor_proto = blob_proto.tensor();

  // Number of elements in each entry of the leading dimension
  // (computed for reference; not otherwise used below).
  int dims_size = tensor_proto.dims_size();
  long long elem_dim_size =
      dims_size > 1 ? tensor_proto.dims(1) : tensor_proto.dims(0);
  for (const auto i : c10::irange(2, dims_size)) {
    elem_dim_size *= tensor_proto.dims(i);
  }

  // First output line: the comma-separated tensor dimensions.
  std::vector<std::string> lines;
  std::string dims;
  for (const auto i : c10::irange(dims_size)) {
    int dim = tensor_proto.dims(i);
    if (i > 0) {
      dims += ", ";
    }
    dims += c10::to_string(dim);
  }
  lines.push_back(dims);

  // Second output line: the flattened tensor data, comma-separated.
  std::stringstream line;
  if (tensor_proto.data_type() == caffe2::TensorProto::FLOAT) {
    auto start = tensor_proto.float_data().begin();
    auto end = tensor_proto.float_data().end();
    std::copy(start, end, std::ostream_iterator<float>(line, ","));
  } else if (tensor_proto.data_type() == caffe2::TensorProto::INT32) {
    auto start = tensor_proto.int32_data().begin();
    auto end = tensor_proto.int32_data().end();
    std::copy(start, end, std::ostream_iterator<int>(line, ","));
  } else {
    CAFFE_THROW("Unimplemented Blob type.");
  }
  // Drop the trailing comma.
  string str = line.str();
  if (!str.empty()) {
    str.pop_back();
  }
  lines.push_back(str);

  // Truncate the file for the first blob, append for subsequent ones.
  // The static_casts are a workaround for the MSVC build.
  auto flags = static_cast<std::ios_base::openmode>(std::ios::out);
  if (index != 0) {
    flags |= static_cast<std::ios_base::openmode>(std::ios::app);
  } else {
    flags |= static_cast<std::ios_base::openmode>(std::ios::trunc);
  }
  std::ofstream output_file(output_name, flags);
  std::ostream_iterator<std::string> output_iterator(output_file, "\n");
  std::copy(lines.begin(), lines.end(), output_iterator);
}
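
// Installs the global net observers used to report per-run and
// per-operator performance of the benchmarked net.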
void observerConfig();
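
// Returns true when the given backend string selects the CUDA backend,
// i.e. the net should run on GPU.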
bool backendCudaSet(const string&);
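
// Stamps the given device type onto every operator of the net.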
void setDeviceType(caffe2::NetDef*, caffe2::DeviceType&);
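
// Overrides the engine of every operator in the net according to the
// requested backend.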
void setOperatorEngine(caffe2::NetDef*, const string&);
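
// Creates the input blobs in the workspace, either by deserializing
// input_file into tensor_protos_map or by building tensors from
// input_dims/input_type. Returns the number of blobs per input (later
// passed around as num_blobs).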
int loadInput(
    shared_ptr<caffe2::Workspace> workspace,
    const bool run_on_gpu,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    const string& input,
    const string& input_file,
    const string& input_dims,
    const string& input_type);
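
// Copies the tensors recorded for the given iteration from
// tensor_protos_map into the corresponding workspace blobs.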
void fillInputBlob(
    shared_ptr<caffe2::Workspace> workspace,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    int iteration);
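
// Dumps the requested output blobs to output_folder, as text via
// writeTextOutput when text_output is set, or as serialized protos
// otherwise.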
void writeOutput(
    shared_ptr<caffe2::Workspace> workspace,
    const bool run_on_gpu,
    const string& output,
    const string& output_folder,
    const bool text_output,
    const int index,
    const int num_blobs);
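
// Emits one benchmark datapoint (e.g. latency or memory) in the
// type/metric/unit/value form.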
void logBenchmarkResult(
    const std::string& type,
    const std::string& metric,
    const std::string& unit,
    const int value);
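
// Returns the process's current virtual memory size when memory
// measurement is enabled, and 0 otherwise.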
long getVirtualMemoryIfOptionEnabled(bool FLAGS_measure_memory);
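
// Executes the benchmark loop: optional warmup runs, then `iter` measured
// iterations, with optional cache wiping, per-operator timing
// (run_individual), sleeps between the phases, and per-iteration output
// dumping.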
void runNetwork(
    shared_ptr<caffe2::Workspace> workspace,
    caffe2::NetBase* net,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    const bool wipe_cache,
    const bool run_individual,
    const bool run_on_gpu,
    const bool text_output,
    const int warmup,
    const int iter,
    const int num_blobs,
    const int sleep_before_run,
    const int sleep_between_iteration,
    const int sleep_between_net_and_operator,
    const std::string& output,
    const std::string& output_folder);
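
// Entry point shared by the benchmark binaries: initializes caffe2 from
// argc/argv, loads the init and predict nets, and drives the helpers
// above according to the FLAGS_* arguments.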
int benchmark(
    int argc,
    char* argv[],
    const string& FLAGS_backend,
    const string& FLAGS_init_net,
    const string& FLAGS_input,
    const string& FLAGS_input_dims,
    const string& FLAGS_input_file,
    const string& FLAGS_input_type,
    int FLAGS_iter,
    bool FLAGS_measure_memory,
    const string& FLAGS_net,
    const string& FLAGS_output,
    const string& FLAGS_output_folder,
    bool FLAGS_run_individual,
    int FLAGS_sleep_before_run,
    int FLAGS_sleep_between_iteration,
    int FLAGS_sleep_between_net_and_operator,
    bool FLAGS_text_output,
    int FLAGS_warmup,
    bool FLAGS_wipe_cache);