commit
e5ae8f15ad
|
@ -0,0 +1,115 @@
|
|||
build --cxxopt=--std=c++17
|
||||
build --copt=-I.
|
||||
# Bazel does not support including its cc_library targets as system
|
||||
# headers. We work around this for generated code
|
||||
# (e.g. c10/macros/cmake_macros.h) by making the generated directory a
|
||||
# system include path.
|
||||
build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
|
||||
build --copt=-isystem --copt bazel-out/darwin-fastbuild/bin
|
||||
build --experimental_ui_max_stdouterr_bytes=2048576
|
||||
|
||||
# Configuration to disable tty features for environments like CI
|
||||
build:no-tty --curses no
|
||||
build:no-tty --progress_report_interval 10
|
||||
build:no-tty --show_progress_rate_limit 10
|
||||
|
||||
# Build with GPU support by default.
|
||||
build --define=cuda=true
|
||||
# rules_cuda configuration
|
||||
build --@rules_cuda//cuda:enable_cuda
|
||||
build --@rules_cuda//cuda:cuda_targets=sm_52
|
||||
build --@rules_cuda//cuda:compiler=nvcc
|
||||
build --repo_env=CUDA_PATH=/usr/local/cuda
|
||||
|
||||
# Configuration to build without GPU support
|
||||
build:cpu-only --define=cuda=false
|
||||
# define a separate build folder for faster switching between configs
|
||||
build:cpu-only --platform_suffix=-cpu-only
|
||||
# See the note on the config-less build for details about why we are
|
||||
# doing this. We must also do it for the "-cpu-only" platform suffix.
|
||||
build --copt=-isystem --copt=bazel-out/k8-fastbuild-cpu-only/bin
|
||||
# rules_cuda configuration
|
||||
build:cpu-only --@rules_cuda//cuda:enable_cuda=False
|
||||
|
||||
# Definition of --config=shell
|
||||
# interactive shell immediately before execution
|
||||
build:shell --run_under="//tools/bazel_tools:shellwrap"
|
||||
|
||||
# Disable all warnings for external repositories. We don't care about
|
||||
# their warnings.
|
||||
build --per_file_copt=^external/@-w
|
||||
|
||||
# Set additional warnings to error level.
|
||||
#
|
||||
# Implementation notes:
|
||||
# * we use file extensions to determine if we are using the C++
|
||||
# compiler or the cuda compiler
|
||||
# * we use ^// at the start of the regex to only permit matching
|
||||
# PyTorch files. This excludes external repos.
|
||||
#
|
||||
# Note that because this is logically a command-line flag, it is
|
||||
# considered the word on what warnings are enabled. This has the
|
||||
# unfortunate consequence of preventing us from disabling an error at
|
||||
# the target level because those flags will come before these flags in
|
||||
# the action invocation. Instead we provide per-file exceptions after
|
||||
# this.
|
||||
#
|
||||
# On the bright side, this means we don't have to more broadly apply
|
||||
# the exceptions to an entire target.
|
||||
#
|
||||
# Looking for CUDA flags? We have a cu_library macro that we can edit
|
||||
# directly. Look in //tools/rules:cu.bzl for details. Editing the
|
||||
# macro over this has the following advantages:
|
||||
# * making changes does not require discarding the Bazel analysis
|
||||
# cache
|
||||
# * it allows for selective overrides on individual targets since the
|
||||
# macro-level opts will come earlier than target level overrides
|
||||
|
||||
build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=all
|
||||
# The following warnings come from -Wall. We downgrade them from error
|
||||
# to warnings here.
|
||||
#
|
||||
# sign-compare has a tremendous amount of violations in the
|
||||
# codebase. It will be a lot of work to fix them, just disable it for
|
||||
# now.
|
||||
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-sign-compare
|
||||
# We intentionally use #pragma unroll, which is compiler specific.
|
||||
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-error=unknown-pragmas
|
||||
|
||||
build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=extra
|
||||
# The following warnings come from -Wextra. We downgrade them from error
|
||||
# to warnings here.
|
||||
#
|
||||
# unused-parameter-compare has a tremendous amount of violations in the
|
||||
# codebase. It will be a lot of work to fix them, just disable it for
|
||||
# now.
|
||||
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-parameter
|
||||
# missing-field-parameters has both a large number of violations in
|
||||
# the codebase, but it also is used pervasively in the Python C
|
||||
# API. There are a couple of catches though:
|
||||
# * we use multiple versions of the Python API and hence have
|
||||
# potentially multiple different versions of each relevant
|
||||
# struct. They may have different numbers of fields. It will be
|
||||
# unwieldy to support multiple versions in the same source file.
|
||||
# * Python itself for many of these structs recommends only
|
||||
# initializing a subset of the fields. We should respect the API
|
||||
# usage conventions of our dependencies.
|
||||
#
|
||||
# Hence, we just disable this warning altogether. We may want to clean
|
||||
# up some of the clear-cut cases that could be risky, but we still
|
||||
# likely want to have this disabled for the most part.
|
||||
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-missing-field-initializers
|
||||
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterCompositeExplicitAutograd\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterCompositeImplicitAutograd\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterMkldnnCPU\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorCPU\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterQuantizedCPU\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterSparseCPU\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterSparseCsrCPU\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorMeta\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterSparseMeta\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterQuantizedMeta\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:aten/src/ATen/RegisterZeroTensor\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:torch/csrc/lazy/generated/RegisterAutogradLazy\.cpp$'@-Wno-error=unused-function
|
||||
build --per_file_copt='//:torch/csrc/lazy/generated/RegisterLazy\.cpp$'@-Wno-error=unused-function
|
|
@ -0,0 +1 @@
|
|||
4.2.1
|
|
@ -0,0 +1,25 @@
|
|||
[pt]
|
||||
is_oss=1
|
||||
|
||||
[buildfile]
|
||||
name = BUCK.oss
|
||||
includes = //tools/build_defs/select.bzl
|
||||
|
||||
[repositories]
|
||||
bazel_skylib = third_party/bazel-skylib/
|
||||
ovr_config = .
|
||||
|
||||
[download]
|
||||
in_build = true
|
||||
|
||||
[cxx]
|
||||
cxxflags = -std=c++17
|
||||
should_remap_host_platform = true
|
||||
cpp = /usr/bin/clang
|
||||
cc = /usr/bin/clang
|
||||
cxx = /usr/bin/clang++
|
||||
cxxpp = /usr/bin/clang++
|
||||
ld = /usr/bin/clang++
|
||||
|
||||
[project]
|
||||
default_flavors_mode=all
|
|
@ -0,0 +1,14 @@
|
|||
# Jenkins
|
||||
|
||||
The scripts in this directory are the entrypoint for testing Caffe2.
|
||||
|
||||
The environment variable `BUILD_ENVIRONMENT` is expected to be set to
|
||||
the build environment you intend to test. It is a hint for the build
|
||||
and test scripts to configure Caffe2 a certain way and include/exclude
|
||||
tests. For Docker images, it equals the name of the image itself. For
|
||||
example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are
|
||||
built on Jenkins and are used in triggered builds already have this
|
||||
environment variable set in their manifest. Also see
|
||||
`./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`.
|
||||
|
||||
Our Jenkins installation is located at https://ci.pytorch.org/jenkins/.
|
|
@ -0,0 +1,36 @@
|
|||
set -ex
|
||||
|
||||
LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
|
||||
TEST_DIR="$ROOT_DIR/test"
|
||||
gtest_reports_dir="${TEST_DIR}/test-reports/cpp"
|
||||
pytest_reports_dir="${TEST_DIR}/test-reports/python"
|
||||
|
||||
# Figure out which Python to use
|
||||
PYTHON="$(which python)"
|
||||
if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
|
||||
PYTHON=$(which "python${BASH_REMATCH[1]}")
|
||||
fi
|
||||
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
|
||||
# HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
|
||||
unset HIP_PLATFORM
|
||||
if which sccache > /dev/null; then
|
||||
# Save sccache logs to file
|
||||
sccache --stop-server || true
|
||||
rm -f ~/sccache_error.log || true
|
||||
SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
|
||||
|
||||
# Report sccache stats for easier debugging
|
||||
sccache --zero-stats
|
||||
fi
|
||||
fi
|
||||
|
||||
# /usr/local/caffe2 is where the cpp bits are installed to in cmake-only
|
||||
# builds. In +python builds the cpp tests are copied to /usr/local/caffe2 so
|
||||
# that the test code in .ci/test.sh is the same
|
||||
INSTALL_PREFIX="/usr/local/caffe2"
|
||||
|
||||
mkdir -p "$gtest_reports_dir" || true
|
||||
mkdir -p "$pytest_reports_dir" || true
|
||||
mkdir -p "$INSTALL_PREFIX" || true
|
|
@ -0,0 +1,172 @@
|
|||
#!/bin/bash
|
||||
|
||||
# shellcheck source=./common.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
|
||||
if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
|
||||
pip install click mock tabulate networkx==2.0
|
||||
pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
|
||||
fi
|
||||
|
||||
# Skip tests in environments where they are not built/applicable
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
|
||||
echo 'Skipping tests'
|
||||
exit 0
|
||||
fi
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
|
||||
# temporary to locate some kernel issues on the CI nodes
|
||||
export HSAKMT_DEBUG_LEVEL=4
|
||||
fi
|
||||
# These additional packages are needed for circleci ROCm builds.
|
||||
if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
|
||||
# Need networkx 2.0 because bellman_ford was moved in 2.1. Scikit-image by
|
||||
# defaults installs the most recent networkx version, so we install this lower
|
||||
# version explicitly before scikit-image pulls it in as a dependency
|
||||
pip install networkx==2.0
|
||||
# click - onnx
|
||||
pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
|
||||
fi
|
||||
|
||||
# Find where cpp tests and Caffe2 itself are installed
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
|
||||
# For cmake only build we install everything into /usr/local
|
||||
cpp_test_dir="$INSTALL_PREFIX/cpp_test"
|
||||
ld_library_path="$INSTALL_PREFIX/lib"
|
||||
else
|
||||
# For Python builds we install into python
|
||||
# cd to /usr first so the python import doesn't get confused by any 'caffe2'
|
||||
# directory in cwd
|
||||
python_installation="$(dirname $(dirname $(cd /usr && $PYTHON -c 'import os; import caffe2; print(os.path.realpath(caffe2.__file__))')))"
|
||||
caffe2_pypath="$python_installation/caffe2"
|
||||
cpp_test_dir="$python_installation/torch/test"
|
||||
ld_library_path="$python_installation/torch/lib"
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
# C++ tests #
|
||||
################################################################################
|
||||
# Only run cpp tests in the first shard, don't run cpp tests a second time in the second shard
|
||||
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
|
||||
echo "Running C++ tests.."
|
||||
for test in $(find "$cpp_test_dir" -executable -type f); do
|
||||
case "$test" in
|
||||
# skip tests we know are hanging or bad
|
||||
*/mkl_utils_test|*/aten/integer_divider_test)
|
||||
continue
|
||||
;;
|
||||
*/scalar_tensor_test|*/basic|*/native_test)
|
||||
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
||||
continue
|
||||
else
|
||||
LD_LIBRARY_PATH="$ld_library_path" "$test"
|
||||
fi
|
||||
;;
|
||||
*/*_benchmark)
|
||||
LD_LIBRARY_PATH="$ld_library_path" "$test" --benchmark_color=false
|
||||
;;
|
||||
*)
|
||||
# Currently, we use a mixture of gtest (caffe2) and Catch2 (ATen). While
|
||||
# planning to migrate to gtest as the common PyTorch c++ test suite, we
|
||||
# currently do NOT use the xml test reporter, because Catch doesn't
|
||||
# support multiple reporters
|
||||
# c.f. https://github.com/catchorg/Catch2/blob/master/docs/release-notes.md#223
|
||||
# which means that enabling XML output means you lose useful stdout
|
||||
# output for Jenkins. It's more important to have useful console
|
||||
# output than it is to have XML output for Jenkins.
|
||||
# Note: in the future, if we want to use xml test reporter once we switch
|
||||
# to all gtest, one can simply do:
|
||||
LD_LIBRARY_PATH="$ld_library_path" \
|
||||
"$test" --gtest_output=xml:"$gtest_reports_dir/$(basename $test).xml"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
# Python tests #
|
||||
################################################################################
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# If pip is installed as root, we must use sudo.
|
||||
# CircleCI docker images could install conda as jenkins user, or use the OS's python package.
|
||||
PIP=$(which pip)
|
||||
PIP_USER=$(stat --format '%U' $PIP)
|
||||
CURRENT_USER=$(id -u -n)
|
||||
if [[ "$PIP_USER" = root && "$CURRENT_USER" != root ]]; then
|
||||
MAYBE_SUDO=sudo
|
||||
fi
|
||||
|
||||
# Uninstall pre-installed hypothesis and coverage to use an older version as newer
|
||||
# versions remove the timeout parameter from settings which ideep/conv_transpose_test.py uses
|
||||
$MAYBE_SUDO pip -q uninstall -y hypothesis
|
||||
$MAYBE_SUDO pip -q uninstall -y coverage
|
||||
|
||||
# "pip install hypothesis==3.44.6" from official server is unreliable on
|
||||
# CircleCI, so we host a copy on S3 instead
|
||||
$MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
|
||||
$MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
|
||||
$MAYBE_SUDO pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
|
||||
|
||||
# Collect additional tests to run (outside caffe2/python)
|
||||
EXTRA_TESTS=()
|
||||
|
||||
# CUDA builds always include NCCL support
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *-rocm* ]]; then
|
||||
EXTRA_TESTS+=("$caffe2_pypath/contrib/nccl")
|
||||
fi
|
||||
|
||||
rocm_ignore_test=()
|
||||
if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then
|
||||
# Currently these tests are failing on ROCM platform:
|
||||
|
||||
# On ROCm, RCCL (distributed) development isn't complete.
|
||||
# https://github.com/ROCmSoftwarePlatform/rccl
|
||||
rocm_ignore_test+=("--ignore $caffe2_pypath/python/data_parallel_model_test.py")
|
||||
|
||||
# This test has been flaky in ROCm CI (but note the tests are
|
||||
# cpu-only so should be unrelated to ROCm)
|
||||
rocm_ignore_test+=("--ignore $caffe2_pypath/python/operator_test/blobs_queue_db_test.py")
|
||||
# This test is skipped on Jenkins(compiled without MKL) and otherwise known flaky
|
||||
rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/convfusion_op_test.py")
|
||||
# This test is skipped on Jenkins(compiled without MKL) and causing segfault on Circle
|
||||
rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/pool_op_test.py")
|
||||
fi
|
||||
|
||||
echo "Running Python tests.."
|
||||
# locale setting is required by click package
|
||||
for loc in "en_US.utf8" "C.UTF-8"; do
|
||||
if locale -a | grep "$loc" >/dev/null 2>&1; then
|
||||
export LC_ALL="$loc"
|
||||
export LANG="$loc"
|
||||
break;
|
||||
fi
|
||||
done
|
||||
|
||||
# Some Caffe2 tests fail when run using AVX512 ISA, see https://github.com/pytorch/pytorch/issues/66111
|
||||
export DNNL_MAX_CPU_ISA=AVX2
|
||||
|
||||
# Should still run even in the absence of SHARD_NUMBER
|
||||
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
|
||||
# TODO(sdym@meta.com) remove this when the linked issue resolved.
|
||||
# py is temporary until https://github.com/Teemu/pytest-sugar/issues/241 is fixed
|
||||
pip install --user py==1.11.0
|
||||
pip install --user pytest-sugar
|
||||
# NB: Warnings are disabled because they make it harder to see what
|
||||
# the actual erroring test is
|
||||
"$PYTHON" \
|
||||
-m pytest \
|
||||
-x \
|
||||
-v \
|
||||
--disable-warnings \
|
||||
--junit-xml="$pytest_reports_dir/result.xml" \
|
||||
--ignore "$caffe2_pypath/python/test/executor_test.py" \
|
||||
--ignore "$caffe2_pypath/python/operator_test/matmul_op_test.py" \
|
||||
--ignore "$caffe2_pypath/python/operator_test/pack_ops_test.py" \
|
||||
--ignore "$caffe2_pypath/python/mkl/mkl_sbn_speed_test.py" \
|
||||
--ignore "$caffe2_pypath/python/trt/test_pt_onnx_trt.py" \
|
||||
${rocm_ignore_test[@]} \
|
||||
"$caffe2_pypath/python" \
|
||||
"${EXTRA_TESTS[@]}"
|
||||
fi
|
|
@ -0,0 +1,31 @@
|
|||
# Docker images for Jenkins
|
||||
|
||||
This directory contains everything needed to build the Docker images
|
||||
that are used in our CI
|
||||
|
||||
The Dockerfiles located in subdirectories are parameterized to
|
||||
conditionally run build stages depending on build arguments passed to
|
||||
`docker build`. This lets us use only a few Dockerfiles for many
|
||||
images. The different configurations are identified by a freeform
|
||||
string that we call a _build environment_. This string is persisted in
|
||||
each image as the `BUILD_ENVIRONMENT` environment variable.
|
||||
|
||||
See `build.sh` for valid build environments (it's the giant switch).
|
||||
|
||||
Docker builds are now defined with `.circleci/cimodel/data/simple/docker_definitions.py`
|
||||
|
||||
## Contents
|
||||
|
||||
* `build.sh` -- dispatch script to launch all builds
|
||||
* `common` -- scripts used to execute individual Docker build stages
|
||||
* `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# Build a specific image
|
||||
./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
|
||||
|
||||
# Set flags (see build.sh) and build image
|
||||
sudo bash -c 'PROTOBUF=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
|
||||
```
|
|
@ -0,0 +1 @@
|
|||
<manifest package="org.pytorch.deps" />
|
|
@ -0,0 +1,66 @@
|
|||
buildscript {
|
||||
ext {
|
||||
minSdkVersion = 21
|
||||
targetSdkVersion = 28
|
||||
compileSdkVersion = 28
|
||||
buildToolsVersion = '28.0.3'
|
||||
|
||||
coreVersion = "1.2.0"
|
||||
extJUnitVersion = "1.1.1"
|
||||
runnerVersion = "1.2.0"
|
||||
rulesVersion = "1.2.0"
|
||||
junitVersion = "4.12"
|
||||
}
|
||||
|
||||
repositories {
|
||||
google()
|
||||
mavenLocal()
|
||||
mavenCentral()
|
||||
jcenter()
|
||||
}
|
||||
|
||||
dependencies {
|
||||
classpath 'com.android.tools.build:gradle:4.1.2'
|
||||
classpath 'com.vanniktech:gradle-maven-publish-plugin:0.14.2'
|
||||
}
|
||||
}
|
||||
|
||||
repositories {
|
||||
google()
|
||||
jcenter()
|
||||
}
|
||||
|
||||
apply plugin: 'com.android.library'
|
||||
|
||||
android {
|
||||
compileSdkVersion rootProject.compileSdkVersion
|
||||
buildToolsVersion rootProject.buildToolsVersion
|
||||
|
||||
defaultConfig {
|
||||
minSdkVersion minSdkVersion
|
||||
targetSdkVersion targetSdkVersion
|
||||
}
|
||||
|
||||
sourceSets {
|
||||
main {
|
||||
manifest.srcFile 'AndroidManifest.xml'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation 'com.android.support:appcompat-v7:28.0.0'
|
||||
implementation 'androidx.appcompat:appcompat:1.0.0'
|
||||
implementation 'com.facebook.fbjni:fbjni-java-only:0.2.2'
|
||||
implementation 'com.google.code.findbugs:jsr305:3.0.1'
|
||||
implementation 'com.facebook.soloader:nativeloader:0.10.4'
|
||||
|
||||
implementation 'junit:junit:' + rootProject.junitVersion
|
||||
implementation 'androidx.test:core:' + rootProject.coreVersion
|
||||
|
||||
implementation 'junit:junit:' + rootProject.junitVersion
|
||||
implementation 'androidx.test:core:' + rootProject.coreVersion
|
||||
implementation 'androidx.test.ext:junit:' + rootProject.extJUnitVersion
|
||||
implementation 'androidx.test:rules:' + rootProject.rulesVersion
|
||||
implementation 'androidx.test:runner:' + rootProject.runnerVersion
|
||||
}
|
|
@ -0,0 +1,392 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
image="$1"
|
||||
shift
|
||||
|
||||
if [ -z "${image}" ]; then
|
||||
echo "Usage: $0 IMAGE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
function extract_version_from_image_name() {
|
||||
eval export $2=$(echo "${image}" | perl -n -e"/$1(\d+(\.\d+)?(\.\d+)?)/ && print \$1")
|
||||
if [ "x${!2}" = x ]; then
|
||||
echo "variable '$2' not correctly parsed from image='$image'"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Walk every '-'-separated component of the global $image name and export
# a <NAME>_VERSION variable for each component that carries a version
# suffix (e.g. "gcc9" -> GCC_VERSION=9). Components without digits
# ("pytorch", "linux", "bionic", ...) are skipped. "py" is special-cased
# to ANACONDA_PYTHON_VERSION.
function extract_all_from_image_name() {
  # Split $image on '-' without mutating the global IFS (the original
  # saved/restored IFS and used an unquoted array assignment, which is
  # also subject to globbing).
  local -a parts
  IFS='-' read -r -a parts <<< "$image"

  local part name vername
  for part in "${parts[@]}"; do
    # Pull the leading alphabetic name of a "<name><version>" field;
    # leave name empty for fields with no version digits.
    name=""
    if [[ "$part" =~ ([a-zA-Z]+)[0-9]+(\.[0-9]+)?(\.[0-9]+)? ]]; then
      name="${BASH_REMATCH[1]}"
    fi
    # skip non-conforming fields such as "pytorch", "linux" or "bionic"
    # without a version string
    if [ -n "${name}" ]; then
      vername="${name^^}_VERSION"
      # "py" is the odd one out, needs this special case
      if [ "x${name}" = xpy ]; then
        vername=ANACONDA_PYTHON_VERSION
      fi
      extract_version_from_image_name "${name}" "${vername}"
    fi
  done
}
|
||||
|
||||
# Use the same pre-built XLA test image from PyTorch/XLA
|
||||
if [[ "$image" == *xla* ]]; then
|
||||
echo "Using pre-built XLA test image..."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "$image" == *-bionic* ]]; then
|
||||
UBUNTU_VERSION=18.04
|
||||
elif [[ "$image" == *-focal* ]]; then
|
||||
UBUNTU_VERSION=20.04
|
||||
elif [[ "$image" == *-jammy* ]]; then
|
||||
UBUNTU_VERSION=22.04
|
||||
elif [[ "$image" == *ubuntu* ]]; then
|
||||
extract_version_from_image_name ubuntu UBUNTU_VERSION
|
||||
elif [[ "$image" == *centos* ]]; then
|
||||
extract_version_from_image_name centos CENTOS_VERSION
|
||||
fi
|
||||
|
||||
if [ -n "${UBUNTU_VERSION}" ]; then
|
||||
OS="ubuntu"
|
||||
elif [ -n "${CENTOS_VERSION}" ]; then
|
||||
OS="centos"
|
||||
else
|
||||
echo "Unable to derive operating system base..."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DOCKERFILE="${OS}/Dockerfile"
|
||||
# When using ubuntu - 22.04, start from Ubuntu docker image, instead of nvidia/cuda docker image.
|
||||
if [[ "$image" == *cuda* && "$UBUNTU_VERSION" != "22.04" ]]; then
|
||||
DOCKERFILE="${OS}-cuda/Dockerfile"
|
||||
elif [[ "$image" == *rocm* ]]; then
|
||||
DOCKERFILE="${OS}-rocm/Dockerfile"
|
||||
elif [[ "$image" == *linter* ]]; then
|
||||
# Use a separate Dockerfile for linter to keep a small image size
|
||||
DOCKERFILE="linter/Dockerfile"
|
||||
fi
|
||||
|
||||
# CMake 3.18 is needed to support CUDA17 language variant
|
||||
CMAKE_VERSION=3.18.5
|
||||
|
||||
_UCX_COMMIT=31e74cac7bee0ef66bef2af72e7d86d9c282e5ab
|
||||
_UCC_COMMIT=1c7a7127186e7836f73aafbd7697bbc274a77eee
|
||||
|
||||
# It's annoying to rename jobs every time you want to rewrite a
|
||||
# configuration, so we hardcode everything here rather than do it
|
||||
# from scratch
|
||||
case "$image" in
|
||||
pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7)
|
||||
CUDA_VERSION=11.6.2
|
||||
CUDNN_VERSION=8
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=7
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7)
|
||||
CUDA_VERSION=11.7.0
|
||||
CUDNN_VERSION=8
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=7
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7)
|
||||
CUDA_VERSION=11.8.0
|
||||
CUDNN_VERSION=8
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=7
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-focal-py3-clang7-asan)
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CLANG_VERSION=7
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-focal-py3-clang10-onnx)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
CLANG_VERSION=10
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-focal-py3-clang7-android-ndk-r19c)
|
||||
ANACONDA_PYTHON_VERSION=3.7
|
||||
CLANG_VERSION=7
|
||||
LLVMDEV=yes
|
||||
PROTOBUF=yes
|
||||
ANDROID=yes
|
||||
ANDROID_NDK_VERSION=r19c
|
||||
GRADLE_VERSION=6.8.3
|
||||
NINJA_VERSION=1.9.0
|
||||
;;
|
||||
pytorch-linux-bionic-py3.8-clang9)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
CLANG_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
VULKAN_SDK_VERSION=1.2.162.1
|
||||
SWIFTSHADER=yes
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-bionic-py3.11-clang9)
|
||||
ANACONDA_PYTHON_VERSION=3.11
|
||||
CLANG_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
VULKAN_SDK_VERSION=1.2.162.1
|
||||
SWIFTSHADER=yes
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-bionic-py3.8-gcc9)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-focal-rocm-n-1-py3)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
ROCM_VERSION=5.3
|
||||
NINJA_VERSION=1.9.0
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-focal-rocm-n-py3)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
ROCM_VERSION=5.4.2
|
||||
NINJA_VERSION=1.9.0
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-focal-py3.8-gcc7)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
GCC_VERSION=7
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
CUDA_VERSION=11.6
|
||||
CUDNN_VERSION=8
|
||||
CLANG_VERSION=12
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda11.7-cudnn8-py3.8-clang12)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
CUDA_VERSION=11.7
|
||||
CUDNN_VERSION=8
|
||||
CLANG_VERSION=12
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda11.8-cudnn8-py3.8-clang12)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
CUDA_VERSION=11.8
|
||||
CUDNN_VERSION=8
|
||||
CLANG_VERSION=12
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
;;
|
||||
pytorch-linux-focal-linter)
|
||||
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
|
||||
# We will need to update mypy version eventually, but that's for another day. The task
|
||||
# would be to upgrade mypy to 1.0.0 with Python 3.11
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
*)
|
||||
# Catch-all for builds that are not hardcoded.
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
echo "image '$image' did not match an existing build configuration"
|
||||
if [[ "$image" == *py* ]]; then
|
||||
extract_version_from_image_name py ANACONDA_PYTHON_VERSION
|
||||
fi
|
||||
if [[ "$image" == *cuda* ]]; then
|
||||
extract_version_from_image_name cuda CUDA_VERSION
|
||||
extract_version_from_image_name cudnn CUDNN_VERSION
|
||||
fi
|
||||
if [[ "$image" == *rocm* ]]; then
|
||||
extract_version_from_image_name rocm ROCM_VERSION
|
||||
NINJA_VERSION=1.9.0
|
||||
fi
|
||||
if [[ "$image" == *centos7* ]]; then
|
||||
NINJA_VERSION=1.10.2
|
||||
fi
|
||||
if [[ "$image" == *gcc* ]]; then
|
||||
extract_version_from_image_name gcc GCC_VERSION
|
||||
fi
|
||||
if [[ "$image" == *clang* ]]; then
|
||||
extract_version_from_image_name clang CLANG_VERSION
|
||||
fi
|
||||
if [[ "$image" == *devtoolset* ]]; then
|
||||
extract_version_from_image_name devtoolset DEVTOOLSET_VERSION
|
||||
fi
|
||||
if [[ "$image" == *glibc* ]]; then
|
||||
extract_version_from_image_name glibc GLIBC_VERSION
|
||||
fi
|
||||
if [[ "$image" == *cmake* ]]; then
|
||||
extract_version_from_image_name cmake CMAKE_VERSION
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
#when using cudnn version 8 install it separately from cuda
|
||||
if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
|
||||
IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
|
||||
if [[ ${CUDNN_VERSION} == 8 ]]; then
|
||||
IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Build image
|
||||
# TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
|
||||
# it's no longer needed.
|
||||
docker build \
|
||||
--no-cache \
|
||||
--progress=plain \
|
||||
--build-arg "BUILD_ENVIRONMENT=${image}" \
|
||||
--build-arg "PROTOBUF=${PROTOBUF:-}" \
|
||||
--build-arg "THRIFT=${THRIFT:-}" \
|
||||
--build-arg "LLVMDEV=${LLVMDEV:-}" \
|
||||
--build-arg "DB=${DB:-}" \
|
||||
--build-arg "VISION=${VISION:-}" \
|
||||
--build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
|
||||
--build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \
|
||||
--build-arg "DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" \
|
||||
--build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
|
||||
--build-arg "CLANG_VERSION=${CLANG_VERSION}" \
|
||||
--build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
|
||||
--build-arg "GCC_VERSION=${GCC_VERSION}" \
|
||||
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
|
||||
--build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
|
||||
--build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
|
||||
--build-arg "ANDROID=${ANDROID}" \
|
||||
--build-arg "ANDROID_NDK=${ANDROID_NDK_VERSION}" \
|
||||
--build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
|
||||
--build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \
|
||||
--build-arg "SWIFTSHADER=${SWIFTSHADER}" \
|
||||
--build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \
|
||||
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
|
||||
--build-arg "KATEX=${KATEX:-}" \
|
||||
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
|
||||
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx906}" \
|
||||
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
|
||||
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
|
||||
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
|
||||
--build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
|
||||
-f $(dirname ${DOCKERFILE})/Dockerfile \
|
||||
-t "$tmp_tag" \
|
||||
"$@" \
|
||||
.
|
||||
|
||||
# NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
|
||||
# for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
|
||||
# find the correct image. As a result, here we have to replace the
|
||||
# "$UBUNTU_VERSION" == "18.04-rc"
|
||||
# with
|
||||
# "$UBUNTU_VERSION" == "18.04"
|
||||
UBUNTU_VERSION=$(echo ${UBUNTU_VERSION} | sed 's/-rc$//')
|
||||
|
||||
# Run a command inside a throwaway container created from the image under test.
# All arguments are forwarded verbatim to the container.
function drun() {
  # "$@" (not the previous unquoted $*) preserves argument boundaries for
  # arguments that contain whitespace or glob characters.
  docker run --rm "$tmp_tag" "$@"
}
|
||||
|
||||
# Sanity-check the freshly built image: each probe runs the relevant tool
# inside the container via drun and verifies its output matches the build args.
if [[ "$OS" == "ubuntu" ]]; then

  if ! drun lsb_release -a 2>&1 | grep -qF Ubuntu; then
    echo "OS=ubuntu, but:"
    drun lsb_release -a
    exit 1
  fi
  if ! drun lsb_release -a 2>&1 | grep -qF "$UBUNTU_VERSION"; then
    echo "UBUNTU_VERSION=$UBUNTU_VERSION, but:"
    drun lsb_release -a
    exit 1
  fi
fi

if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  if ! drun python --version 2>&1 | grep -qF "Python $ANACONDA_PYTHON_VERSION"; then
    echo "ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION, but:"
    drun python --version
    exit 1
  fi
fi

if [ -n "$GCC_VERSION" ]; then
  if ! drun gcc --version 2>&1 | grep -q " $GCC_VERSION\\W"; then
    echo "GCC_VERSION=$GCC_VERSION, but:"
    drun gcc --version
    exit 1
  fi
fi

if [ -n "$CLANG_VERSION" ]; then
  if ! drun clang --version 2>&1 | grep -qF "clang version $CLANG_VERSION"; then
    echo "CLANG_VERSION=$CLANG_VERSION, but:"
    drun clang --version
    exit 1
  fi
fi

if [ -n "$KATEX" ]; then
  if ! drun katex --version; then
    echo "KATEX=$KATEX, but:"
    drun katex --version
    exit 1
  fi
fi
|
|
@ -0,0 +1,60 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
# Run a command, retrying up to two more times with 1s then 2s back-off.
retry () {
  # "$@" keeps each argument intact; the previous unquoted $* re-split
  # arguments containing whitespace before re-executing them.
  "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
|
||||
|
||||
# If UPSTREAM_BUILD_ID is set (see trigger job), then we can
|
||||
# use it to tag this build with the same ID used to tag all other
|
||||
# base image builds. Also, we can try and pull the previous
|
||||
# image first, to avoid rebuilding layers that haven't changed.
|
||||
|
||||
#until we find a way to reliably reuse previous build, this last_tag is not in use
|
||||
# last_tag="$(( CIRCLE_BUILD_NUM - 1 ))"
|
||||
# Tag for the image being built; DOCKER_TAG is supplied by the CI environment.
tag="${DOCKER_TAG}"


# AWS ECR registry that hosts the pytorch CI base images.
registry="308535385114.dkr.ecr.us-east-1.amazonaws.com"
# Fully-qualified image name: <registry>/pytorch/<IMAGE_NAME>.
image="${registry}/pytorch/${IMAGE_NAME}"
|
||||
|
||||
# Log in to the ECR registry passed as $1.
# The ECR authorization token decodes to "AWS:<password>"; we cut out the
# password field and feed it to docker over stdin so it never appears in
# process arguments.
login() {
  aws ecr get-authorization-token --region us-east-1 --output text --query 'authorizationData[].authorizationToken' |
    base64 -d |
    cut -d: -f2 |
    docker login -u AWS --password-stdin "$1"
}
|
||||
|
||||
|
||||
# Only run these steps if not on github actions
# (GitHub Actions workflows handle registry authentication themselves).
if [[ -z "${GITHUB_ACTIONS}" ]]; then
  # Retry on timeouts (can happen on job stampede).
  retry login "${registry}"
  # Logout on exit
  # NOTE(review): this EXIT trap is overwritten by the later S3-upload trap
  # in this script, so the logout may never fire when DOCKER_SKIP_S3_UPLOAD
  # is unset — confirm whether that is intended.
  trap "docker logout ${registry}" EXIT
fi
|
||||
|
||||
# Try to pull the previous image (perhaps we can reuse some layers)
|
||||
# if [ -n "${last_tag}" ]; then
|
||||
# docker pull "${image}:${last_tag}" || true
|
||||
# fi
|
||||
|
||||
# Build new image
# Quote IMAGE_NAME so a value containing whitespace cannot word-split the command.
./build.sh "${IMAGE_NAME}" -t "${image}:${tag}"

# Only push if `DOCKER_SKIP_PUSH` = false
if [ "${DOCKER_SKIP_PUSH:-true}" = "false" ]; then
  # Only push if docker image doesn't exist already.
  # ECR image tags are immutable so this will avoid pushing if only just testing if the docker jobs work
  # NOTE: The only workflow that should push these images should be the docker-builds.yml workflow
  if ! docker manifest inspect "${image}:${tag}" >/dev/null 2>/dev/null; then
    docker push "${image}:${tag}"
  fi
fi
|
||||
|
||||
if [ -z "${DOCKER_SKIP_S3_UPLOAD:-}" ]; then
  # BUG FIX: a bare `trap "rm -rf ..." EXIT` here silently replaced the
  # `docker logout` EXIT trap installed earlier, so the logout never ran
  # when the S3 upload was enabled. Chain both cleanups into one handler.
  cleanup() {
    rm -rf "${IMAGE_NAME}:${tag}.tar"
    if [[ -z "${GITHUB_ACTIONS}" ]]; then
      # Best-effort: a failed logout must not mask the script's exit status.
      docker logout "${registry}" || true
    fi
  }
  trap cleanup EXIT
  # Export the image to a tarball and mirror it to the public S3 bucket.
  docker save -o "${IMAGE_NAME}:${tag}.tar" "${image}:${tag}"
  aws s3 cp "${IMAGE_NAME}:${tag}.tar" "s3://ossci-linux-build/pytorch/base/${IMAGE_NAME}:${tag}.tar" --acl public-read
fi
|
|
@ -0,0 +1,111 @@
|
|||
ARG CENTOS_VERSION
|
||||
|
||||
FROM centos:${CENTOS_VERSION}
|
||||
|
||||
ARG CENTOS_VERSION
|
||||
|
||||
# Set AMD gpu targets to build for
|
||||
ARG PYTORCH_ROCM_ARCH
|
||||
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
|
||||
|
||||
# Install required packages to build Caffe2
|
||||
|
||||
# Install common dependencies (so that this step can be cached separately)
|
||||
COPY ./common/install_base.sh install_base.sh
|
||||
RUN bash ./install_base.sh && rm install_base.sh
|
||||
|
||||
# Update CentOS git version
|
||||
RUN yum -y remove git
|
||||
RUN yum -y remove git-*
|
||||
RUN yum -y install https://packages.endpoint.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm || \
|
||||
(yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm && \
|
||||
sed -i "s/packages.endpoint/packages.endpointdev/" /etc/yum.repos.d/endpoint.repo)
|
||||
RUN yum install -y git
|
||||
|
||||
# Install devtoolset
|
||||
ARG DEVTOOLSET_VERSION
|
||||
COPY ./common/install_devtoolset.sh install_devtoolset.sh
|
||||
RUN bash ./install_devtoolset.sh && rm install_devtoolset.sh
|
||||
ENV BASH_ENV "/etc/profile"
|
||||
|
||||
# (optional) Install non-default glibc version
|
||||
ARG GLIBC_VERSION
|
||||
COPY ./common/install_glibc.sh install_glibc.sh
|
||||
RUN if [ -n "${GLIBC_VERSION}" ]; then bash ./install_glibc.sh; fi
|
||||
RUN rm install_glibc.sh
|
||||
|
||||
# Install user
|
||||
COPY ./common/install_user.sh install_user.sh
|
||||
RUN bash ./install_user.sh && rm install_user.sh
|
||||
|
||||
# Install conda and other packages (e.g., numpy, pytest)
|
||||
ARG ANACONDA_PYTHON_VERSION
|
||||
ARG CONDA_CMAKE
|
||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
|
||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
||||
COPY ./common/install_conda.sh install_conda.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
||||
|
||||
# (optional) Install protobuf for ONNX
|
||||
ARG PROTOBUF
|
||||
COPY ./common/install_protobuf.sh install_protobuf.sh
|
||||
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
|
||||
RUN rm install_protobuf.sh
|
||||
ENV INSTALLED_PROTOBUF ${PROTOBUF}
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV and ffmpeg
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh install_vision.sh
|
||||
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
|
||||
RUN rm install_vision.sh
|
||||
ENV INSTALLED_VISION ${VISION}
|
||||
|
||||
# Install rocm
|
||||
ARG ROCM_VERSION
|
||||
COPY ./common/install_rocm.sh install_rocm.sh
|
||||
RUN bash ./install_rocm.sh
|
||||
RUN rm install_rocm.sh
|
||||
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||
RUN bash ./install_rocm_magma.sh
|
||||
RUN rm install_rocm_magma.sh
|
||||
ENV PATH /opt/rocm/bin:$PATH
|
||||
ENV PATH /opt/rocm/hcc/bin:$PATH
|
||||
ENV PATH /opt/rocm/hip/bin:$PATH
|
||||
ENV PATH /opt/rocm/opencl/bin:$PATH
|
||||
ENV PATH /opt/rocm/llvm/bin:$PATH
|
||||
ENV MAGMA_HOME /opt/rocm/magma
|
||||
ENV LANG en_US.utf8
|
||||
ENV LC_ALL en_US.utf8
|
||||
|
||||
# (optional) Install non-default CMake version
|
||||
ARG CMAKE_VERSION
|
||||
COPY ./common/install_cmake.sh install_cmake.sh
|
||||
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
|
||||
RUN rm install_cmake.sh
|
||||
|
||||
# (optional) Install non-default Ninja version
|
||||
ARG NINJA_VERSION
|
||||
COPY ./common/install_ninja.sh install_ninja.sh
|
||||
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
|
||||
RUN rm install_ninja.sh
|
||||
|
||||
# Install ccache/sccache (do this last, so we get priority in PATH)
|
||||
COPY ./common/install_cache.sh install_cache.sh
|
||||
ENV PATH /opt/cache/bin:$PATH
|
||||
RUN bash ./install_cache.sh && rm install_cache.sh
|
||||
|
||||
# Include BUILD_ENVIRONMENT environment variable in image
|
||||
ARG BUILD_ENVIRONMENT
|
||||
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
|
||||
|
||||
USER jenkins
|
||||
CMD ["bash"]
|
|
@ -0,0 +1,32 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Work around bug where devtoolset replaces sudo and breaks it: when a
# devtoolset is active, invoke the system sudo binary by absolute path.
if [[ -n "$DEVTOOLSET_VERSION" ]]; then
  SUDO=/bin/sudo
else
  SUDO=sudo
fi
export SUDO
|
||||
|
||||
# Run the given command as the `jenkins` user.
# NB: unsetting the environment variables works around a conda bug
# https://github.com/conda/conda/issues/6576
# NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
# NB: This must be run from a directory that jenkins has access to,
# works around https://github.com/conda/conda-package-handling/pull/34
as_jenkins() {
  # "$@" (not the previous unquoted $*) preserves argument boundaries for
  # arguments containing whitespace.
  $SUDO -H -u jenkins env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" "$@"
}
|
||||
|
||||
# Install conda packages into the py_$ANACONDA_PYTHON_VERSION environment.
conda_install() {
  # Ensure that the install command don't upgrade/downgrade Python
  # This should be called as
  #   conda_install pkg1 pkg2 ... [-c channel]
  # "$@" (not the previous unquoted $*) keeps each package spec intact.
  as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION" "$@"
}
|
||||
|
||||
# Run an arbitrary command inside the py_$ANACONDA_PYTHON_VERSION conda env.
conda_run() {
  # "$@" (not the previous unquoted $*) preserves argument boundaries.
  as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION --no-capture-output "$@"
}
|
||||
|
||||
# pip-install packages inside the py_$ANACONDA_PYTHON_VERSION conda env.
pip_install() {
  # "$@" (not the previous unquoted $*) keeps requirement specs with spaces intact.
  as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off "$@"
}
|
|
@ -0,0 +1,109 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
[ -n "${ANDROID_NDK}" ]
|
||||
|
||||
_https_amazon_aws=https://ossci-android.s3.amazonaws.com
|
||||
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends autotools-dev autoconf unzip
|
||||
apt-get autoclean && apt-get clean
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
pushd /tmp
|
||||
curl -Os --retry 3 $_https_amazon_aws/android-ndk-${ANDROID_NDK}-linux-x86_64.zip
|
||||
popd
|
||||
_ndk_dir=/opt/ndk
|
||||
mkdir -p "$_ndk_dir"
|
||||
unzip -qo /tmp/android*.zip -d "$_ndk_dir"
|
||||
_versioned_dir=$(find "$_ndk_dir/" -mindepth 1 -maxdepth 1 -type d)
|
||||
mv "$_versioned_dir"/* "$_ndk_dir"/
|
||||
rmdir "$_versioned_dir"
|
||||
rm -rf /tmp/*
|
||||
|
||||
# Install OpenJDK
|
||||
# https://hub.docker.com/r/picoded/ubuntu-openjdk-8-jdk/dockerfile/
|
||||
|
||||
sudo apt-get update && \
|
||||
apt-get install -y openjdk-8-jdk && \
|
||||
apt-get install -y ant && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
rm -rf /var/cache/oracle-jdk8-installer;
|
||||
|
||||
# Fix certificate issues, found as of
|
||||
# https://bugs.launchpad.net/ubuntu/+source/ca-certificates-java/+bug/983302
|
||||
|
||||
sudo apt-get update && \
|
||||
apt-get install -y ca-certificates-java && \
|
||||
apt-get clean && \
|
||||
update-ca-certificates -f && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
rm -rf /var/cache/oracle-jdk8-installer;
|
||||
|
||||
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
|
||||
|
||||
# Installing android sdk
|
||||
# https://github.com/circleci/circleci-images/blob/staging/android/Dockerfile.m4
|
||||
|
||||
_tmp_sdk_zip=/tmp/android-sdk-linux.zip
|
||||
_android_home=/opt/android/sdk
|
||||
|
||||
rm -rf $_android_home
|
||||
sudo mkdir -p $_android_home
|
||||
curl --silent --show-error --location --fail --retry 3 --output /tmp/android-sdk-linux.zip $_https_amazon_aws/android-sdk-linux-tools3859397-build-tools2803-2902-platforms28-29.zip
|
||||
sudo unzip -q $_tmp_sdk_zip -d $_android_home
|
||||
rm $_tmp_sdk_zip
|
||||
|
||||
sudo chmod -R 777 $_android_home
|
||||
|
||||
export ANDROID_HOME=$_android_home
|
||||
export ADB_INSTALL_TIMEOUT=120
|
||||
|
||||
export PATH="${ANDROID_HOME}/tools:${ANDROID_HOME}/tools/bin:${ANDROID_HOME}/platform-tools:${PATH}"
|
||||
echo "PATH:${PATH}"
|
||||
|
||||
# Installing Gradle
echo "GRADLE_VERSION:${GRADLE_VERSION}"
_gradle_home=/opt/gradle
# BUG FIX: this previously removed "$gradle_home" — an undefined variable —
# so the stale install directory was never actually cleaned up.
sudo rm -rf "$_gradle_home"
sudo mkdir -p $_gradle_home

curl --silent --output /tmp/gradle.zip --retry 3 $_https_amazon_aws/gradle-${GRADLE_VERSION}-bin.zip

sudo unzip -q /tmp/gradle.zip -d $_gradle_home
rm /tmp/gradle.zip

sudo chmod -R 777 $_gradle_home

# The zip unpacks into a gradle-<version> subdirectory.
export GRADLE_HOME=$_gradle_home/gradle-$GRADLE_VERSION
|
||||
alias gradle="${GRADLE_HOME}/bin/gradle"
|
||||
|
||||
export PATH="${GRADLE_HOME}/bin/:${PATH}"
|
||||
echo "PATH:${PATH}"
|
||||
|
||||
gradle --version
|
||||
|
||||
mkdir /var/lib/jenkins/gradledeps
|
||||
cp build.gradle /var/lib/jenkins/gradledeps
|
||||
cp AndroidManifest.xml /var/lib/jenkins/gradledeps
|
||||
|
||||
pushd /var/lib/jenkins
|
||||
|
||||
export GRADLE_LOCAL_PROPERTIES=gradledeps/local.properties
|
||||
rm -f $GRADLE_LOCAL_PROPERTIES
|
||||
echo "sdk.dir=/opt/android/sdk" >> $GRADLE_LOCAL_PROPERTIES
|
||||
echo "ndk.dir=/opt/ndk" >> $GRADLE_LOCAL_PROPERTIES
|
||||
|
||||
chown -R jenkins /var/lib/jenkins/gradledeps
|
||||
chgrp -R jenkins /var/lib/jenkins/gradledeps
|
||||
|
||||
sudo -H -u jenkins $GRADLE_HOME/bin/gradle -Pandroid.useAndroidX=true -p /var/lib/jenkins/gradledeps -g /var/lib/jenkins/.gradle --refresh-dependencies --debug --stacktrace assemble
|
||||
|
||||
chown -R jenkins /var/lib/jenkins/.gradle
|
||||
chgrp -R jenkins /var/lib/jenkins/.gradle
|
||||
|
||||
popd
|
||||
|
||||
rm -rf /var/lib/jenkins/.gradle/daemon
|
|
@ -0,0 +1,169 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
install_ubuntu() {
|
||||
# NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
|
||||
# for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
|
||||
# find the correct image. As a result, here we have to check for
|
||||
# "$UBUNTU_VERSION" == "18.04"*
|
||||
# instead of
|
||||
# "$UBUNTU_VERSION" == "18.04"
|
||||
if [[ "$UBUNTU_VERSION" == "18.04"* ]]; then
|
||||
cmake3="cmake=3.10*"
|
||||
maybe_libiomp_dev="libiomp-dev"
|
||||
elif [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
|
||||
cmake3="cmake=3.16*"
|
||||
maybe_libiomp_dev=""
|
||||
elif [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
|
||||
cmake3="cmake=3.22*"
|
||||
maybe_libiomp_dev=""
|
||||
else
|
||||
cmake3="cmake=3.5*"
|
||||
maybe_libiomp_dev="libiomp-dev"
|
||||
fi
|
||||
|
||||
if [[ "$CLANG_VERSION" == 12 ]]; then
|
||||
maybe_libomp_dev="libomp-12-dev"
|
||||
elif [[ "$CLANG_VERSION" == 10 ]]; then
|
||||
maybe_libomp_dev="libomp-10-dev"
|
||||
else
|
||||
maybe_libomp_dev=""
|
||||
fi
|
||||
|
||||
# TODO: Remove this once nvidia package repos are back online
|
||||
# Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
|
||||
# shellcheck disable=SC2046
|
||||
sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")
|
||||
|
||||
# Install common dependencies
|
||||
apt-get update
|
||||
# TODO: Some of these may not be necessary
|
||||
ccache_deps="asciidoc docbook-xml docbook-xsl xsltproc"
|
||||
deploy_deps="libffi-dev libbz2-dev libreadline-dev libncurses5-dev libncursesw5-dev libgdbm-dev libsqlite3-dev uuid-dev tk-dev"
|
||||
numpy_deps="gfortran"
|
||||
apt-get install -y --no-install-recommends \
|
||||
$ccache_deps \
|
||||
$numpy_deps \
|
||||
${deploy_deps} \
|
||||
${cmake3} \
|
||||
apt-transport-https \
|
||||
autoconf \
|
||||
automake \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
curl \
|
||||
git \
|
||||
libatlas-base-dev \
|
||||
libc6-dbg \
|
||||
${maybe_libiomp_dev} \
|
||||
libyaml-dev \
|
||||
libz-dev \
|
||||
libjpeg-dev \
|
||||
libasound2-dev \
|
||||
libsndfile-dev \
|
||||
${maybe_libomp_dev} \
|
||||
software-properties-common \
|
||||
wget \
|
||||
sudo \
|
||||
vim \
|
||||
jq \
|
||||
libtool \
|
||||
vim \
|
||||
unzip \
|
||||
gdb
|
||||
|
||||
# Should resolve issues related to various apt package repository cert issues
|
||||
# see: https://github.com/pytorch/pytorch/issues/65931
|
||||
apt-get install -y libgnutls30
|
||||
|
||||
# cuda-toolkit does not work with gcc-11.2.0 which is default in Ubunutu 22.04
|
||||
# see: https://github.com/NVlabs/instant-ngp/issues/119
|
||||
if [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
|
||||
apt-get install -y g++-10
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 30
|
||||
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 30
|
||||
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-10 30
|
||||
|
||||
# https://www.spinics.net/lists/libreoffice/msg07549.html
|
||||
sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/11
|
||||
wget https://github.com/gcc-mirror/gcc/commit/2b2d97fc545635a0f6aa9c9ee3b017394bc494bf.patch -O noexecpt.patch
|
||||
sudo patch /usr/include/c++/10/bits/range_access.h noexecpt.patch
|
||||
fi
|
||||
|
||||
# Cleanup package manager
|
||||
apt-get autoclean && apt-get clean
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
}
|
||||
|
||||
install_centos() {
|
||||
# Need EPEL for many packages we depend on.
|
||||
# See http://fedoraproject.org/wiki/EPEL
|
||||
yum --enablerepo=extras install -y epel-release
|
||||
|
||||
ccache_deps="asciidoc docbook-dtds docbook-style-xsl libxslt"
|
||||
numpy_deps="gcc-gfortran"
|
||||
# Note: protobuf-c-{compiler,devel} on CentOS are too old to be used
|
||||
# for Caffe2. That said, we still install them to make sure the build
|
||||
# system opts to build/use protoc and libprotobuf from third-party.
|
||||
yum install -y \
|
||||
$ccache_deps \
|
||||
$numpy_deps \
|
||||
autoconf \
|
||||
automake \
|
||||
bzip2 \
|
||||
cmake \
|
||||
cmake3 \
|
||||
curl \
|
||||
gcc \
|
||||
gcc-c++ \
|
||||
gflags-devel \
|
||||
git \
|
||||
glibc-devel \
|
||||
glibc-headers \
|
||||
glog-devel \
|
||||
hiredis-devel \
|
||||
libstdc++-devel \
|
||||
libsndfile-devel \
|
||||
make \
|
||||
opencv-devel \
|
||||
sudo \
|
||||
wget \
|
||||
vim \
|
||||
unzip \
|
||||
gdb
|
||||
|
||||
# Cleanup
|
||||
yum clean all
|
||||
rm -rf /var/cache/yum
|
||||
rm -rf /var/lib/yum/yumdb
|
||||
rm -rf /var/lib/yum/history
|
||||
}
|
||||
|
||||
# Install base packages depending on the base OS
|
||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
||||
case "$ID" in
|
||||
ubuntu)
|
||||
install_ubuntu
|
||||
;;
|
||||
centos)
|
||||
install_centos
|
||||
;;
|
||||
*)
|
||||
echo "Unable to determine OS..."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# Install Valgrind separately since the apt-get version is too old.
|
||||
mkdir valgrind_build && cd valgrind_build
|
||||
VALGRIND_VERSION=3.20.0
|
||||
wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
|
||||
tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
|
||||
cd valgrind-${VALGRIND_VERSION}
|
||||
./configure --prefix=/usr/local
|
||||
make -j6
|
||||
sudo make install
|
||||
cd ../../
|
||||
rm -rf valgrind_build
|
||||
alias valgrind="/usr/local/bin/valgrind"
|
|
@ -0,0 +1,121 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
install_ubuntu() {
|
||||
echo "Preparing to build sccache from source"
|
||||
apt-get update
|
||||
# libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.
|
||||
# Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``
|
||||
apt-get install -y cargo
|
||||
echo "Checking out sccache repo"
|
||||
git clone https://github.com/pytorch/sccache
|
||||
cd sccache
|
||||
echo "Building sccache"
|
||||
cargo build --release
|
||||
cp target/release/sccache /opt/cache/bin
|
||||
echo "Cleaning up"
|
||||
cd ..
|
||||
rm -rf sccache
|
||||
apt-get remove -y cargo rustc
|
||||
apt-get autoclean && apt-get clean
|
||||
}
|
||||
|
||||
install_binary() {
|
||||
echo "Downloading sccache binary from S3 repo"
|
||||
curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache
|
||||
}
|
||||
|
||||
mkdir -p /opt/cache/bin
|
||||
mkdir -p /opt/cache/lib
|
||||
sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
|
||||
export PATH="/opt/cache/bin:$PATH"
|
||||
|
||||
# Setup compiler cache
|
||||
if [ -n "$ROCM_VERSION" ]; then
|
||||
curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache
|
||||
else
|
||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
||||
case "$ID" in
|
||||
ubuntu)
|
||||
install_ubuntu
|
||||
;;
|
||||
*)
|
||||
install_binary
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
chmod a+x /opt/cache/bin/sccache
|
||||
|
||||
# Write a wrapper at /opt/cache/bin/$1 that transparently routes the compiler
# $1 through sccache, except when the parent process is sccache itself
# (which would otherwise recurse). $1 must be resolvable via `which`.
function write_sccache_stub() {
  # Unset LD_PRELOAD for ps because of asan + ps issues
  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
  printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n  exec sccache $(which $1) \"\$@\"\nelse\n  exec $(which $1) \"\$@\"\nfi" > "/opt/cache/bin/$1"
  chmod a+x "/opt/cache/bin/$1"
}
|
||||
|
||||
write_sccache_stub cc
|
||||
write_sccache_stub c++
|
||||
write_sccache_stub gcc
|
||||
write_sccache_stub g++
|
||||
|
||||
# NOTE: See specific ROCM_VERSION case below.
|
||||
if [ "x$ROCM_VERSION" = x ]; then
|
||||
write_sccache_stub clang
|
||||
write_sccache_stub clang++
|
||||
fi
|
||||
|
||||
if [ -n "$CUDA_VERSION" ]; then
|
||||
# TODO: This is a workaround for the fact that PyTorch's FindCUDA
|
||||
# implementation cannot find nvcc if it is setup this way, because it
|
||||
# appears to search for the nvcc in PATH, and use its path to infer
|
||||
# where CUDA is installed. Instead, we install an nvcc symlink outside
|
||||
# of the PATH, and set CUDA_NVCC_EXECUTABLE so that we make use of it.
|
||||
|
||||
write_sccache_stub nvcc
|
||||
mv /opt/cache/bin/nvcc /opt/cache/lib/
|
||||
fi
|
||||
|
||||
if [ -n "$ROCM_VERSION" ]; then
|
||||
# ROCm compiler is hcc or clang. However, it is commonly invoked via hipcc wrapper.
|
||||
# hipcc will call either hcc or clang using an absolute path starting with /opt/rocm,
|
||||
# causing the /opt/cache/bin to be skipped. We must create the sccache wrappers
|
||||
# directly under /opt/rocm while also preserving the original compiler names.
|
||||
# Note symlinks will chain as follows: [hcc or clang++] -> clang -> clang-??
|
||||
# Final link in symlink chain must point back to original directory.
|
||||
|
||||
# Original compiler is moved one directory deeper. Wrapper replaces it.
|
||||
function write_sccache_stub_rocm() {
|
||||
OLDCOMP=$1
|
||||
COMPNAME=$(basename $OLDCOMP)
|
||||
TOPDIR=$(dirname $OLDCOMP)
|
||||
WRAPPED="$TOPDIR/original/$COMPNAME"
|
||||
mv "$OLDCOMP" "$WRAPPED"
|
||||
printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" > "$OLDCOMP"
|
||||
chmod a+x "$OLDCOMP"
|
||||
}
|
||||
|
||||
if [[ -e "/opt/rocm/hcc/bin/hcc" ]]; then
|
||||
# ROCm 3.3 or earlier.
|
||||
mkdir /opt/rocm/hcc/bin/original
|
||||
write_sccache_stub_rocm /opt/rocm/hcc/bin/hcc
|
||||
write_sccache_stub_rocm /opt/rocm/hcc/bin/clang
|
||||
write_sccache_stub_rocm /opt/rocm/hcc/bin/clang++
|
||||
# Fix last link in symlink chain, clang points to versioned clang in prior dir
|
||||
pushd /opt/rocm/hcc/bin/original
|
||||
ln -s ../$(readlink clang)
|
||||
popd
|
||||
elif [[ -e "/opt/rocm/llvm/bin/clang" ]]; then
|
||||
# ROCm 3.5 and beyond.
|
||||
mkdir /opt/rocm/llvm/bin/original
|
||||
write_sccache_stub_rocm /opt/rocm/llvm/bin/clang
|
||||
write_sccache_stub_rocm /opt/rocm/llvm/bin/clang++
|
||||
# Fix last link in symlink chain, clang points to versioned clang in prior dir
|
||||
pushd /opt/rocm/llvm/bin/original
|
||||
ln -s ../$(readlink clang)
|
||||
popd
|
||||
else
|
||||
echo "Cannot find ROCm compiler."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
|
@ -0,0 +1,47 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
if [ -n "$CLANG_VERSION" ]; then
|
||||
|
||||
if [[ $CLANG_VERSION == 7 && $UBUNTU_VERSION == 16.04 ]]; then
|
||||
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
sudo apt-add-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main"
|
||||
elif [[ $CLANG_VERSION == 9 && $UBUNTU_VERSION == 18.04 ]]; then
|
||||
sudo apt-get update
|
||||
# gpg-agent is not available by default on 18.04
|
||||
sudo apt-get install -y --no-install-recommends gpg-agent
|
||||
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-${CLANG_VERSION} main"
|
||||
elif [[ $UBUNTU_VERSION == 22.04 ]]; then
|
||||
# work around ubuntu apt-get conflicts
|
||||
sudo apt-get -y -f install
|
||||
fi
|
||||
|
||||
sudo apt-get update
|
||||
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
|
||||
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
|
||||
|
||||
# Install dev version of LLVM.
|
||||
if [ -n "$LLVMDEV" ]; then
|
||||
sudo apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"-dev
|
||||
fi
|
||||
|
||||
# Use update-alternatives to make this version the default
|
||||
# TODO: Decide if overriding gcc as well is a good idea
|
||||
# update-alternatives --install /usr/bin/gcc gcc /usr/bin/clang-"$CLANG_VERSION" 50
|
||||
# update-alternatives --install /usr/bin/g++ g++ /usr/bin/clang++-"$CLANG_VERSION" 50
|
||||
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50
|
||||
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-"$CLANG_VERSION" 50
|
||||
|
||||
# clang's packaging is a little messed up (the runtime libs aren't
|
||||
# added into the linker path), so give it a little help
|
||||
clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
|
||||
echo "$clang_lib" > /etc/ld.so.conf.d/clang.conf
|
||||
ldconfig
|
||||
|
||||
# Cleanup package manager
|
||||
apt-get autoclean && apt-get clean
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
fi
|
|
@ -0,0 +1,31 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
[ -n "$CMAKE_VERSION" ]
|
||||
|
||||
# Remove system cmake install so it won't get used instead
|
||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
||||
case "$ID" in
|
||||
ubuntu)
|
||||
apt-get remove cmake -y
|
||||
;;
|
||||
centos)
|
||||
yum remove cmake -y
|
||||
;;
|
||||
*)
|
||||
echo "Unable to determine OS..."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# Turn 3.6.3 into v3.6
|
||||
path=$(echo "${CMAKE_VERSION}" | sed -e 's/\([0-9].[0-9]\+\).*/v\1/')
|
||||
file="cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz"
|
||||
|
||||
# Download and install specific CMake version in /usr/local
|
||||
pushd /tmp
|
||||
curl -Os --retry 3 "https://cmake.org/files/${path}/${file}"
|
||||
tar -C /usr/local --strip-components 1 --no-same-owner -zxf cmake-*.tar.gz
|
||||
rm -f cmake-*.tar.gz
|
||||
popd
|
|
@ -0,0 +1,98 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
# Optionally install conda
|
||||
if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
||||
BASE_URL="https://repo.anaconda.com/miniconda"
|
||||
|
||||
MAJOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 1)
|
||||
|
||||
case "$MAJOR_PYTHON_VERSION" in
|
||||
2)
|
||||
CONDA_FILE="Miniconda2-latest-Linux-x86_64.sh"
|
||||
;;
|
||||
3)
|
||||
CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
|
||||
;;
|
||||
*)
|
||||
echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
mkdir -p /opt/conda
|
||||
chown jenkins:jenkins /opt/conda
|
||||
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
|
||||
pushd /tmp
|
||||
wget -q "${BASE_URL}/${CONDA_FILE}"
|
||||
# NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
|
||||
as_jenkins bash "${CONDA_FILE}" -b -f -p "/opt/conda"
|
||||
popd
|
||||
|
||||
# NB: Don't do this, rely on the rpath to get it right
|
||||
#echo "/opt/conda/lib" > /etc/ld.so.conf.d/conda-python.conf
|
||||
#ldconfig
|
||||
sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
|
||||
export PATH="/opt/conda/bin:$PATH"
|
||||
|
||||
# Ensure we run conda in a directory that jenkins has write access to
|
||||
pushd /opt/conda
|
||||
|
||||
# Prevent conda from updating to 4.14.0, which causes docker build failures
|
||||
# See https://hud.pytorch.org/pytorch/pytorch/commit/754d7f05b6841e555cea5a4b2c505dd9e0baec1d
|
||||
# Uncomment the below when resolved to track the latest conda update
|
||||
# as_jenkins conda update -y -n base conda
|
||||
|
||||
# Install correct Python version
|
||||
as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION"
|
||||
|
||||
# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
|
||||
CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"
|
||||
if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ]; then
|
||||
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
|
||||
# TODO: Stop using `-c malfet`
|
||||
conda_install numpy=1.23.5 ${CONDA_COMMON_DEPS} llvmdev=8.0.0 -c malfet
|
||||
elif [ "$ANACONDA_PYTHON_VERSION" = "3.10" ]; then
|
||||
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
|
||||
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
|
||||
elif [ "$ANACONDA_PYTHON_VERSION" = "3.9" ]; then
|
||||
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
|
||||
conda_install numpy=1.19.2 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
|
||||
elif [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
|
||||
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
|
||||
conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
|
||||
else
|
||||
# Install `typing-extensions` for 3.7
|
||||
conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS} typing-extensions
|
||||
fi
|
||||
|
||||
# Use conda cmake in some cases. Conda cmake will be newer than our supported
|
||||
# min version (3.5 for xenial and 3.10 for bionic), so we only do it in those
|
||||
# following builds that we know should use conda. Specifically, Ubuntu bionic
|
||||
# and focal cannot find conda mkl with stock cmake, so we need a cmake from conda
|
||||
if [ -n "${CONDA_CMAKE}" ]; then
|
||||
conda_install cmake
|
||||
fi
|
||||
|
||||
# Magma package names are concatenation of CUDA major and minor ignoring revision
|
||||
# I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
|
||||
if [ -n "$CUDA_VERSION" ]; then
|
||||
conda_install magma-cuda$(TMP=${CUDA_VERSION/./};echo ${TMP%.*[0-9]}) -c pytorch
|
||||
fi
|
||||
|
||||
# Install some other packages, including those needed for Python test reporting
|
||||
pip_install -r /opt/conda/requirements-ci.txt
|
||||
|
||||
# Update scikit-learn to a python-3.8 compatible version
|
||||
if [[ $(python -c "import sys; print(int(sys.version_info >= (3, 8)))") == "1" ]]; then
|
||||
pip_install -U scikit-learn
|
||||
else
|
||||
# Pinned scikit-learn due to https://github.com/scikit-learn/scikit-learn/issues/14485 (affects gcc 5.5 only)
|
||||
pip_install scikit-learn==0.20.3
|
||||
fi
|
||||
|
||||
popd
|
||||
fi
|
|
@ -0,0 +1,27 @@
|
|||
#!/bin/bash
# Install cuDNN 8 headers and libraries system-wide and into the CUDA tree.
# NOTE(review): unlike the sibling install scripts this one does not `set -ex`,
# so a failed download will not abort the build — confirm this is intentional.

if [[ ${CUDNN_VERSION} == 8 ]]; then
    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    mkdir tmp_cudnn && cd tmp_cudnn
    # Default archive (CUDA 11.5 builds); overridden per CUDA minor version below.
    CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
    if [[ ${CUDA_VERSION:0:4} == "11.7" ]]; then
        CUDNN_NAME="cudnn-linux-x86_64-8.5.0.96_cuda11-archive"
        curl --retry 3 -OLs https://ossci-linux.s3.amazonaws.com/${CUDNN_NAME}.tar.xz
    elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
        CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
        curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_NAME}.tar.xz
    else
        curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz
    fi

    tar xf ${CUDNN_NAME}.tar.xz
    # Copy headers into the system include dirs and the CUDA toolkit tree.
    cp -a ${CUDNN_NAME}/include/* /usr/include/
    cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
    cp -a ${CUDNN_NAME}/include/* /usr/include/x86_64-linux-gnu/

    cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
    cp -a ${CUDNN_NAME}/lib/* /usr/lib/x86_64-linux-gnu/
    cd ..
    rm -rf tmp_cudnn
    # Refresh the dynamic linker cache so the new libraries are found.
    ldconfig
fi
|
|
@ -0,0 +1,49 @@
|
|||
#!/bin/bash

set -ex

# Key/value- and DB-related development headers, Debian package names.
setup_ubuntu() {
  apt-get update
  apt-get install -y --no-install-recommends \
          libhiredis-dev \
          libleveldb-dev \
          liblmdb-dev \
          libsnappy-dev

  # Cleanup
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

# Same set of packages, RPM spellings.
setup_centos() {
  # Need EPEL for many packages we depend on.
  # See http://fedoraproject.org/wiki/EPEL
  yum --enablerepo=extras install -y epel-release

  yum install -y \
      hiredis-devel \
      leveldb-devel \
      lmdb-devel \
      snappy-devel

  # Cleanup
  yum clean all
  rm -rf /var/cache/yum
  rm -rf /var/lib/yum/yumdb
  rm -rf /var/lib/yum/history
}

# Dispatch on the distribution reported by /etc/os-release.
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
  ubuntu)
    setup_ubuntu
    ;;
  centos)
    setup_centos
    ;;
  *)
    echo "Unable to determine OS..."
    exit 1
    ;;
esac
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/bash
# Install the requested devtoolset toolchain on CentOS and make new login
# shells pick it up via /etc/profile.d.

set -ex

# DEVTOOLSET_VERSION must be supplied by the caller; abort otherwise.
[ -n "$DEVTOOLSET_VERSION" ]

# Software Collections repo provides the devtoolset-* packages.
yum install -y centos-release-scl
# Quote the expansion so an unexpected value cannot word-split the package name.
yum install -y "devtoolset-$DEVTOOLSET_VERSION"

echo "source scl_source enable devtoolset-$DEVTOOLSET_VERSION" > "/etc/profile.d/devtoolset-$DEVTOOLSET_VERSION.sh"
|
|
@ -0,0 +1,25 @@
|
|||
#!/bin/bash

set -ex

# Documentation build prerequisites; only installed when KATEX is requested.
if [ -n "$KATEX" ]; then
  apt-get update
  # Ignore error if gpg-agent doesn't exist (for Ubuntu 16.04)
  apt-get install -y gpg-agent || :

  # Node.js via NodeSource, required by the yarn/katex toolchain below.
  curl --retry 3 -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -
  sudo apt-get install -y nodejs

  # Register yarn's apt repository and signing key.
  curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
  echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list

  apt-get update
  apt-get install -y --no-install-recommends yarn
  yarn global add katex --prefix /usr/local

  sudo apt-get -y install doxygen

  # Trim apt caches to keep the image small.
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

fi
|
|
@ -0,0 +1,27 @@
|
|||
#!/bin/bash
# Install g++/gcc at GCC_VERSION and register it as the default toolchain via
# update-alternatives. No-op when GCC_VERSION is unset.

set -ex

if [ -n "$GCC_VERSION" ]; then

  # Need the official toolchain repo to get alternate packages
  add-apt-repository ppa:ubuntu-toolchain-r/test
  apt-get update
  if [[ "$UBUNTU_VERSION" == "16.04" && "${GCC_VERSION:0:1}" == "5" ]]; then
    # Pin the exact xenial build of gcc-5.
    apt-get install -y g++-5=5.4.0-6ubuntu1~16.04.12
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 50
    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 50
    update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-5 50
  else
    # Quote the expansion for consistency with the update-alternatives lines.
    apt-get install -y g++-"$GCC_VERSION"
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
    update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50
  fi

  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

fi
|
|
@ -0,0 +1,34 @@
|
|||
#!/bin/bash
# Build glibc $GLIBC_VERSION from source and install it into /usr.

set -ex

[ -n "$GLIBC_VERSION" ]
if [[ -n "$CENTOS_VERSION" ]]; then
  # On CentOS the build needs the devtoolset toolchain on PATH (see below).
  [ -n "$DEVTOOLSET_VERSION" ]
fi

yum install -y wget sed

mkdir -p /packages && cd /packages
# Quote $GLIBC_VERSION consistently so the URL/paths survive odd values.
wget -q "http://ftp.gnu.org/gnu/glibc/glibc-$GLIBC_VERSION.tar.gz"
tar xzf "glibc-$GLIBC_VERSION.tar.gz"
if [[ "$GLIBC_VERSION" == "2.26" ]]; then
  cd "glibc-$GLIBC_VERSION"
  # Exclude nss_test2 (in addition to nss_test1) from the install-check script.
  sed -i 's/$name ne "nss_test1"/$name ne "nss_test1" \&\& $name ne "nss_test2"/' scripts/test-installation.pl
  cd ..
fi
mkdir -p "glibc-$GLIBC_VERSION-build" && cd "glibc-$GLIBC_VERSION-build"

if [[ -n "$CENTOS_VERSION" ]]; then
  export PATH=/opt/rh/devtoolset-$DEVTOOLSET_VERSION/root/usr/bin:$PATH
fi

"../glibc-$GLIBC_VERSION/configure" --prefix=/usr CFLAGS='-Wno-stringop-truncation -Wno-format-overflow -Wno-restrict -Wno-format-truncation -g -O2'
make -j"$(nproc)"
make install

# Cleanup
rm -rf /packages
rm -rf /var/cache/yum/*
rm -rf /var/lib/rpm/__db.*
yum clean all
|
|
@ -0,0 +1,6 @@
|
|||
#!/bin/bash

set -ex

# Expose the repo-provided jni.h as a system header under /usr/local/include.
mkdir -p /usr/local/include
cp jni.h /usr/local/include
|
|
@ -0,0 +1,8 @@
|
|||
#!/bin/bash

set -ex

# Build lcov v1.15 from source; make install places it in /usr/local/bin/lcov.
git clone --branch v1.15 https://github.com/linux-test-project/lcov.git
pushd lcov
sudo make install
popd
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash
# Pre-install all lintrunner toolchains into the Docker image so CI lint jobs
# start with a warm cache (/tmp/.lintbin).

set -ex

source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

if [ -n "${UBUNTU_VERSION}" ]; then
  # Use apt-get throughout: the `apt` front-end warns that it has no stable
  # CLI interface when used in scripts (original mixed `apt update` in here).
  apt-get update
  apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5
fi

# Do shallow clone of PyTorch so that we can init lintrunner in Docker build context
git clone https://github.com/pytorch/pytorch.git --depth 1
chown -R jenkins pytorch

pushd pytorch
# Install all linter dependencies
pip_install -r requirements.txt
conda_run lintrunner init

# Cache .lintbin directory as part of the Docker image
cp -r .lintbin /tmp
popd

# Node dependencies required by toc linter job
npm install -g markdown-toc

# Cleaning up
rm -rf pytorch
|
|
@ -0,0 +1,13 @@
|
|||
#!/bin/bash

set -ex

# NINJA_VERSION must be supplied by the caller.
[ -n "$NINJA_VERSION" ]

ninja_zip_url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"

# Fetch the prebuilt release binary and drop it into /usr/local/bin.
pushd /tmp
wget --no-verbose --output-document=ninja-linux.zip "$ninja_zip_url"
unzip ninja-linux.zip -d /usr/local/bin
rm -f ninja-linux.zip
popd
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/bash
# Install OpenMPI (plus ssh, which its process launcher expects) on Ubuntu.

set -ex  # was missing: without it, a failed install went unnoticed

sudo apt-get update
# also install ssh to avoid error of:
# --------------------------------------------------------------------------
# The value of the MCA parameter "plm_rsh_agent" was set to a path
# that could not be found:
# plm_rsh_agent: ssh : rsh
sudo apt-get install -y ssh
sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
|
|
@ -0,0 +1,16 @@
|
|||
#!/bin/bash

set -ex

OPENSSL=openssl-1.1.1k

# Fetch, build, and install OpenSSL into /opt/openssl.
wget -q -O "${OPENSSL}.tar.gz" "https://ossci-linux.s3.amazonaws.com/${OPENSSL}.tar.gz"
tar xf "${OPENSSL}.tar.gz"
cd "${OPENSSL}"
./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
# NOTE: openssl install errors out when built with the -j option
make -j6; make install_sw
# Link the ssl libraries to the /usr/lib folder.
sudo ln -s /opt/openssl/lib/lib* /usr/lib
cd ..
rm -rf "${OPENSSL}"
|
|
@ -0,0 +1,56 @@
|
|||
#!/bin/bash

set -ex

# Build protobuf 3.17.3 from source and install it system-wide.
install_protobuf_317() {
  pb_dir="/usr/temp_pb_install_dir"
  mkdir -p $pb_dir

  # On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
  # else it will fail with
  #   g++: error: ./../lib64/crti.o: No such file or directory
  ln -s /usr/lib64 "$pb_dir/lib64"

  curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
  tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
  # -j6 to balance memory usage and speed.
  # naked `-j` seems to use too much memory.
  pushd "$pb_dir" && ./configure && make -j6 && make -j6 check && sudo make -j6 install && sudo ldconfig
  popd
  rm -rf $pb_dir
}

install_ubuntu() {
  # Ubuntu 14.04 has cmake 2.8.12 as the default option, so we will
  # install cmake3 here and use cmake3.
  apt-get update
  if [[ "$UBUNTU_VERSION" == 14.04 ]]; then
    apt-get install -y --no-install-recommends cmake3
  fi

  # Cleanup
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

  install_protobuf_317
}

install_centos() {
  install_protobuf_317
}

# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
  ubuntu)
    install_ubuntu
    ;;
  centos)
    install_centos
    ;;
  *)
    echo "Unable to determine OS..."
    exit 1
    ;;
esac
|
|
@ -0,0 +1,146 @@
|
|||
#!/bin/bash
# Install the ROCm stack (and matching AMDGPU driver repo) on Ubuntu or CentOS.

set -ex

# Encode a dotted version as a fixed-width integer string so versions can be
# compared numerically, e.g. ver 5.3 -> "  5003000000".
ver() {
    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
}

# Map ROCm version to AMDGPU version
declare -A AMDGPU_VERSIONS=( ["5.0"]="21.50" ["5.1.1"]="22.10.1" ["5.2"]="22.20" )

install_ubuntu() {
    apt-get update
    # gpg-agent is not available by default on 18.04/20.04 (the two branches
    # were previously duplicated verbatim; merged here)
    if [[ $UBUNTU_VERSION == 18.04 || $UBUNTU_VERSION == 20.04 ]]; then
        apt-get install -y --no-install-recommends gpg-agent
    fi
    apt-get install -y kmod
    apt-get install -y wget

    # Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime
    apt-get install -y libc++1
    apt-get install -y libc++abi1

    # Hoisted out of the >=4.5 branch: also read by the >=5.3 ROCM_REPO
    # selection below, which previously depended on the 4.5 branch running.
    UBUNTU_VERSION_NAME=$(grep UBUNTU_CODENAME /etc/os-release | awk -F= '{print $2}')

    if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
        # Add amdgpu repository
        local amdgpu_baseurl
        if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
            amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
        else
            amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu"
        fi
        echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
    fi

    # Pick the ROCm apt distribution: pre-4.2 used "xenial", 5.3+ uses the
    # host codename, everything else uses the generic "ubuntu" suite.
    ROCM_REPO="ubuntu"
    if [[ $(ver $ROCM_VERSION) -lt $(ver 4.2) ]]; then
        ROCM_REPO="xenial"
    fi
    if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
        ROCM_REPO="${UBUNTU_VERSION_NAME}"
    fi

    # Add rocm repository
    wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
    local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
    echo "deb [arch=amd64] ${rocm_baseurl} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
    apt-get update --allow-insecure-repositories

    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
                   rocm-dev \
                   rocm-utils \
                   rocm-libs \
                   rccl \
                   rocprofiler-dev \
                   roctracer-dev

    # precompiled miopen kernels added in ROCm 3.5; search for all unversioned packages
    # if search fails it will abort this script; use true to avoid case where search fails
    MIOPENKERNELS=$(apt-cache search --names-only miopenkernels | awk '{print $1}' | grep -F -v . || true)
    if [[ "x${MIOPENKERNELS}" = x ]]; then
        echo "miopenkernels package not available"
    else
        DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENKERNELS}
    fi

    # Cleanup
    apt-get autoclean && apt-get clean
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

install_centos() {

    yum update -y
    yum install -y kmod
    yum install -y wget
    yum install -y openblas-devel

    yum install -y epel-release
    yum install -y dkms kernel-headers-$(uname -r) kernel-devel-$(uname -r)

    if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
        # Add amdgpu repository
        local amdgpu_baseurl
        if [[ $OS_VERSION == 9 ]]; then
            amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/9.0/main/x86_64"
        else
            if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
                amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
            else
                amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
            fi
        fi
        echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
        echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
        echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
        echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
        echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
        echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
    fi

    local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
    echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
    echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo
    echo "baseurl=${rocm_baseurl}" >> /etc/yum.repos.d/rocm.repo
    echo "enabled=1" >> /etc/yum.repos.d/rocm.repo
    echo "gpgcheck=1" >> /etc/yum.repos.d/rocm.repo
    echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/rocm.repo

    yum update -y

    yum install -y \
                   rocm-dev \
                   rocm-utils \
                   rocm-libs \
                   rccl \
                   rocprofiler-dev \
                   roctracer-dev

    # Cleanup
    yum clean all
    rm -rf /var/cache/yum
    rm -rf /var/lib/yum/yumdb
    rm -rf /var/lib/yum/history
}

# Install Python packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
  ubuntu)
    install_ubuntu
    ;;
  centos)
    install_centos
    ;;
  *)
    echo "Unable to determine OS..."
    exit 1
    ;;
esac
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash
# Build hipMAGMA from a pinned commit and install it under /opt/rocm/magma.

set -ex

# "install" hipMAGMA into /opt/rocm/magma by copying after build
git clone https://bitbucket.org/icl/magma.git
pushd magma
# Fixes memory leaks of magma found while executing linalg UTs
git checkout 5959b8783e45f1809812ed96ae762f38ee701972
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
export PATH="${PATH}:/opt/rocm/bin"
# Prefer the explicitly requested arch list; otherwise detect from the machine.
# (backticks replaced with $() and expansions quoted)
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
  amdgpu_targets=$(echo "$PYTORCH_ROCM_ARCH" | sed 's/;/ /g')
else
  amdgpu_targets=$(rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs)
fi
for arch in $amdgpu_targets; do
  echo "DEVCCFLAGS += --amdgpu-target=$arch" >> make.inc
done
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
make -f make.gen.hipMAGMA -j "$(nproc)"
LANG=C.UTF-8 make lib/libmagma.so -j "$(nproc)" MKLROOT="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION"
make testing/testing_dgemm -j "$(nproc)" MKLROOT="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION"
popd
mv magma /opt/rocm
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash

set -ex

[ -n "${SWIFTSHADER}" ]

# Retry helper with backoff (1/2/4/8s between attempts); curl below uses its
# own --retry, so this is available but not referenced in this script.
retry () {
  $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}

_https_amazon_aws=https://ossci-android.s3.amazonaws.com

# SwiftShader: fetch the prebuilt archive and unpack it under jenkins' home.
_swiftshader_dir=/var/lib/jenkins/swiftshader
_swiftshader_file_targz=swiftshader-abe07b943-prebuilt.tar.gz
mkdir -p $_swiftshader_dir
_tmp_swiftshader_targz="/tmp/${_swiftshader_file_targz}"

curl --silent --show-error --location --fail --retry 3 \
  --output "${_tmp_swiftshader_targz}" "$_https_amazon_aws/${_swiftshader_file_targz}"

tar -C "${_swiftshader_dir}" -xzf "${_tmp_swiftshader_targz}"

# Point the Vulkan loader at SwiftShader's ICD manifest.
export VK_ICD_FILENAMES="${_swiftshader_dir}/build/Linux/vk_swiftshader_icd.json"
|
|
@ -0,0 +1,14 @@
|
|||
#!/bin/bash
# Build and install Apache Thrift 0.12.0 from source.

set -ex  # was missing (along with the shebang): failures went unnoticed

apt-get update
apt-get install -y sudo wget libboost-dev libboost-test-dev libboost-program-options-dev libboost-filesystem-dev libboost-thread-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev
wget https://www-us.apache.org/dist/thrift/0.12.0/thrift-0.12.0.tar.gz
tar -xvf thrift-0.12.0.tar.gz
cd thrift-0.12.0
# Strip -Werror from the compiler Makefiles so warnings don't fail the build.
for file in ./compiler/cpp/Makefile*; do
  sed -i 's/\-Werror//' "$file"
done
./bootstrap.sh
./configure --without-php --without-java --without-python --without-nodejs --without-go --without-ruby
sudo make
sudo make install
cd ..
# Remove both the tarball and the extracted source tree (the tree was
# previously left behind in the image).
rm -rf thrift-0.12.0 thrift-0.12.0.tar.gz
|
|
@ -0,0 +1,48 @@
|
|||
#!/bin/bash
# Build and install UCX then UCC from source, optionally with CUDA support.

set -ex

# Enable CUDA support only when a toolkit is present in the image.
if [[ -d "/usr/local/cuda/" ]]; then
  with_cuda=/usr/local/cuda/
else
  with_cuda=no
fi

function install_ucx() {
  set -ex
  git clone --recursive https://github.com/openucx/ucx.git
  pushd ucx
  # Quote commit/prefix expansions (previously unquoted).
  git checkout "${UCX_COMMIT}"
  git submodule update --init --recursive

  ./autogen.sh
  ./configure --prefix="$UCX_HOME" \
      --enable-mt \
      --with-cuda="$with_cuda" \
      --enable-profiling \
      --enable-stats
  time make -j
  sudo make install

  popd
  rm -rf ucx
}

function install_ucc() {
  set -ex
  git clone --recursive https://github.com/openucx/ucc.git
  pushd ucc
  git checkout "${UCC_COMMIT}"
  git submodule update --init --recursive

  ./autogen.sh
  # UCC is built against the UCX just installed above.
  ./configure --prefix="$UCC_HOME" --with-ucx="$UCX_HOME" --with-cuda="$with_cuda"
  time make -j
  sudo make install

  popd
  rm -rf ucc
}

install_ucx
install_ucc
|
|
@ -0,0 +1,33 @@
|
|||
#!/bin/bash

set -ex

# Mirror jenkins user in container
# jenkins user as ec2-user should have the same user-id
echo "jenkins:x:1000:1000::/var/lib/jenkins:" >> /etc/passwd
echo "jenkins:x:1000:" >> /etc/group
# Needed on focal or newer
echo "jenkins:*:19110:0:99999:7:::" >>/etc/shadow

# Create $HOME and the ccache dir, both owned by jenkins.
mkdir -p /var/lib/jenkins
chown jenkins:jenkins /var/lib/jenkins
mkdir -p /var/lib/jenkins/.ccache
chown jenkins:jenkins /var/lib/jenkins/.ccache

# Allow writing to /usr/local (for make install)
chown jenkins:jenkins /usr/local

# Allow sudo
# TODO: Maybe we shouldn't
echo 'jenkins ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/jenkins

# Work around bug where devtoolset replaces sudo and breaks it.
if [ -n "$DEVTOOLSET_VERSION" ]; then
  SUDO=/bin/sudo
else
  SUDO=sudo
fi

# Test that sudo works
$SUDO -u jenkins $SUDO -v
|
|
@ -0,0 +1,45 @@
|
|||
#!/bin/bash

set -ex

# Vision/codec development packages, Debian spellings.
provision_ubuntu() {
  apt-get update
  apt-get install -y --no-install-recommends \
          libopencv-dev \
          libavcodec-dev

  # Cleanup
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

# Same capability set, RPM spellings.
provision_centos() {
  # Need EPEL for many packages we depend on.
  # See http://fedoraproject.org/wiki/EPEL
  yum --enablerepo=extras install -y epel-release

  yum install -y \
      opencv-devel \
      ffmpeg-devel

  # Cleanup
  yum clean all
  rm -rf /var/cache/yum
  rm -rf /var/lib/yum/yumdb
  rm -rf /var/lib/yum/history
}

# Dispatch on the distribution reported by /etc/os-release.
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
  ubuntu)
    provision_ubuntu
    ;;
  centos)
    provision_centos
    ;;
  *)
    echo "Unable to determine OS..."
    exit 1
    ;;
esac
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash

set -ex

[ -n "${VULKAN_SDK_VERSION}" ]

# Retry helper with backoff (1/2/4/8s); curl below uses its own --retry, so
# this function is not referenced in this script.
retry () {
  $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}

_vulkansdk_dir=/var/lib/jenkins/vulkansdk
_tmp_vulkansdk_targz=/tmp/vulkansdk.tar.gz

# Download the prebuilt SDK tarball from the OSS CI mirror.
curl --silent --show-error --location --fail --retry 3 \
  --output "${_tmp_vulkansdk_targz}" "https://ossci-android.s3.amazonaws.com/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"

mkdir -p "${_vulkansdk_dir}"
tar -C "${_vulkansdk_dir}" -xzf "${_tmp_vulkansdk_targz}" --strip-components 1
rm -rf "${_tmp_vulkansdk_targz}"
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,34 @@
|
|||
ARG UBUNTU_VERSION

FROM ubuntu:${UBUNTU_VERSION}

ARG UBUNTU_VERSION

ENV DEBIAN_FRONTEND=noninteractive

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt

# Note that Docker build forbids copying file outside the build context
COPY ./common/install_linter.sh install_linter.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_linter.sh
RUN rm install_linter.sh common_utils.sh

USER jenkins
CMD ["bash"]
|
|
@ -0,0 +1,260 @@
|
|||
# Python dependencies required for unit tests
|
||||
|
||||
#awscli==1.6 #this breaks some platforms
|
||||
#Description: AWS command line interface
|
||||
#Pinned versions: 1.6
|
||||
#test that import:
|
||||
|
||||
boto3==1.19.12
|
||||
#Description: AWS SDK for python
|
||||
#Pinned versions: 1.19.12, 1.16.34
|
||||
#test that import:
|
||||
|
||||
click
|
||||
#Description: Command Line Interface Creation Kit
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
coremltools==5.0b5
|
||||
#Description: Apple framework for ML integration
|
||||
#Pinned versions: 5.0b5
|
||||
#test that import:
|
||||
|
||||
#dataclasses #this breaks some platforms
|
||||
#Description: Provides decorators for auto adding special methods to user classes
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
expecttest==0.1.3
|
||||
#Description: method for writing tests where test framework auto populates
|
||||
# the expected output based on previous runs
|
||||
#Pinned versions: 0.1.3
|
||||
#test that import:
|
||||
|
||||
flatbuffers==2.0
|
||||
#Description: cross platform serialization library
|
||||
#Pinned versions: 2.0
|
||||
#test that import:
|
||||
|
||||
hypothesis==5.35.1
|
||||
# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
|
||||
#Description: advanced library for generating parametrized tests
|
||||
#Pinned versions: 3.44.6, 4.53.2
|
||||
#test that import: test_xnnpack_integration.py, test_pruning_op.py, test_nn.py
|
||||
|
||||
junitparser==2.1.1
|
||||
#Description: junitparser handles JUnit/xUnit Result XML files
|
||||
#Pinned versions: 2.1.1
|
||||
#test that import:
|
||||
|
||||
librosa>=0.6.2 ; python_version < "3.11"
|
||||
#Description: A python package for music and audio analysis
|
||||
#Pinned versions: >=0.6.2
|
||||
#test that import: test_spectral_ops.py
|
||||
|
||||
#mkl #this breaks linux-bionic-rocm4.5-py3.7
|
||||
#Description: Intel oneAPI Math Kernel Library
|
||||
#Pinned versions:
|
||||
#test that import: test_profiler.py, test_public_bindings.py, test_testing.py,
|
||||
#test_nn.py, test_mkldnn.py, test_jit.py, test_fx_experimental.py,
|
||||
#test_autograd.py
|
||||
|
||||
#mkl-devel
|
||||
# see mkl
|
||||
|
||||
#mock # breaks ci/circleci: docker-pytorch-linux-xenial-py3-clang5-android-ndk-r19c
|
||||
#Description: A testing library that allows you to replace parts of your
|
||||
#system under test with mock objects
|
||||
#Pinned versions:
|
||||
#test that import: test_module_init.py, test_modules.py, test_nn.py,
|
||||
#test_testing.py
|
||||
|
||||
#MonkeyType # breaks pytorch-xla-linux-bionic-py3.7-clang8
|
||||
#Description: collects runtime types of function arguments and return
|
||||
#values, and can automatically generate stub files
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
mypy==0.960
|
||||
# Pin MyPy version because new errors are likely to appear with each release
|
||||
#Description: linter
|
||||
#Pinned versions: 0.960
|
||||
#test that import: test_typing.py, test_type_hints.py
|
||||
|
||||
networkx==2.6.3
|
||||
#Description: creation, manipulation, and study of
|
||||
#the structure, dynamics, and functions of complex networks
|
||||
#Pinned versions: 2.6.3 (latest version that works with Python 3.7+)
|
||||
#test that import: functorch
|
||||
|
||||
#ninja
|
||||
#Description: build system. Note that installing it from
|
||||
#here breaks things so it is commented out
|
||||
#Pinned versions: 1.10.0.post1
|
||||
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
|
||||
|
||||
numba==0.49.0 ; python_version < "3.9"
|
||||
numba==0.54.1 ; python_version == "3.9"
|
||||
numba==0.55.2 ; python_version == "3.10"
|
||||
#Description: Just-In-Time Compiler for Numerical Functions
|
||||
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
|
||||
#test that import: test_numba_integration.py
|
||||
#For numba issue see https://github.com/pytorch/pytorch/issues/51511
|
||||
|
||||
#numpy
|
||||
#Description: Provides N-dimensional arrays and linear algebra
|
||||
#Pinned versions: 1.20
|
||||
#test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
|
||||
#test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
|
||||
#test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
|
||||
#test_spectral_ops.py, test_sort_and_select.py, test_shape_ops.py,
|
||||
#test_segment_reductions.py, test_reductions.py, test_pruning_op.py,
|
||||
#test_overrides.py, test_numpy_interop.py, test_numba_integration.py
|
||||
#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
|
||||
#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
|
||||
#test_binary_ufuncs.py
|
||||
|
||||
#onnxruntime
|
||||
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
|
||||
#Pinned versions: 1.9.0
|
||||
#test that import:
|
||||
|
||||
opt-einsum==3.3
|
||||
#Description: Python library to optimize tensor contraction order, used in einsum
|
||||
#Pinned versions: 3.3
|
||||
#test that import: test_linalg.py
|
||||
|
||||
#pillow
|
||||
#Description: Python Imaging Library fork
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
protobuf==3.20.2
|
||||
#Description: Google’s data interchange format
|
||||
#Pinned versions: 3.20.2
|
||||
#test that import: test_tensorboard.py
|
||||
|
||||
psutil
|
||||
#Description: information on running processes and system utilization
|
||||
#Pinned versions:
|
||||
#test that import: test_profiler.py, test_openmp.py, test_dataloader.py
|
||||
|
||||
pytest
|
||||
#Description: testing framework
|
||||
#Pinned versions:
|
||||
#test that import: test_typing.py, test_cpp_extensions_aot.py, run_test.py
|
||||
|
||||
pytest-xdist
|
||||
#Description: plugin for running pytest in parallel
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
pytest-shard
|
||||
#Description: plugin for splitting up tests in pytest
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
pytest-flakefinder==1.1.0
|
||||
#Description: plugin for rerunning tests a fixed number of times in pytest
|
||||
#Pinned versions: 1.1.0
|
||||
#test that import:
|
||||
|
||||
pytest-rerunfailures
|
||||
#Description: plugin for rerunning failure tests in pytest
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
#pytest-benchmark
|
||||
#Description: fixture for benchmarking code
|
||||
#Pinned versions: 3.2.3
|
||||
#test that import:
|
||||
|
||||
#pytest-sugar
|
||||
#Description: shows failures and errors instantly
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
xdoctest==1.1.0
|
||||
#Description: runs doctests in pytest
|
||||
#Pinned versions: 1.1.0
|
||||
#test that import:
|
||||
|
||||
pygments==2.12.0
|
||||
#Description: support doctest highlighting
|
||||
#Pinned versions: 2.12.0
|
||||
#test that import: the doctests
|
||||
|
||||
#PyYAML
|
||||
#Description: data serialization format
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
#requests
|
||||
#Description: HTTP library
|
||||
#Pinned versions:
|
||||
#test that import: test_type_promotion.py
|
||||
|
||||
#rich
|
||||
#Description: rich text and beautiful formatting in the terminal
|
||||
#Pinned versions: 10.9.0
|
||||
#test that import:
|
||||
|
||||
scikit-image
|
||||
#Description: image processing routines
|
||||
#Pinned versions:
|
||||
#test that import: test_nn.py
|
||||
|
||||
#scikit-learn
|
||||
#Description: machine learning package
|
||||
#Pinned versions: 0.20.3
|
||||
#test that import:
|
||||
|
||||
scipy==1.6.3 ; python_version < "3.10"
|
||||
scipy==1.8.1 ; python_version == "3.10"
|
||||
scipy==1.9.3 ; python_version == "3.11"
|
||||
# Pin SciPy because of failing distribution tests (see #60347)
|
||||
#Description: scientific python
|
||||
#Pinned versions: 1.6.3, 1.8.1, 1.9.3
|
||||
#test that import: test_unary_ufuncs.py, test_torch.py,test_tensor_creation_ops.py
|
||||
#test_spectral_ops.py, test_sparse_csr.py, test_reductions.py,test_nn.py
|
||||
#test_linalg.py, test_binary_ufuncs.py
|
||||
|
||||
#tabulate
|
||||
#Description: Pretty-print tabular data
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
tb-nightly
|
||||
#Description: TensorBoard
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
#typing-extensions
|
||||
#Description: type hints for python
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
#virtualenv
|
||||
#Description: virtual environment for python
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
unittest-xml-reporting<=3.2.0,>=2.0.0
|
||||
#Description: saves unit test results to xml
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
lintrunner==0.9.2
|
||||
#Description: all about linters
|
||||
#Pinned versions: 0.9.2
|
||||
#test that import:
|
||||
|
||||
rockset==1.0.3
|
||||
#Description: queries Rockset
|
||||
#Pinned versions: 1.0.3
|
||||
#test that import:
|
||||
|
||||
ghstack==0.7.1
|
||||
#Description: ghstack tool
|
||||
#Pinned versions: 0.7.1
|
||||
#test that import:
|
|
@ -0,0 +1,132 @@
|
|||
ARG UBUNTU_VERSION
ARG CUDA_VERSION
ARG IMAGE_NAME

FROM ${IMAGE_NAME}

# Re-declare ARGs after FROM so they are visible in this build stage.
ARG UBUNTU_VERSION
ARG CUDA_VERSION

ENV DEBIAN_FRONTEND noninteractive

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install katex
ARG KATEX
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
ARG CONDA_CMAKE
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt

# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# Install clang
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh

# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh install_vision.sh
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh
ENV INSTALLED_VISION ${VISION}

# (optional) Install UCC
ARG UCX_COMMIT
ARG UCC_COMMIT
ENV UCX_COMMIT $UCX_COMMIT
ENV UCC_COMMIT $UCC_COMMIT
ENV UCX_HOME /usr
ENV UCC_HOME /usr
# COPY (not ADD): plain local file, no URL/tar-extraction semantics needed
COPY ./common/install_ucc.sh install_ucc.sh
RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
RUN rm install_ucc.sh

COPY ./common/install_openssl.sh install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
RUN bash ./install_openssl.sh
ENV OPENSSL_DIR /opt/openssl
# Clean up the install script, matching the other Dockerfiles in this tree.
RUN rm install_openssl.sh

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
# See https://github.com/pytorch/pytorch/issues/82174
# TODO(sdym@fb.com):
# check if this is needed after full off Xenial migration
ENV CARGO_NET_GIT_FETCH_WITH_CLI true
RUN bash ./install_cache.sh && rm install_cache.sh
ENV CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache

# Add jni.h for java host build
COPY ./common/install_jni.sh install_jni.sh
COPY ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh

# Install Open MPI for CUDA
COPY ./common/install_openmpi.sh install_openmpi.sh
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
RUN rm install_openmpi.sh

# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

# AWS specific CUDA build guidance
ENV TORCH_CUDA_ARCH_LIST Maxwell
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
ENV CUDA_PATH /usr/local/cuda

# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm

# Install CUDNN
ARG CUDNN_VERSION
ARG CUDA_VERSION
COPY ./common/install_cudnn.sh install_cudnn.sh
# String compare instead of -eq: avoids "integer expression expected" noise
# when CUDNN_VERSION is unset/empty; behavior is identical for "8".
RUN if [ "${CUDNN_VERSION}" = "8" ]; then bash install_cudnn.sh; fi
RUN rm install_cudnn.sh

# Delete /usr/local/cuda-11.X/cuda-11.X symlinks
RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi

USER jenkins
CMD ["bash"]
|
|
@ -0,0 +1 @@
|
|||
*.sh
|
|
@ -0,0 +1,102 @@
|
|||
ARG UBUNTU_VERSION

FROM ubuntu:${UBUNTU_VERSION}

# Re-declare ARG after FROM so it is visible in this build stage.
ARG UBUNTU_VERSION

ENV DEBIAN_FRONTEND noninteractive

# Set AMD gpu targets to build for
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install clang
ARG LLVMDEV
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt

# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh install_vision.sh
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh
ENV INSTALLED_VISION ${VISION}

# Install rocm
ARG ROCM_VERSION
COPY ./common/install_rocm.sh install_rocm.sh
# Unconditional install: remove the script in the same layer, matching the
# "bash … && rm …" pattern used throughout this file.
RUN bash ./install_rocm.sh && rm install_rocm.sh
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
ENV PATH /opt/rocm/bin:$PATH
ENV PATH /opt/rocm/hcc/bin:$PATH
ENV PATH /opt/rocm/hip/bin:$PATH
ENV PATH /opt/rocm/opencl/bin:$PATH
ENV PATH /opt/rocm/llvm/bin:$PATH
ENV MAGMA_HOME /opt/rocm/magma
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh

# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

USER jenkins
CMD ["bash"]
|
|
@ -0,0 +1,165 @@
|
|||
ARG UBUNTU_VERSION

FROM ubuntu:${UBUNTU_VERSION}

# Re-declare ARG after FROM so it is visible in this build stage.
ARG UBUNTU_VERSION

ENV DEBIAN_FRONTEND noninteractive

ARG CLANG_VERSION

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install clang
ARG LLVMDEV
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh

# (optional) Install thrift.
ARG THRIFT
COPY ./common/install_thrift.sh install_thrift.sh
RUN if [ -n "${THRIFT}" ]; then bash ./install_thrift.sh; fi
RUN rm install_thrift.sh
ENV INSTALLED_THRIFT ${THRIFT}

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install katex
ARG KATEX
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt

# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# Install lcov for C++ code coverage
COPY ./common/install_lcov.sh install_lcov.sh
RUN bash ./install_lcov.sh && rm install_lcov.sh

# Install cuda and cudnn
ARG CUDA_VERSION
RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/common/install_cuda.sh -O install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH

# (optional) Install UCC
ARG UCX_COMMIT
ARG UCC_COMMIT
ENV UCX_COMMIT $UCX_COMMIT
ENV UCC_COMMIT $UCC_COMMIT
ENV UCX_HOME /usr
ENV UCC_HOME /usr
# COPY (not ADD): plain local file, no URL/tar-extraction semantics needed
COPY ./common/install_ucc.sh install_ucc.sh
RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
RUN rm install_ucc.sh

# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh install_vision.sh
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh
ENV INSTALLED_VISION ${VISION}

# (optional) Install Android NDK
ARG ANDROID
ARG ANDROID_NDK
ARG GRADLE_VERSION
COPY ./common/install_android.sh install_android.sh
COPY ./android/AndroidManifest.xml AndroidManifest.xml
COPY ./android/build.gradle build.gradle
RUN if [ -n "${ANDROID}" ]; then bash ./install_android.sh; fi
RUN rm install_android.sh
RUN rm AndroidManifest.xml
RUN rm build.gradle
ENV INSTALLED_ANDROID ${ANDROID}

# (optional) Install Vulkan SDK
ARG VULKAN_SDK_VERSION
COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh
RUN if [ -n "${VULKAN_SDK_VERSION}" ]; then bash ./install_vulkan_sdk.sh; fi
RUN rm install_vulkan_sdk.sh

# (optional) Install swiftshader
ARG SWIFTSHADER
COPY ./common/install_swiftshader.sh install_swiftshader.sh
RUN if [ -n "${SWIFTSHADER}" ]; then bash ./install_swiftshader.sh; fi
RUN rm install_swiftshader.sh

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh

COPY ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
ENV OPENSSL_DIR /opt/openssl
RUN rm install_openssl.sh

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh

# Add jni.h for java host build
COPY ./common/install_jni.sh install_jni.sh
COPY ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh

# Install Open MPI for CUDA
COPY ./common/install_openmpi.sh install_openmpi.sh
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
RUN rm install_openmpi.sh

# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm

# AWS specific CUDA build guidance
ENV TORCH_CUDA_ARCH_LIST Maxwell
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
ENV CUDA_PATH /usr/local/cuda

USER jenkins
CMD ["bash"]
|
|
@ -0,0 +1,14 @@
|
|||
# Jenkins

The scripts in this directory are the entrypoint for testing ONNX exporter.

The environment variable `BUILD_ENVIRONMENT` is expected to be set to
the build environment you intend to test. It is a hint for the build
and test scripts to configure Caffe2 a certain way and include/exclude
tests. For Docker images, it equals the name of the image itself. For
example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are
built on Jenkins and are used in triggered builds already have this
environment variable set in their manifest. Also see
`./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`.

Our Jenkins installation is located at https://ci.pytorch.org/jenkins/.
|
|
@ -0,0 +1,19 @@
|
|||
set -ex

# Resolve key directories relative to this script's own location so the
# script works regardless of the caller's working directory.
LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
TEST_DIR="$ROOT_DIR/test"
pytest_reports_dir="${TEST_DIR}/test-reports/python"

# Figure out which Python to use: prefer the interpreter version encoded in
# BUILD_ENVIRONMENT (e.g. "py3.8"), otherwise whatever "python" resolves to.
# command -v is the POSIX replacement for the non-standard `which` (SC2230).
PYTHON="$(command -v python)"
if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
  PYTHON=$(command -v "python${BASH_REMATCH[1]}")
fi

if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
  # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
  unset HIP_PLATFORM
fi

# Best-effort: test runners expect this directory to exist.
mkdir -p "$pytest_reports_dir" || true
|
|
@ -0,0 +1,74 @@
|
|||
#!/bin/bash

# CI test entrypoint: installs test-only Python dependencies and dispatches
# to the ONNX test suite when BUILD_ENVIRONMENT requests it.

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
  pip install click mock tabulate networkx==2.0
  pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
fi

# Skip tests in environments where they are not built/applicable
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
  echo 'Skipping tests'
  exit 0
fi
if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
  # temporary to locate some kernel issues on the CI nodes
  export HSAKMT_DEBUG_LEVEL=4
fi
# These additional packages are needed for circleci ROCm builds.
if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
  # Need networkx 2.0 because bellman_ford was moved in 2.1. Scikit-image by
  # default installs the most recent networkx version, so we install this lower
  # version explicitly before scikit-image pulls it in as a dependency.
  pip install networkx==2.0
  # click - onnx
  pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
fi

################################################################################
# Python tests #
################################################################################
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
  exit 0
fi

# If pip is installed as root, we must use sudo.
# CircleCI docker images could install conda as jenkins user, or use the OS's python package.
PIP=$(command -v pip)
PIP_USER=$(stat --format '%U' "$PIP")
CURRENT_USER=$(id -u -n)
# Initialize explicitly so later expansions never hit an unset variable.
MAYBE_SUDO=""
if [[ "$PIP_USER" = root && "$CURRENT_USER" != root ]]; then
  MAYBE_SUDO=sudo
fi

# Uninstall pre-installed hypothesis and coverage to use an older version as newer
# versions remove the timeout parameter from settings which ideep/conv_transpose_test.py uses
$MAYBE_SUDO pip -q uninstall -y hypothesis
$MAYBE_SUDO pip -q uninstall -y coverage

# "pip install hypothesis==3.44.6" from official server is unreliable on
# CircleCI, so we host a copy on S3 instead
$MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
$MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
$MAYBE_SUDO pip -q install hypothesis==4.57.1

##############
# ONNX tests #
##############
if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
  pip install -q --user --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)"
  pip install -q --user transformers==4.25.1
  pip install -q --user ninja flatbuffers==2.0 numpy==1.22.4 onnxruntime==1.14.0 beartype==0.10.4
  # TODO: change this when onnx 1.13.1 is released.
  pip install --no-use-pep517 'onnx @ git+https://github.com/onnx/onnx@e192ba01e438d22ca2dedd7956e28e3551626c91'
  # TODO: change this when onnx-script is on testPypi
  pip install 'onnx-script @ git+https://github.com/microsoft/onnx-script@a71e35bcd72537bf7572536ee57250a0c0488bf6'
  # numba requires numpy <= 1.20, onnxruntime requires numpy >= 1.21.
  # We don't actually need it for our tests, but it's imported if it's present, so uninstall.
  pip uninstall -q --yes numba
  # JIT C++ extensions require ninja, so put it into PATH.
  export PATH="/var/lib/jenkins/.local/bin:$PATH"
  "$ROOT_DIR/scripts/onnx/test.sh"
fi
|
|
@ -0,0 +1,4 @@
|
|||
source-path=SCRIPTDIR
|
||||
|
||||
# we'd like to enable --external-sources here but can't
|
||||
# https://github.com/koalaman/shellcheck/issues/1818
|
|
@ -0,0 +1,42 @@
|
|||
This directory contains scripts for our continuous integration.

One important thing to keep in mind when reading the scripts here is
that they are all based off of Docker images, which we build for each of
the various system configurations we want to run on Jenkins. This means
it is very easy to run these tests yourself:

1. Figure out what Docker image you want. The general template for our
   images looks like:
   ``registry.pytorch.org/pytorch/pytorch-$BUILD_ENVIRONMENT:$DOCKER_VERSION``,
   where ``$BUILD_ENVIRONMENT`` is one of the build environments
   enumerated in
   [pytorch-dockerfiles](https://github.com/pytorch/pytorch/blob/master/.ci/docker/build.sh). The dockerfile used by jenkins can be found under the `.ci` [directory](https://github.com/pytorch/pytorch/blob/master/.ci/docker)

2. Run ``docker run -it -u jenkins $DOCKER_IMAGE``, clone PyTorch and
   run one of the scripts in this directory.

The Docker images are designed so that any "reasonable" build commands
will work; if you look in [build.sh](build.sh) you will see that it is a
very simple script. This is intentional. Idiomatic build instructions
should work inside all of our Docker images. You can tweak the commands
however you need (e.g., in case you want to rebuild with DEBUG, or rerun
the build with higher verbosity, etc.).

We have to do some work to make this so. Here is a summary of the
mechanisms we use:

- We install binaries to directories like `/usr/local/bin` which
  are automatically part of your PATH.

- We add entries to the PATH using Docker ENV variables (so
  they apply when you enter Docker) and `/etc/environment` (so they
  continue to apply even if you sudo), instead of modifying
  `PATH` in our build scripts.

- We use `/etc/ld.so.conf.d` to register directories containing
  shared libraries, instead of modifying `LD_LIBRARY_PATH` in our
  build scripts.

- We reroute well known paths like `/usr/bin/gcc` to alternate
  implementations with `update-alternatives`, instead of setting
  `CC` and `CXX` in our implementations.
|
|
@ -0,0 +1,42 @@
|
|||
#!/bin/bash

# Build PyTorch with AddressSanitizer/UBSan enabled and sanity-check the
# sdist tarball.
#
# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.)

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

echo "Clang version:"
clang --version

python tools/stats/export_test_times.py

# detect_leaks=0: Python is very leaky, so we need suppress it
# symbolize=1: Gives us much better errors when things go wrong
export ASAN_OPTIONS=detect_leaks=0:detect_stack_use_after_return=1:symbolize=1:detect_odr_violation=0
# command -v is the POSIX replacement for the non-standard `which` (SC2230).
if [ -n "$(command -v conda)" ]; then
  export CMAKE_PREFIX_PATH=/opt/conda
fi

# TODO: Make the ASAN flags a centralized env var and unify with USE_ASAN option
CC="clang" CXX="clang++" LDSHARED="clang --shared" \
  CFLAGS="-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -fsanitize-address-use-after-scope -shared-libasan" \
  USE_ASAN=1 USE_CUDA=0 USE_MKLDNN=0 \
  python setup.py bdist_wheel
pip_install_whl "$(echo dist/*.whl)"

# Test building via the sdist source tarball
python setup.py sdist
mkdir -p /tmp/tmp
pushd /tmp/tmp
tar zxf "$(dirname "${BASH_SOURCE[0]}")/../../dist/"*.tar.gz
cd torch-*
python setup.py build --cmake-only
popd

print_sccache_stats

assert_git_not_dirty
|
|
@ -0,0 +1,34 @@
|
|||
#!/usr/bin/env bash
# DO NOT ADD 'set -x', so as not to reveal CircleCI secret context environment variables.
set -eu -o pipefail

# This script uses linux host toolchain + mobile build options in order to
# build & test mobile libtorch without having to setup Android/iOS
# toolchain/simulator.

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

# Install torch & torchvision - used to download & trace test model.
# Ideally we should use the libtorch built on the PR so that backward
# incompatible changes won't break this script - but it will significantly slow
# down mobile CI jobs.
# Here we install nightly instead of stable so that we have an option to
# temporarily skip mobile CI jobs on BC-breaking PRs until they are in nightly.
retry pip install --pre torch torchvision \
  -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html \
  --progress-bar off

# Run end-to-end process of building mobile library, linking into the predictor
# binary, and running forward pass with a real model.
if [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-static* ]]; then
  TEST_CUSTOM_BUILD_STATIC=1 test/mobile/custom_build/build.sh
elif [[ "$BUILD_ENVIRONMENT" == *-mobile-lightweight-dispatch* ]]; then
  test/mobile/lightweight_dispatch/build.sh
else
  TEST_DEFAULT_BUILD=1 test/mobile/custom_build/build.sh
fi

print_sccache_stats
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash

# Build PyTorch with ThreadSanitizer enabled.
#
# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.)

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

echo "Clang version:"
clang --version

python tools/stats/export_test_times.py

# command -v is the POSIX replacement for the non-standard `which` (SC2230).
if [ -n "$(command -v conda)" ]; then
  export CMAKE_PREFIX_PATH=/opt/conda
fi

CC="clang" CXX="clang++" LDSHARED="clang --shared" \
  CFLAGS="-fsanitize=thread" \
  USE_TSAN=1 USE_CUDA=0 USE_MKLDNN=0 \
  python setup.py bdist_wheel
pip_install_whl "$(echo dist/*.whl)"

print_sccache_stats

assert_git_not_dirty
|
|
@ -0,0 +1,318 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
# Required environment variable: $BUILD_ENVIRONMENT
|
||||
# (This is set by default in the Docker images we build, so you don't
|
||||
# need to set it yourself.
|
||||
|
||||
# shellcheck source=./common.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
# shellcheck source=./common-build.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-clang7-asan* ]]; then
|
||||
exec "$(dirname "${BASH_SOURCE[0]}")/build-asan.sh" "$@"
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-clang7-tsan* ]]; then
|
||||
exec "$(dirname "${BASH_SOURCE[0]}")/build-tsan.sh" "$@"
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
|
||||
exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
|
||||
fi
|
||||
|
||||
echo "Python version:"
|
||||
python --version
|
||||
|
||||
echo "GCC version:"
|
||||
gcc --version
|
||||
|
||||
echo "CMake version:"
|
||||
cmake --version
|
||||
|
||||
echo "Environment variables:"
|
||||
env
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
||||
echo "NVCC version:"
|
||||
nvcc --version
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" != *cuda11.3* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
|
||||
# TODO: there is a linking issue when building with UCC using clang,
|
||||
# disable it for now and to be fix later.
|
||||
export USE_UCC=1
|
||||
export USE_SYSTEM_UCC=1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ${BUILD_ENVIRONMENT} == *"caffe2"* ]]; then
|
||||
echo "Caffe2 build is ON"
|
||||
export BUILD_CAFFE2=ON
|
||||
fi
|
||||
|
||||
if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
|
||||
export ATEN_THREADING=TBB
|
||||
export USE_TBB=1
|
||||
elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
|
||||
export ATEN_THREADING=NATIVE
|
||||
fi
|
||||
|
||||
# Enable LLVM dependency for TensorExpr testing
|
||||
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
||||
export USE_LLVM=/opt/rocm/llvm
|
||||
export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm
|
||||
else
|
||||
export USE_LLVM=/opt/llvm
|
||||
export LLVM_DIR=/opt/llvm/lib/cmake/llvm
|
||||
fi
|
||||
|
||||
if ! which conda; then
|
||||
# In ROCm CIs, we are doing cross compilation on build machines with
|
||||
# intel cpu and later run tests on machines with amd cpu.
|
||||
# Also leave out two builds to make sure non-mkldnn builds still work.
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
|
||||
export USE_MKLDNN=1
|
||||
else
|
||||
export USE_MKLDNN=0
|
||||
fi
|
||||
else
|
||||
export CMAKE_PREFIX_PATH=/opt/conda
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then
|
||||
POSSIBLE_JAVA_HOMES=()
|
||||
POSSIBLE_JAVA_HOMES+=(/usr/local)
|
||||
POSSIBLE_JAVA_HOMES+=(/usr/lib/jvm/java-8-openjdk-amd64)
|
||||
POSSIBLE_JAVA_HOMES+=(/Library/Java/JavaVirtualMachines/*.jdk/Contents/Home)
|
||||
# Add the Windows-specific JNI
|
||||
POSSIBLE_JAVA_HOMES+=("$PWD/.circleci/windows-jni/")
|
||||
for JH in "${POSSIBLE_JAVA_HOMES[@]}" ; do
|
||||
if [[ -e "$JH/include/jni.h" ]] ; then
|
||||
# Skip if we're not on Windows but haven't found a JAVA_HOME
|
||||
if [[ "$JH" == "$PWD/.circleci/windows-jni/" && "$OSTYPE" != "msys" ]] ; then
|
||||
break
|
||||
fi
|
||||
echo "Found jni.h under $JH"
|
||||
export JAVA_HOME="$JH"
|
||||
export BUILD_JNI=ON
|
||||
break
|
||||
fi
|
||||
done
|
||||
if [ -z "$JAVA_HOME" ]; then
|
||||
echo "Did not find jni.h"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Use special scripts for Android builds
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
|
||||
export ANDROID_NDK=/opt/ndk
|
||||
build_args=()
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-arm-v7a* ]]; then
|
||||
build_args+=("-DANDROID_ABI=armeabi-v7a")
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *-arm-v8a* ]]; then
|
||||
build_args+=("-DANDROID_ABI=arm64-v8a")
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *-x86_32* ]]; then
|
||||
build_args+=("-DANDROID_ABI=x86")
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *-x86_64* ]]; then
|
||||
build_args+=("-DANDROID_ABI=x86_64")
|
||||
fi
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
|
||||
build_args+=("-DUSE_VULKAN=ON")
|
||||
fi
|
||||
build_args+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
|
||||
exec ./scripts/build_android.sh "${build_args[@]}" "$@"
|
||||
fi
|
||||
|
||||
# Non-Android Vulkan builds: enable the backend and load the Vulkan SDK's
# environment setup script from the Jenkins worker image.
if [[ "$BUILD_ENVIRONMENT" != *android* && "$BUILD_ENVIRONMENT" == *vulkan* ]]; then
  export USE_VULKAN=1
  # shellcheck disable=SC1091
  source /var/lib/jenkins/vulkansdk/setup-env.sh
fi
|
||||
|
||||
# ROCm-specific build setup: job parallelism, target GPU arch, and hipify.
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  # hcc used to run out of memory, silently exiting without stopping
  # the build process, leaving undefined symbols in the shared lib,
  # causing undefined symbol errors when later running tests.
  # We used to set MAX_JOBS to 4 to avoid, but this is no longer an issue.
  # ${MAX_JOBS:-} keeps the check safe when the variable is unset (set -u).
  if [ -z "${MAX_JOBS:-}" ]; then
    export MAX_JOBS=$(($(nproc) - 1))
  fi

  if [[ -n "${CI:-}" && -z "${PYTORCH_ROCM_ARCH:-}" ]]; then
    # Set ROCM_ARCH to gfx906 for CI builds, if user doesn't override.
    echo "Limiting PYTORCH_ROCM_ARCH to gfx906 for CI builds"
    export PYTORCH_ROCM_ARCH="gfx906"
  fi

  # hipify sources: rewrites CUDA sources into HIP equivalents in-place.
  python tools/amd_build/build_amd.py
fi
|
||||
|
||||
# sccache will fail for CUDA builds if all cores are used for compiling
# gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
# ${MAX_JOBS:-} keeps the check safe when the variable is unset (set -u);
# `command -v` is the portable, builtin replacement for `which`.
if [ -z "${MAX_JOBS:-}" ]; then
  if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && command -v sccache > /dev/null; then
    export MAX_JOBS=$(($(nproc) - 1))
  fi
fi
|
||||
|
||||
# TORCH_CUDA_ARCH_LIST must be passed from an environment variable.
# CUDA builds refuse to guess the target architectures: fail fast instead.
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ -z "$TORCH_CUDA_ARCH_LIST" ]]; then
  echo "TORCH_CUDA_ARCH_LIST must be defined"
  exit 1
fi
|
||||
|
||||
# Clang build environments compile with clang/clang++ instead of gcc.
case "${BUILD_ENVIRONMENT}" in
  *clang*)
    export CC=clang
    export CXX=clang++
    ;;
esac
|
||||
|
||||
# Skip per-operator header generation for the "no-ops" build flavor.
case "${BUILD_ENVIRONMENT}" in
  *no-ops*) export USE_PER_OPERATOR_HEADERS=0 ;;
esac

# Exercise the precompiled-header build path for "-pch" flavors.
case "${BUILD_ENVIRONMENT}" in
  *-pch*) export USE_PRECOMPILED_HEADERS=1 ;;
esac

# This specific gcc7 job also validates the OpenSSL-backed Gloo build.
case "${BUILD_ENVIRONMENT}" in
  *linux-focal-py3.7-gcc7-build*) export USE_GLOO_WITH_OPENSSL=ON ;;
esac

# Static-runtime benchmarks are built everywhere except Android and CUDA.
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
  export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi
|
||||
|
||||
# Main build dispatch: bazel jobs build with bazel, everything else goes
# through setup.py (wheel) or tools/build_libtorch.py (libtorch-only).
if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
  set -e

  get_bazel

  # Leave 1 CPU free and use only up to 80% of memory to reduce the change of crashing
  # the runner
  BAZEL_MEM_LIMIT="--local_ram_resources=HOST_RAM*.8"
  BAZEL_CPU_LIMIT="--local_cpu_resources=HOST_CPUS-1"

  tools/bazel build --config=no-tty "${BAZEL_MEM_LIMIT}" "${BAZEL_CPU_LIMIT}" //...
  # Build torch, the Python module, and tests for CPU-only
  tools/bazel build --config=no-tty "${BAZEL_MEM_LIMIT}" "${BAZEL_CPU_LIMIT}" --config=cpu-only :torch :_C.so :all_tests

else
  # check that setup.py would fail with bad arguments
  # (the "clean]" argument below is intentionally malformed)
  echo "The next three invocations are expected to fail with invalid command error messages."
  ( ! get_exit_code python setup.py bad_argument )
  ( ! get_exit_code python setup.py clean] )
  ( ! get_exit_code python setup.py clean bad_argument )

  if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then

    # rocm builds fail when WERROR=1
    # XLA test build fails when WERROR=1
    # set only when building other architectures
    # or building non-XLA tests.
    if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
          "$BUILD_ENVIRONMENT" != *xla* ]]; then
      WERROR=1 python setup.py bdist_wheel
    else
      python setup.py bdist_wheel
    fi
    pip_install_whl "$(echo dist/*.whl)"

    # TODO: I'm not sure why, but somehow we lose verbose commands
    set -x

    assert_git_not_dirty
    # Copy ninja build logs to dist folder
    mkdir -p dist
    if [ -f build/.ninja_log ]; then
      cp build/.ninja_log dist
    fi

    if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
      # remove sccache wrappers post-build; runtime compilation of MIOpen kernels does not yet fully support them
      sudo rm -f /opt/cache/bin/cc
      sudo rm -f /opt/cache/bin/c++
      sudo rm -f /opt/cache/bin/gcc
      sudo rm -f /opt/cache/bin/g++
      pushd /opt/rocm/llvm/bin
      if [[ -d original ]]; then
        sudo mv original/clang .
        sudo mv original/clang++ .
      fi
      sudo rm -rf original
      popd
    fi

    # Each custom artifact build below cmake-configures against the freshly
    # installed torch wheel and compiles a small C++ test target.
    CUSTOM_TEST_ARTIFACT_BUILD_DIR=${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}
    CUSTOM_TEST_USE_ROCM=$([[ "$BUILD_ENVIRONMENT" == *rocm* ]] && echo "ON" || echo "OFF")
    CUSTOM_TEST_MODULE_PATH="${PWD}/cmake/public"
    mkdir -pv "${CUSTOM_TEST_ARTIFACT_BUILD_DIR}"

    # Build custom operator tests.
    CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
    CUSTOM_OP_TEST="$PWD/test/custom_operator"
    python --version
    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
    mkdir -p "$CUSTOM_OP_BUILD"
    pushd "$CUSTOM_OP_BUILD"
    cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
    make VERBOSE=1
    popd
    assert_git_not_dirty

    # Build jit hook tests
    JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
    JIT_HOOK_TEST="$PWD/test/jit_hooks"
    python --version
    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
    mkdir -p "$JIT_HOOK_BUILD"
    pushd "$JIT_HOOK_BUILD"
    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
    make VERBOSE=1
    popd
    assert_git_not_dirty

    # Build custom backend tests.
    CUSTOM_BACKEND_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-backend-build"
    CUSTOM_BACKEND_TEST="$PWD/test/custom_backend"
    python --version
    mkdir -p "$CUSTOM_BACKEND_BUILD"
    pushd "$CUSTOM_BACKEND_BUILD"
    cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
    make VERBOSE=1
    popd
    assert_git_not_dirty
  else
    # Test no-Python build
    echo "Building libtorch"

    # This is an attempt to mitigate flaky libtorch build OOM error. By default, the build parallelization
    # is set to be the number of CPU minus 2. So, let's try a more conservative value here. A 4xlarge has
    # 16 CPUs
    MAX_JOBS=$(nproc --ignore=4)
    export MAX_JOBS

    # NB: Install outside of source directory (at the same level as the root
    # pytorch folder) so that it doesn't get cleaned away prior to docker push.
    BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
    mkdir -p ../cpp-build/caffe2
    pushd ../cpp-build/caffe2
    WERROR=1 VERBOSE=1 DEBUG=1 python "$BUILD_LIBTORCH_PY"
    popd
  fi
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]]; then
  # export test times so that potential sharded tests that'll branch off this build will use consistent data
  # don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
  python tools/stats/export_test_times.py
fi

# Emit sccache hit/miss statistics (helper defined in common_utils.sh).
print_sccache_stats
|
|
@ -0,0 +1,58 @@
|
|||
#!/usr/bin/env bash

# Runs all of PyTorch's code generators into a single output directory.
#
# This script can also be used to test whether your diff changes any codegen output.
#
# Run it before and after your change:
#   .ci/pytorch/codegen-test.sh <baseline_output_dir>
#   .ci/pytorch/codegen-test.sh <test_output_dir>
#
# Then run diff to compare the generated files:
#   diff -Naur <baseline_output_dir> <test_output_dir>

set -eu -o pipefail

# With no argument: source CI common setup and write to a default directory
# two levels above this script; otherwise write to the directory given in $1.
if [ "$#" -eq 0 ]; then
  # shellcheck source=./common.sh
  source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
  OUT="$(dirname "${BASH_SOURCE[0]}")/../../codegen_result"
else
  OUT=$1
fi

set -x

# Start from a clean output directory so stale files never mask a diff.
rm -rf "$OUT"

# aten codegen
python -m torchgen.gen \
  -s aten/src/ATen \
  -d "$OUT"/torch/share/ATen

# torch codegen
python -m tools.setup_helpers.generate_code \
  --install_dir "$OUT"

# pyi codegen
mkdir -p "$OUT"/pyi/torch/_C
mkdir -p "$OUT"/pyi/torch/nn
python -m tools.pyi.gen_pyi \
  --native-functions-path aten/src/ATen/native/native_functions.yaml \
  --tags-path aten/src/ATen/native/tags.yaml \
  --deprecated-functions-path tools/autograd/deprecated.yaml \
  --out "$OUT"/pyi

# autograd codegen (called by torch codegen but can run independently)
python -m tools.autograd.gen_autograd \
  "$OUT"/torch/share/ATen/Declarations.yaml \
  aten/src/ATen/native/native_functions.yaml \
  aten/src/ATen/native/tags.yaml \
  "$OUT"/autograd \
  tools/autograd

# annotated_fn_args codegen (called by torch codegen but can run independently)
mkdir -p "$OUT"/annotated_fn_args
python -m tools.autograd.gen_annotated_fn_args \
  aten/src/ATen/native/native_functions.yaml \
  aten/src/ATen/native/tags.yaml \
  "$OUT"/annotated_fn_args \
  tools/autograd
|
|
@ -0,0 +1,58 @@
|
|||
#!/bin/bash
# Shared build-phase setup: starts sccache/ccache and registers EXIT traps
# that dump their logs and statistics when the build finishes or fails.
# Required environment variables:
#   $BUILD_ENVIRONMENT (should be set by your Docker image)

if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
  # Save the absolute path in case later we chdir (as occurs in the gpu perf test)
  script_dir="$( cd "$(dirname "${BASH_SOURCE[0]}")" || exit ; pwd -P )"

  if which sccache > /dev/null; then
    # Save sccache logs to file
    sccache --stop-server > /dev/null  2>&1 || true
    rm -f ~/sccache_error.log || true

    # Dump the sccache error log and stats; registered on EXIT below.
    function sccache_epilogue() {
      echo "::group::Sccache Compilation Log"
      echo '=================== sccache compilation log ==================='
      python "$script_dir/print_sccache_log.py" ~/sccache_error.log 2>/dev/null || true
      echo '=========== If your build fails, please take a look at the log above for possible reasons ==========='
      sccache --show-stats
      sccache --stop-server || true
      echo "::endgroup::"
    }

    # Register the function here so that the error log can be printed even when
    # sccache fails to start, i.e. timeout error
    # (trap_add comes from common_utils.sh and composes with existing traps)
    trap_add sccache_epilogue EXIT

    if [[ -n "${SKIP_SCCACHE_INITIALIZATION:-}" ]]; then
      # sccache --start-server seems to hang forever on self hosted runners for GHA
      # so let's just go ahead and skip the --start-server altogether since it seems
      # as though sccache still gets used even when the sscache server isn't started
      # explicitly
      echo "Skipping sccache server initialization, setting environment variables"
      export SCCACHE_IDLE_TIMEOUT=1200
      export SCCACHE_ERROR_LOG=~/sccache_error.log
      export RUST_LOG=sccache::server=error
    elif [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
      # ROCm: never idle-kill the server (SCCACHE_IDLE_TIMEOUT=0 disables it)
      SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
    else
      # increasing SCCACHE_IDLE_TIMEOUT so that extension_backend_test.cpp can build after this PR:
      # https://github.com/pytorch/pytorch/pull/16645
      SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=1200 RUST_LOG=sccache::server=error sccache --start-server
    fi

    # Report sccache stats for easier debugging
    sccache --zero-stats
  fi

  if which ccache > /dev/null; then
    # Report ccache stats for easier debugging
    ccache --zero-stats
    ccache --show-stats
    function ccache_epilogue() {
      ccache --show-stats
    }
    trap_add ccache_epilogue EXIT
  fi
fi
|
|
@ -0,0 +1,28 @@
|
|||
#!/bin/bash

# Common setup for all Jenkins scripts
# shellcheck source=./common_utils.sh
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
set -ex

# Required environment variables:
#   $BUILD_ENVIRONMENT (should be set by your Docker image)

# Figure out which Python to use for ROCm
if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
  # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
  unset HIP_PLATFORM
  export PYTORCH_TEST_WITH_ROCM=1
  # temporary to locate some kernel issues on the CI nodes
  export HSAKMT_DEBUG_LEVEL=4
  # improve rccl performance for distributed tests
  export HSA_FORCE_FINE_GRAIN_PCIE=1
fi

# TODO: Renable libtorch testing for MacOS, see https://github.com/pytorch/pytorch/issues/62598
# shellcheck disable=SC2034
BUILD_TEST_LIBTORCH=0
|
||||
|
||||
# Run a command; on failure, retry twice more with short growing pauses
# (1s, then 2s). The status of the final attempt is returned.
retry () {
  "$@" \
    || (sleep 1 && "$@") \
    || (sleep 2 && "$@")
}
|
|
@ -0,0 +1,236 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Common util **functions** that can be sourced in other scripts.
|
||||
|
||||
# note: printf is used instead of echo to avoid backslash
# processing and to properly handle values that begin with a '-'.

# Print all arguments to stdout, newline-terminated.
log() {
  printf '%s\n' "$*"
}

# Print an ERROR-prefixed message to stderr.
error() {
  log "ERROR: $*" >&2
}

# Print an error message and abort the script with status 1.
fatal() {
  error "$@"
  exit 1
}
|
||||
|
||||
# Retry a flaky command up to four attempts with a growing backoff
# (10s, 20s, 40s between attempts); the final attempt's status wins.
retry () {
  "$@" \
    || (sleep 10 && "$@") \
    || (sleep 20 && "$@") \
    || (sleep 40 && "$@")
}
|
||||
|
||||
# compositional trap taken from https://stackoverflow.com/a/7287873/23845
# appends a command to a trap
#
# - 1st arg: code to add
# - remaining args: names of traps to modify
#
# NOTE: trap_add_cmd/trap_add_name are intentionally not `local` here
# (matches the upstream snippet); they leak into the caller's scope.
trap_add() {
  trap_add_cmd=$1; shift || fatal "${FUNCNAME[0]} usage error"
  for trap_add_name in "$@"; do
    trap -- "$(
      # helper fn to get existing trap command from output
      # of trap -p
      extract_trap_cmd() { printf '%s\n' "$3"; }
      # print existing trap command with newline
      eval "extract_trap_cmd $(trap -p "${trap_add_name}")"
      # print the new trap command
      printf '%s\n' "${trap_add_cmd}"
    )" "${trap_add_name}" \
      || fatal "unable to add to trap ${trap_add_name}"
  done
}
# set the trace attribute for the above function. this is
# required to modify DEBUG or RETURN traps because functions don't
# inherit them unless the trace attribute is set
declare -f -t trap_add
|
||||
# Fail the build (exit 1) if the git checkout has uncommitted changes.
# rocm builds hipify sources in place and xla builds patch the tree,
# so those environments are exempt from the check.
function assert_git_not_dirty() {
  # TODO: we should add an option to `build_amd.py` that reverts the repo to
  # an unmodified state.
  case "$BUILD_ENVIRONMENT" in
    *rocm*|*xla*)
      return 0
      ;;
  esac
  git_status=$(git status --porcelain)
  if [[ -n "$git_status" ]]; then
    echo "Build left local git repository checkout dirty"
    echo "git status --porcelain:"
    echo "${git_status}"
    exit 1
  fi
}
|
||||
|
||||
# Install already-built wheel artifacts (e.g. the freshly built torch
# wheel) strictly offline: no index lookups, no dependency resolution.
function pip_install_whl() {
  python3 -mpip install --no-index --no-deps "$@"
}
|
||||
|
||||
# pip install with retries: three attempts with "--progress-bar off"
# (quiet logs), then three plain attempts because old pip versions do
# not understand the "--progress-bar" flag at all.
function pip_install() {
  pip install --progress-bar off "$@" \
    || pip install --progress-bar off "$@" \
    || pip install --progress-bar off "$@" \
    || pip install "$@" \
    || pip install "$@" \
    || pip install "$@"
}
|
||||
|
||||
# pip uninstall occasionally fails transiently; attempt it twice.
function pip_uninstall() {
  pip uninstall -y "$@" \
    || pip uninstall -y "$@"
}
|
||||
|
||||
# Run a command with `set -e` temporarily disabled so its failure does not
# abort the script, and return its exit status. NOTE: unconditionally
# re-enables `set -e` afterwards; the status is also left in $retcode.
function get_exit_code() {
  set +e
  "$@"
  retcode=$?
  set -e
  return "$retcode"
}
|
||||
|
||||
# Download a pinned bazel 4.2.1 binary into tools/bazel and verify its
# SHA-256 checksum before marking it executable.
function get_bazel() {
  if [[ $(uname) == "Darwin" ]]; then
    # download bazel version (-L follows GitHub's release redirect)
    retry curl https://github.com/bazelbuild/bazel/releases/download/4.2.1/bazel-4.2.1-darwin-x86_64 -Lo tools/bazel
    # verify content
    echo '74d93848f0c9d592e341e48341c53c87e3cb304a54a2a1ee9cff3df422f0b23c  tools/bazel' | shasum -a 256 -c >/dev/null
  else
    # download bazel version (direct S3 mirror, no redirect needed)
    retry curl https://ossci-linux.s3.amazonaws.com/bazel-4.2.1-linux-x86_64 -o tools/bazel
    # verify content
    echo '1a4f3a3ce292307bceeb44f459883859c793436d564b95319aacb8af1f20557c  tools/bazel' | shasum -a 256 -c >/dev/null
  fi

  chmod +x tools/bazel
}
|
||||
|
||||
# Install the MonkeyType runtime-type-tracing tool via the retrying
# pip helper.
function install_monkeytype {
  pip_install MonkeyType
}
|
||||
|
||||
|
||||
# Print the pinned commit hash for dependency "$1", as recorded in
# .github/ci_commit_pins/<name>.txt relative to the current directory.
function get_pinned_commit() {
  local pin_file=".github/ci_commit_pins/${1}.txt"
  cat "$pin_file"
}
|
||||
|
||||
# Build and install torchtext from source at the commit pinned for CI.
function install_torchtext() {
  local commit
  commit=$(get_pinned_commit text)
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/text.git@${commit}"
}

# Build and install torchvision from source at the commit pinned for CI.
function install_torchvision() {
  local commit
  commit=$(get_pinned_commit vision)
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${commit}"
}
|
||||
|
||||
# Clone pytorch/xla (branch r2.0) into ./xla if not already present, then
# reset it to the commit pinned in .github/ci_commit_pins/xla.txt.
function clone_pytorch_xla() {
  if [[ ! -d ./xla ]]; then
    git clone --recursive -b r2.0 --quiet https://github.com/pytorch/xla.git
    pushd xla
    # pin the xla hash so that we don't get broken by changes to xla
    git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
    git submodule sync
    git submodule update --init --recursive
    popd
  fi
}
|
||||
|
||||
# Install the `filelock` package (inter-process file locking helper).
function install_filelock() {
  pip_install filelock
}
|
||||
|
||||
# Build and install Triton from source at the pinned commit, plus its
# jinja2 runtime dependency. Skipped on ROCm test configs.
function install_triton() {
  local commit
  if [[ "${TEST_CONFIG}" == *rocm* ]]; then
    echo "skipping triton due to rocm"
  else
    commit=$(get_pinned_commit triton)
    if [[ "${BUILD_ENVIRONMENT}" == *gcc7* ]]; then
      # Triton needs gcc-9 to build
      sudo apt-get install -y g++-9
      CXX=g++-9 pip_install --user "git+https://github.com/openai/triton@${commit}#subdirectory=python"
    elif [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
      # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
      sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
      sudo apt-get install -y g++-9
      CXX=g++-9 pip_install --user "git+https://github.com/openai/triton@${commit}#subdirectory=python"
    else
      pip_install --user "git+https://github.com/openai/triton@${commit}#subdirectory=python"
    fi
    pip_install --user jinja2
  fi
}
|
||||
|
||||
# Prepare the toolchain for building multipy/torch::deploy: a static
# libpython in the conda env plus the system gcc/g++ as CC/CXX.
function setup_torchdeploy_deps(){
  conda install -y -n "py_${ANACONDA_PYTHON_VERSION}" "libpython-static=${ANACONDA_PYTHON_VERSION}"
  local CC
  local CXX
  CC="$(which gcc)"
  CXX="$(which g++)"
  export CC
  export CXX
  pip install --upgrade pip
}

# Clone pytorch/multipy next to the pytorch checkout at the pinned commit,
# generate its example artifacts, and install it editable.
function checkout_install_torchdeploy() {
  local commit
  commit=$(get_pinned_commit multipy)
  setup_torchdeploy_deps
  pushd ..
  git clone --recurse-submodules https://github.com/pytorch/multipy.git
  pushd multipy
  git checkout "${commit}"
  python multipy/runtime/example/generate_examples.py
  pip install -e .
  popd
  popd
}

# Run the prebuilt multipy deploy test binary from the sibling checkout.
function test_torch_deploy(){
  pushd ..
  pushd multipy
  ./multipy/runtime/build/test_deploy
  popd
  popd
}
|
||||
|
||||
# Install HuggingFace transformers (benchmark dependency) at the pinned
# commit, plus the pandas/scipy packages its benchmarks rely on.
function install_huggingface() {
  local commit
  commit=$(get_pinned_commit huggingface)
  pip_install pandas
  pip_install scipy
  pip_install "git+https://github.com/huggingface/transformers.git@${commit}#egg=transformers"
}

# Install timm (pytorch-image-models benchmark dependency) at the pinned
# commit, plus pandas/scipy.
function install_timm() {
  local commit
  commit=$(get_pinned_commit timm)
  pip_install pandas
  pip_install scipy
  pip_install "git+https://github.com/rwightman/pytorch-image-models@${commit}"
}
|
||||
|
||||
# Clone pytorch/benchmark (branch no_torchaudio) into ./torchbench and run
# its installer. Any arguments select specific models to install; with no
# arguments, all models are installed.
function checkout_install_torchbench() {
  git clone https://github.com/pytorch/benchmark torchbench
  pushd torchbench
  git checkout no_torchaudio

  # ${1:-} makes the check safe when no model was requested: a bare "$1"
  # is an unbound-variable error under `set -u`.
  if [ -n "${1:-}" ]; then
    python install.py --continue_on_fail models "$@"
  else
    # Occasionally the installation may fail on one model but it is ok to continue
    # to install and test other models
    python install.py --continue_on_fail
  fi
  popd
}
|
||||
|
||||
# Run the functorch test suite through the standard PyTorch test runner.
function test_functorch() {
  python test/run_test.py --functorch --verbose
}
|
||||
|
||||
# Print sccache statistics; when OUR_GITHUB_JOB_ID is set, also dump them
# as JSON (via jq) for upload as a per-job build artifact.
# NOTE(review): the variable is named OUR_GITHUB_JOB_ID (not GITHUB_JOB_ID);
# presumably exported by the CI workflow — verify against the workflow yaml.
function print_sccache_stats() {
  echo 'PyTorch Build Statistics'
  sccache --show-stats

  if [[ -n "${OUR_GITHUB_JOB_ID}" ]]; then
    sccache --show-stats --stats-format json | jq .stats \
      > "sccache-stats-${BUILD_ENVIRONMENT}-${OUR_GITHUB_JOB_ID}.json"
  else
    echo "env var OUR_GITHUB_JOB_ID not set, will not write sccache stats to json"
  fi
}
|
|
@ -0,0 +1,96 @@
|
|||
from datetime import datetime, timedelta
|
||||
from tempfile import mkdtemp
|
||||
from cryptography.hazmat.primitives import serialization
|
||||
from cryptography.hazmat.primitives.asymmetric import rsa
|
||||
from cryptography import x509
|
||||
from cryptography.x509.oid import NameOID
|
||||
from cryptography.hazmat.primitives import hashes
|
||||
|
||||
temp_dir = mkdtemp()
|
||||
print(temp_dir)
|
||||
|
||||
|
||||
def genrsa(path):
    """Generate a 2048-bit RSA private key, write it to ``path`` as an
    unencrypted PEM file (traditional OpenSSL format), and return the key."""
    key = rsa.generate_private_key(
        public_exponent=65537,
        key_size=2048,
    )
    pem = key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.TraditionalOpenSSL,
        encryption_algorithm=serialization.NoEncryption(),
    )
    with open(path, "wb") as f:
        f.write(pem)
    return key
|
||||
|
||||
|
||||
def create_cert(path, C, ST, L, O, key):
    """Create a self-signed CA certificate for the given subject, valid for
    10 days, write it to ``path`` as PEM, and return the certificate."""
    # Self-signed: subject and issuer are the same name.
    name = x509.Name([
        x509.NameAttribute(NameOID.COUNTRY_NAME, C),
        x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
        x509.NameAttribute(NameOID.LOCALITY_NAME, L),
        x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
    ])
    builder = (
        x509.CertificateBuilder()
        .subject_name(name)
        .issuer_name(name)
        .public_key(key.public_key())
        .serial_number(x509.random_serial_number())
        .not_valid_before(datetime.utcnow())
        # Our certificate will be valid for 10 days
        .not_valid_after(datetime.utcnow() + timedelta(days=10))
        # Mark as a CA certificate so it may sign other certificates.
        .add_extension(
            x509.BasicConstraints(ca=True, path_length=None), critical=True,
        )
    )
    cert = builder.sign(key, hashes.SHA256())
    # Write our certificate out to disk.
    with open(path, "wb") as f:
        f.write(cert.public_bytes(serialization.Encoding.PEM))
    return cert
|
||||
|
||||
|
||||
def create_req(path, C, ST, L, O, key):
    """Create a certificate signing request for the given subject, write it
    to ``path`` as PEM, and return the CSR."""
    # Provide various details about who we are.
    subject = x509.Name([
        x509.NameAttribute(NameOID.COUNTRY_NAME, C),
        x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
        x509.NameAttribute(NameOID.LOCALITY_NAME, L),
        x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
    ])
    builder = x509.CertificateSigningRequestBuilder().subject_name(subject)
    csr = builder.sign(key, hashes.SHA256())
    with open(path, "wb") as f:
        f.write(csr.public_bytes(serialization.Encoding.PEM))
    return csr
|
||||
|
||||
|
||||
def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
    """Sign a CSR with the CA's private key to produce a leaf certificate
    valid for 10 days; write it to ``path`` as PEM and return it."""
    builder = (
        x509.CertificateBuilder()
        .subject_name(csr_cert.subject)
        .issuer_name(ca_cert.subject)
        .public_key(csr_cert.public_key())
        .serial_number(x509.random_serial_number())
        .not_valid_before(datetime.utcnow())
        # Our certificate will be valid for 10 days
        .not_valid_after(datetime.utcnow() + timedelta(days=10))
    )
    # Sign our certificate with our private key
    cert = builder.sign(private_ca_key, hashes.SHA256())
    with open(path, "wb") as f:
        f.write(cert.public_bytes(serialization.Encoding.PEM))
    return cert
|
||||
|
||||
|
||||
# Create the CA key and self-signed CA certificate, then a leaf key with a
# CSR, and finally sign the CSR with the CA — producing the certificate
# chain written into temp_dir.
ca_key = genrsa(temp_dir + "/ca.key")
ca_cert = create_cert(temp_dir + "/ca.pem", u"US", u"New York", u"New York", u"Gloo Certificate Authority", ca_key)

pkey = genrsa(temp_dir + "/pkey.key")
csr = create_req(temp_dir + "/csr.csr", u"US", u"California", u"San Francisco", u"Gloo Testing Company", pkey)

cert = sign_certificate_request(temp_dir + "/cert.pem", csr, ca_cert, ca_key)
|
|
@ -0,0 +1,6 @@
|
|||
#!/bin/bash

# Build the PyTorch docker image from the Dockerfile in the current
# directory, tagged "pytorch".
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

docker build -t pytorch .
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/bash

# Run the Sphinx doctest target against the docs/ sources.
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

echo "Testing pytorch docs"

cd docs
pip_install -r requirements.txt
make doctest
|
|
@ -0,0 +1 @@
|
|||
# Decoy module: importing it fails loudly, proving that this directory's
# fake "numpy" is shadowing (or not shadowing) the real installation.
raise ModuleNotFoundError("Sorry PyTorch, but our NumPy is in the other folder")
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/bash

# macOS CI dispatcher: run the build phase, the test phase, or both,
# selected by the -build/-test suffix in BUILD_ENVIRONMENT. When
# BUILD_ENVIRONMENT is empty (local runs), both phases execute.
if [ -z "${BUILD_ENVIRONMENT}" ] || [[ "${BUILD_ENVIRONMENT}" == *-build* ]]; then
  # shellcheck source=./macos-build.sh
  source "$(dirname "${BASH_SOURCE[0]}")/macos-build.sh"
fi

if [ -z "${BUILD_ENVIRONMENT}" ] || [[ "${BUILD_ENVIRONMENT}" == *-test* ]]; then
  # shellcheck source=./macos-test.sh
  source "$(dirname "${BASH_SOURCE[0]}")/macos-test.sh"
fi
|
|
@ -0,0 +1,80 @@
|
|||
#!/bin/bash

# macOS build entry point: compiles PyTorch for x86_64, arm64
# (cross-compile), or the lite interpreter, depending on BUILD_ENVIRONMENT.
# shellcheck disable=SC2034
# shellcheck source=./macos-common.sh
source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

# Build PyTorch
# Local (non-CI) runs need an explicit Xcode toolchain selection.
if [ -z "${CI}" ]; then
  export DEVELOPER_DIR=/Applications/Xcode9.app/Contents/Developer
fi
|
||||
|
||||
# This helper function wraps calls to binaries with sccache, but only if they're not already wrapped with sccache.
# For example, `clang` will be `sccache clang`, but `sccache clang` will not become `sccache sccache clang`.
# The way this is done is by detecting the command of the parent pid of the current process and checking whether
# that is sccache, and wrapping sccache around the process if its parent were not already sccache.
function write_sccache_stub() {
  # $1: path of the stub script to create; the wrapped binary is resolved
  # from PATH by basename.
  output=$1
  binary=$(basename "${output}")

  printf "#!/bin/sh\nif [ \$(ps auxc \$(ps auxc -o ppid \$\$ | grep \$\$ | rev | cut -d' ' -f1 | rev) | tr '\\\\n' ' ' | rev | cut -d' ' -f2 | rev) != sccache ]; then\n  exec sccache %s \"\$@\"\nelse\n  exec %s \"\$@\"\nfi" "$(which "${binary}")" "$(which "${binary}")" > "${output}"
  chmod a+x "${output}"
}
|
||||
|
||||
# When sccache is available, shadow clang/clang++ with sccache-wrapping
# stubs placed first on PATH; the temp directory is removed on exit.
if which sccache > /dev/null; then
  # Create temp directory for sccache shims
  tmp_dir=$(mktemp -d)
  trap 'rm -rfv ${tmp_dir}' EXIT
  write_sccache_stub "${tmp_dir}/clang++"
  write_sccache_stub "${tmp_dir}/clang"

  export PATH="${tmp_dir}:$PATH"
fi
|
||||
|
||||
cross_compile_arm64() {
  # Cross compilation for arm64
  # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
  # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
  USE_DISTRIBUTED=0 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_MKLDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
}

# Native x86_64 wheel build (warnings are errors; distributed disabled).
compile_x86_64() {
  USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel
}
|
||||
|
||||
# Build libtorch with the lite interpreter into ../cpp_build and run its
# runtime smoke test binary.
build_lite_interpreter() {
  echo "Testing libtorch (lite interpreter)."

  CPP_BUILD="$(pwd)/../cpp_build"
  # Ensure the removal of the tmp directory
  # NOTE(review): this `trap ... EXIT` REPLACES the EXIT trap installed by
  # the sccache-shim block above (which removed ${tmp_dir}); consider using
  # trap_add from common_utils.sh instead — confirm intended behavior.
  trap 'rm -rfv ${CPP_BUILD}' EXIT
  rm -rf "${CPP_BUILD}"
  mkdir -p "${CPP_BUILD}/caffe2"

  # It looks libtorch need to be built in "${CPP_BUILD}/caffe2 folder.
  BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
  pushd "${CPP_BUILD}/caffe2" || exit
  VERBOSE=1 DEBUG=1 python "${BUILD_LIBTORCH_PY}"
  popd || exit

  "${CPP_BUILD}/caffe2/build/bin/test_lite_interpreter_runtime"
}
|
||||
|
||||
# Select the build flavor from the BUILD_ENVIRONMENT tag; plain x86_64 is
# the default when no flavor-specific marker is present.
case "${BUILD_ENVIRONMENT}" in
  *arm64*)
    cross_compile_arm64
    ;;
  *lite-interpreter*)
    export BUILD_LITE_INTERPRETER=1
    build_lite_interpreter
    ;;
  *)
    compile_x86_64
    ;;
esac
|
||||
|
||||
# Report compiler-cache effectiveness for this build, if sccache is present.
if which sccache > /dev/null; then
  print_sccache_stats
fi

# Export test timing data so test shards split consistently.
python tools/stats/export_test_times.py

assert_git_not_dirty
|
|
@ -0,0 +1,14 @@
|
|||
#!/bin/bash

# Common prelude for macos-build.sh and macos-test.sh

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

# Log the CPU model for debugging hardware-dependent failures.
sysctl -a | grep machdep.cpu

# These are required for both the build job and the test job.
# In the latter to test cpp extensions.
export MACOSX_DEPLOYMENT_TARGET=10.9
export CXX=clang++
export CC=clang
|
|
@ -0,0 +1,186 @@
|
|||
#!/bin/bash

# macOS test entry point: verifies the installed wheel and runs the
# Python/C++ test suites.
# shellcheck disable=SC2034
# shellcheck source=./macos-common.sh
source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"

if [[ -n "$CONDA_ENV" ]]; then
  # Use binaries under conda environment
  export PATH="$CONDA_ENV/bin":$PATH
fi

# Test that OpenMP is enabled for non-arm64 build
# (run from test/ so `import torch` picks up the installed wheel, not the
# source tree)
if [[ ${BUILD_ENVIRONMENT} != *arm64* ]]; then
  pushd test
  if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then
    echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False"
    exit 1
  fi
  popd
fi
||||
|
||||
# Environment preparation shared by all Python test invocations.
setup_test_python() {
  # The CircleCI worker hostname doesn't resolve to an address.
  # This environment variable makes ProcessGroupGloo default to
  # using the address associated with the loopback interface.
  export GLOO_SOCKET_IFNAME=lo0
  echo "Ninja version: $(ninja --version)"

  # Increase default limit on open file handles from 256 to 1024
  ulimit -n 1024
}
|
||||
|
||||
# Run the entire Python test suite (minus the JIT executor tests) and
# verify the checkout is still clean afterwards.
test_python_all() {
  setup_test_python

  time python test/run_test.py --verbose --exclude-jit-executor

  assert_git_not_dirty
}
|
||||
|
||||
# Run shard $1 of $NUM_TEST_SHARDS of the Python test suite (excluding
# JIT-executor and distributed tests). Exits 1 when the shard count is
# not configured.
test_python_shard() {
  if [[ -z "$NUM_TEST_SHARDS" ]]; then
    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
    exit 1
  fi

  setup_test_python

  time python test/run_test.py --verbose --exclude-jit-executor --exclude-distributed-tests --shard "$1" "$NUM_TEST_SHARDS"

  assert_git_not_dirty
}
|
||||
|
||||
# Build libtorch out-of-tree and run the C++ API tests against the MNIST
# fixture. Gated on BUILD_TEST_LIBTORCH=1 (currently 0 — see common.sh).
test_libtorch() {
  # C++ API

  if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then
    # NB: Install outside of source directory (at the same level as the root
    # pytorch folder) so that it doesn't get cleaned away prior to docker push.
    # But still clean it before we perform our own build.

    echo "Testing libtorch"

    CPP_BUILD="$PWD/../cpp-build"
    rm -rf "$CPP_BUILD"
    mkdir -p "$CPP_BUILD"/caffe2

    BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
    pushd "$CPP_BUILD"/caffe2
    VERBOSE=1 DEBUG=1 python "$BUILD_LIBTORCH_PY"
    popd

    python tools/download_mnist.py --quiet -d test/cpp/api/mnist

    # Unfortunately it seems like the test can't load from miniconda3
    # without these paths being set
    export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$PWD/miniconda3/lib"
    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/miniconda3/lib"
    TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" "$CPP_BUILD"/caffe2/bin/test_api

    assert_git_not_dirty
  fi
}
|
||||
|
||||
print_cmake_info() {
  # Locate cmake (expected to come from the active conda environment),
  # print it for the log, and export CMAKE_EXEC for the custom-build tests.
  # Fix: use POSIX `command -v` instead of the external, non-portable
  # `which` (absent on some minimal images).
  CMAKE_EXEC=$(command -v cmake)
  echo "$CMAKE_EXEC"

  CONDA_INSTALLATION_DIR=$(dirname "$CMAKE_EXEC")
  # Print all libraries under cmake rpath for debugging
  ls -la "$CONDA_INSTALLATION_DIR/../lib"

  export CMAKE_EXEC
  # Explicitly add conda env lib folder to cmake rpath to address the flaky issue
  # where cmake dependencies couldn't be found. This seems to point to how conda
  # links $CMAKE_EXEC to its package cache when cloning a new environment
  install_name_tool -add_rpath @executable_path/../lib "${CMAKE_EXEC}" || true
  # Adding the rpath will invalidate cmake signature, so signing it again here
  # to trust the executable. EXC_BAD_ACCESS (SIGKILL (Code Signature Invalid))
  # with an exit code 137 otherwise
  codesign -f -s - "${CMAKE_EXEC}" || true
}
|
||||
|
||||
test_custom_backend() {
  print_cmake_info

  echo "Testing custom backends"
  pushd test/custom_backend

  # Build the custom backend library against the installed torch package.
  rm -rf build && mkdir build
  pushd build
  SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
  CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
  make VERBOSE=1
  popd

  # Python side: run the tests, then export a lowered module ...
  python test_custom_backend.py -v
  python backend.py --export-module-to=model.pt
  # ... C++ side: load and exercise the exported module.
  build/test_custom_backend ./model.pt
  rm -f ./model.pt

  popd
  assert_git_not_dirty
}
|
||||
|
||||
test_custom_script_ops() {
  print_cmake_info

  echo "Testing custom script operators"
  pushd test/custom_operator

  # Build the custom operator library against the installed torch package.
  rm -rf build && mkdir build
  pushd build
  SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
  CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
  make VERBOSE=1
  popd

  # Python side: run the tests, then export a script module ...
  python test_custom_ops.py -v
  python model.py --export-script-module=model.pt
  # ... C++ side: load the exported script module and run the tests.
  build/test_custom_ops ./model.pt

  popd
  assert_git_not_dirty
}
|
||||
|
||||
test_jit_hooks() {
  print_cmake_info

  echo "Testing jit hooks in cpp"
  pushd test/jit_hooks

  # Build the JIT-hooks test binary against the installed torch package.
  rm -rf build && mkdir build
  pushd build
  SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
  CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
  make VERBOSE=1
  popd

  # Python side: export the script modules under ./model* ...
  python model.py --export-script-module=model
  # ... C++ side: load the exported modules and run the hook tests.
  build/test_jit_hooks ./model

  popd
  assert_git_not_dirty
}
|
||||
|
||||
# Top-level dispatch: choose the test set based on TEST_CONFIG and sharding.
if [[ "${TEST_CONFIG}" == *functorch* ]]; then
  test_functorch
elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then
  test_python_shard "${SHARD_NUMBER}"
  # The C++/custom-op suites are split across the first two shards.
  case "${SHARD_NUMBER}" in
    1)
      test_libtorch
      test_custom_script_ops
      ;;
    2)
      test_jit_hooks
      test_custom_backend
      ;;
  esac
else
  # Single-shard run: everything.
  test_python_all
  test_libtorch
  test_custom_script_ops
  test_jit_hooks
  test_custom_backend
fi
|
|
@ -0,0 +1,49 @@
|
|||
#!/bin/bash

# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

echo "Testing pytorch"

# Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
# OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api

# Core c10d / store / RPC suites, run one at a time.
c10d_suites=(
  distributed/test_c10d_common
  distributed/test_c10d_gloo
  distributed/test_c10d_nccl
  distributed/test_c10d_spawn_gloo
  distributed/test_c10d_spawn_nccl
  distributed/test_store
  distributed/test_pg_wrapper
  distributed/rpc/cuda/test_tensorpipe_agent
)
for suite in "${c10d_suites[@]}"; do
  time python test/run_test.py --verbose -i "$suite"
done

# FSDP tests
for f in test/distributed/fsdp/*.py ; do time python test/run_test.py --verbose -i "${f#*/}" ; done

# ShardedTensor tests
sharded_tensor_suites=(
  distributed/checkpoint/test_checkpoint
  distributed/checkpoint/test_file_system_checkpoint
  distributed/_shard/sharding_spec/test_sharding_spec
  distributed/_shard/sharding_plan/test_sharding_plan
  distributed/_shard/sharded_tensor/test_megatron_prototype
  distributed/_shard/sharded_tensor/test_sharded_tensor
  distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
  distributed/_shard/sharded_tensor/ops/test_chunk
  distributed/_shard/sharded_tensor/ops/test_elementwise_ops
  distributed/_shard/sharded_tensor/ops/test_embedding
  distributed/_shard/sharded_tensor/ops/test_embedding_bag
  distributed/_shard/sharded_tensor/ops/test_binary_cmp
  distributed/_shard/sharded_tensor/ops/test_init
  distributed/_shard/sharded_tensor/ops/test_linear
  distributed/_shard/sharded_tensor/ops/test_math_ops
  distributed/_shard/sharded_tensor/ops/test_matrix_ops
  distributed/_shard/sharded_tensor/ops/test_softmax
  distributed/_shard/sharded_optim/test_sharded_optim
  distributed/_shard/test_partial_tensor
  distributed/_shard/test_replicated_tensor
)
for suite in "${sharded_tensor_suites[@]}"; do
  time python test/run_test.py --verbose -i "$suite"
done

# Other tests
time python test/run_test.py --verbose -i test_cuda_primary_ctx
time python test/run_test.py --verbose -i test_optim -- -k optimizers_with_varying_tensors
assert_git_not_dirty
|
|
@ -0,0 +1,22 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
run_test() {
  # Execute "$@" inside a throw-away scratch directory so every perf test
  # starts from a clean slate; the directory is removed afterwards.
  rm -rf test_tmp/ && mkdir test_tmp/ && cd test_tmp/
  "$@"
  cd .. && rm -rf test_tmp/
}
|
||||
|
||||
get_runtime_of_command () {
  # %R prints the elapsed wall-clock seconds for the `time` builtin.
  TIMEFORMAT=%R

  # `time` reports on stderr, so swap the streams: keep the timing
  # (stderr) and discard the command's own stdout.
  # runtime=$( { time ($@ &> /dev/null); } 2>&1 1>/dev/null)
  runtime=$( { time "$@"; } 2>&1 1>/dev/null)
  # Anything that printed "Error" to stderr is treated as a failed run.
  if [[ "$runtime" == *Error* ]]; then
    exit 1
  fi
  # Strip a leading xtrace echo of the command itself, if present.
  runtime=${runtime#+++ $@}
  # Round-trip through Python to normalize the number.
  runtime=$(python -c "print($runtime)")

  echo "$runtime"
}
|
|
@ -0,0 +1,79 @@
|
|||
import sys
import json
import math
import argparse

# Compare a sample of perf-test runtimes against the stored baseline and
# raise when the sample mean is a statistically significant regression.
# With --update, also write the sample stats into the new baseline file.
parser = argparse.ArgumentParser()
parser.add_argument('--test-name', dest='test_name', action='store',
                    required=True, help='test name')
parser.add_argument('--sample-stats', dest='sample_stats', action='store',
                    required=True, help='stats from sample')
parser.add_argument('--update', action='store_true',
                    help='whether to update baseline using stats from sample')
args = parser.parse_args()

test_name = args.test_name

# The backend is encoded in the test name (e.g. test_cpu_speed_mnist).
if 'cpu' in test_name:
    backend = 'cpu'
elif 'gpu' in test_name:
    backend = 'gpu'
else:
    # Previously `backend` was simply left undefined here and the script
    # crashed later with a NameError; fail early with a clear message.
    raise ValueError(
        "test name '{}' must contain either 'cpu' or 'gpu'".format(test_name))

data_file_path = '../{}_runtime.json'.format(backend)

with open(data_file_path) as data_file:
    data = json.load(data_file)

if test_name in data:
    mean = float(data[test_name]['mean'])
    sigma = float(data[test_name]['sigma'])
else:
    # Let the test pass if baseline number doesn't exist
    mean = sys.maxsize
    sigma = 0.001

print("population mean: ", mean)
print("population sigma: ", sigma)

# Let the test pass if baseline number is NaN (which happened in
# the past when we didn't have logic for catching NaN numbers)
if math.isnan(mean) or math.isnan(sigma):
    mean = sys.maxsize
    sigma = 0.001

sample_stats_data = json.loads(args.sample_stats)

sample_mean = float(sample_stats_data['mean'])
sample_sigma = float(sample_stats_data['sigma'])

print("sample mean: ", sample_mean)
print("sample sigma: ", sample_sigma)

if math.isnan(sample_mean):
    raise Exception('''Error: sample mean is NaN''')
elif math.isnan(sample_sigma):
    raise Exception('''Error: sample sigma is NaN''')

# z-score of the sample mean against the baseline distribution.
z_value = (sample_mean - mean) / sigma

print("z-value: ", z_value)

if z_value >= 3:
    raise Exception('''\n
z-value >= 3, there is high chance of perf regression.\n
To reproduce this regression, run
`cd .ci/pytorch/perf_test/ && bash {}.sh` on your local machine
and compare the runtime before/after your code change.
'''.format(test_name))
else:
    print("z-value < 3, no perf regression detected.")
    if args.update:
        print("We will use these numbers as new baseline.")
        new_data_file_path = '../new_{}_runtime.json'.format(backend)
        with open(new_data_file_path) as new_data_file:
            new_data = json.load(new_data_file)
        # Keep sigma at no less than 10% of the mean so a lucky,
        # low-variance sample does not make future comparisons too strict.
        new_data[test_name] = {}
        new_data[test_name]['mean'] = sample_mean
        new_data[test_name]['sigma'] = max(sample_sigma, sample_mean * 0.1)
        with open(new_data_file_path, 'w') as new_data_file:
            json.dump(new_data, new_data_file, indent=4)
|
|
@ -0,0 +1,16 @@
|
|||
import sys
import json
import numpy

# Each CLI argument is one runtime sample (seconds); print mean/sigma as JSON.
samples = [float(arg.strip()) for arg in sys.argv[1:]]

stats = {
    'mean': numpy.mean(samples),
    'sigma': numpy.std(samples),
}

print(json.dumps(stats))
|
|
@ -0,0 +1,43 @@
|
|||
#!/bin/bash
set -e

. ./common.sh

test_cpu_speed_mini_sequence_labeler () {
  echo "Testing: mini sequence labeler, CPU"

  # Fixed thread counts keep timings comparable across runs.
  export OMP_NUM_THREADS=4
  export MKL_NUM_THREADS=4

  git clone https://github.com/pytorch/benchmark.git
  cd benchmark/
  # Pin the benchmark suite to a known-good revision.
  git checkout 726567a455edbfda6199445922a8cfee82535664
  cd scripts/mini_sequence_labeler

  SAMPLE_ARRAY=()
  NUM_RUNS=$1

  for (( i=1; i<=NUM_RUNS; i++ )); do
    runtime=$(get_runtime_of_command python main.py)
    SAMPLE_ARRAY+=("${runtime}")
  done

  cd ../../..

  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
  echo "Runtime stats in seconds:"
  echo "$stats"

  # $2 selects how the sample is compared against the stored baseline.
  case "$2" in
    compare_with_baseline)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
      ;;
    compare_and_update)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
      ;;
  esac
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  run_test test_cpu_speed_mini_sequence_labeler "$@"
fi
|
|
@ -0,0 +1,45 @@
|
|||
#!/bin/bash
set -e

. ./common.sh

test_cpu_speed_mnist () {
  echo "Testing: MNIST, CPU"

  # Fixed thread counts keep timings comparable across runs.
  export OMP_NUM_THREADS=4
  export MKL_NUM_THREADS=4

  git clone https://github.com/pytorch/examples.git -b perftests
  cd examples/mnist

  conda install -c pytorch torchvision-cpu

  # Download data
  python main.py --epochs 0

  SAMPLE_ARRAY=()
  NUM_RUNS=$1

  for (( i=1; i<=NUM_RUNS; i++ )); do
    runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log)
    echo "$runtime"
    SAMPLE_ARRAY+=("${runtime}")
  done

  cd ../..

  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
  echo "Runtime stats in seconds:"
  echo "$stats"

  # $2 selects how the sample is compared against the stored baseline.
  case "$2" in
    compare_with_baseline)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
      ;;
    compare_and_update)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
      ;;
  esac
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  run_test test_cpu_speed_mnist "$@"
fi
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash

. ./common.sh

test_cpu_speed_torch () {
  echo "Testing: torch.*, CPU"

  # Fixed thread counts keep timings comparable across runs.
  export OMP_NUM_THREADS=4
  export MKL_NUM_THREADS=4

  git clone https://github.com/yf225/perf-tests.git

  # Build the argument list for the perf harness.
  # Fix: bash cannot export array variables to child processes, so the
  # previous `export ARGS=(...)` never exported anything; ARGS is only
  # expanded within this shell, so a plain assignment is the correct form.
  if [ "$1" == "compare_with_baseline" ]; then
    ARGS=(--compare ../cpu_runtime.json)
  elif [ "$1" == "compare_and_update" ]; then
    ARGS=(--compare ../cpu_runtime.json --update ../new_cpu_runtime.json)
  elif [ "$1" == "update_only" ]; then
    ARGS=(--update ../new_cpu_runtime.json)
  fi

  if ! python perf-tests/modules/test_cpu_torch.py "${ARGS[@]}"; then
    echo "To reproduce this regression, run \`cd .ci/pytorch/perf_test/ && bash ${FUNCNAME[0]}.sh\` on your local machine and compare the runtime before/after your code change."
    exit 1
  fi
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  run_test test_cpu_speed_torch "$@"
fi
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash

. ./common.sh

test_cpu_speed_torch_tensor () {
  echo "Testing: torch.Tensor.*, CPU"

  # Fixed thread counts keep timings comparable across runs.
  export OMP_NUM_THREADS=4
  export MKL_NUM_THREADS=4

  git clone https://github.com/yf225/perf-tests.git

  # Build the argument list for the perf harness.
  # Fix: bash cannot export array variables to child processes, so the
  # previous `export ARGS=(...)` never exported anything; ARGS is only
  # expanded within this shell, so a plain assignment is the correct form.
  if [ "$1" == "compare_with_baseline" ]; then
    ARGS=(--compare ../cpu_runtime.json)
  elif [ "$1" == "compare_and_update" ]; then
    ARGS=(--compare ../cpu_runtime.json --update ../new_cpu_runtime.json)
  elif [ "$1" == "update_only" ]; then
    ARGS=(--update ../new_cpu_runtime.json)
  fi

  if ! python perf-tests/modules/test_cpu_torch_tensor.py "${ARGS[@]}"; then
    echo "To reproduce this regression, run \`cd .ci/pytorch/perf_test/ && bash ${FUNCNAME[0]}.sh\` on your local machine and compare the runtime before/after your code change."
    exit 1
  fi
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  run_test test_cpu_speed_torch_tensor "$@"
fi
|
|
@ -0,0 +1,44 @@
|
|||
#!/bin/bash
set -e

. ./common.sh

test_gpu_speed_cudnn_lstm () {
  echo "Testing: CuDNN LSTM, GPU"

  # Fixed thread counts keep timings comparable across runs.
  export OMP_NUM_THREADS=4
  export MKL_NUM_THREADS=4

  git clone https://github.com/pytorch/benchmark.git
  cd benchmark/
  # Pin the benchmark suite to a known-good revision.
  git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0
  cd scripts/

  SAMPLE_ARRAY=()
  NUM_RUNS=$1

  for (( i=1; i<=NUM_RUNS; i++ )); do
    runtime=$(get_runtime_of_command python cudnn_lstm.py --skip-cpu-governor-check)
    echo "$runtime"
    SAMPLE_ARRAY+=("${runtime}")
  done

  cd ../..

  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
  echo "Runtime stats in seconds:"
  echo "$stats"

  # $2 selects how the sample is compared against the stored baseline.
  case "$2" in
    compare_with_baseline)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
      ;;
    compare_and_update)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
      ;;
  esac
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  run_test test_gpu_speed_cudnn_lstm "$@"
fi
|
|
@ -0,0 +1,44 @@
|
|||
#!/bin/bash
set -e

. ./common.sh

test_gpu_speed_lstm () {
  echo "Testing: LSTM, GPU"

  # Fixed thread counts keep timings comparable across runs.
  export OMP_NUM_THREADS=4
  export MKL_NUM_THREADS=4

  git clone https://github.com/pytorch/benchmark.git
  cd benchmark/
  # Pin the benchmark suite to a known-good revision.
  git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0
  cd scripts/

  SAMPLE_ARRAY=()
  NUM_RUNS=$1

  for (( i=1; i<=NUM_RUNS; i++ )); do
    runtime=$(get_runtime_of_command python lstm.py --skip-cpu-governor-check)
    echo "$runtime"
    SAMPLE_ARRAY+=("${runtime}")
  done

  cd ../..

  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
  echo "Runtime stats in seconds:"
  echo "$stats"

  # $2 selects how the sample is compared against the stored baseline.
  case "$2" in
    compare_with_baseline)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
      ;;
    compare_and_update)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
      ;;
  esac
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  run_test test_gpu_speed_lstm "$@"
fi
|
|
@ -0,0 +1,44 @@
|
|||
#!/bin/bash
set -e

. ./common.sh

test_gpu_speed_mlstm () {
  echo "Testing: MLSTM, GPU"

  # Fixed thread counts keep timings comparable across runs.
  export OMP_NUM_THREADS=4
  export MKL_NUM_THREADS=4

  git clone https://github.com/pytorch/benchmark.git
  cd benchmark/
  # Pin the benchmark suite to a known-good revision.
  git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0
  cd scripts/

  SAMPLE_ARRAY=()
  NUM_RUNS=$1

  for (( i=1; i<=NUM_RUNS; i++ )); do
    runtime=$(get_runtime_of_command python mlstm.py --skip-cpu-governor-check)
    echo "$runtime"
    SAMPLE_ARRAY+=("${runtime}")
  done

  cd ../..

  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
  echo "Runtime stats in seconds:"
  echo "$stats"

  # $2 selects how the sample is compared against the stored baseline.
  case "$2" in
    compare_with_baseline)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
      ;;
    compare_and_update)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
      ;;
  esac
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  run_test test_gpu_speed_mlstm "$@"
fi
|
|
@ -0,0 +1,48 @@
|
|||
#!/bin/bash
set -e

. ./common.sh

test_gpu_speed_mnist () {
  echo "Testing: MNIST, GPU"

  # Fixed thread counts keep timings comparable across runs.
  export OMP_NUM_THREADS=4
  export MKL_NUM_THREADS=4

  git clone https://github.com/pytorch/examples.git -b perftests
  cd examples/mnist

  conda install -c pytorch torchvision

  # Download data
  python main.py --epochs 0

  SAMPLE_ARRAY=()
  NUM_RUNS=$1

  # Needs warm up to get accurate number
  python main.py --epochs 1 --no-log

  for (( i=1; i<=NUM_RUNS; i++ )); do
    runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log)
    echo "$runtime"
    SAMPLE_ARRAY+=("${runtime}")
  done

  cd ../..

  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
  echo "Runtime stats in seconds:"
  echo "$stats"

  # $2 selects how the sample is compared against the stored baseline.
  case "$2" in
    compare_with_baseline)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
      ;;
    compare_and_update)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
      ;;
  esac
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  run_test test_gpu_speed_mnist "$@"
fi
|
|
@ -0,0 +1,53 @@
|
|||
#!/bin/bash
set -e

. ./common.sh

test_gpu_speed_word_language_model () {
  echo "Testing: word language model on Wikitext-2, GPU"

  # Fixed thread counts keep timings comparable across runs.
  export OMP_NUM_THREADS=4
  export MKL_NUM_THREADS=4

  git clone https://github.com/pytorch/examples.git -b perftests
  cd examples/word_language_model
  cd data/wikitext-2

  # Reduce dataset size, so that we can have more runs per test
  sed -n '1,200p' test.txt > test_tmp.txt
  sed -n '1,1000p' train.txt > train_tmp.txt
  sed -n '1,200p' valid.txt > valid_tmp.txt

  mv test_tmp.txt test.txt
  mv train_tmp.txt train.txt
  mv valid_tmp.txt valid.txt

  cd ../..

  SAMPLE_ARRAY=()
  NUM_RUNS=$1

  for (( i=1; i<=NUM_RUNS; i++ )); do
    runtime=$(get_runtime_of_command python main.py --cuda --epochs 1)
    echo "$runtime"
    SAMPLE_ARRAY+=("${runtime}")
  done

  cd ../..

  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
  echo "Runtime stats in seconds:"
  echo "$stats"

  # $2 selects how the sample is compared against the stored baseline.
  case "$2" in
    compare_with_baseline)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
      ;;
    compare_and_update)
      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
      ;;
  esac
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  run_test test_gpu_speed_word_language_model "$@"
fi
|
|
@ -0,0 +1,13 @@
|
|||
import sys
import json

# Rewrite the 'commit' field of a runtime-baseline JSON file in place.
# argv[1] - path to the JSON file, argv[2] - commit hash to record.
path = sys.argv[1]
commit = sys.argv[2]

with open(path) as f:
    payload = json.load(f)

payload['commit'] = commit

with open(path, 'w') as f:
    json.dump(payload, f)
|
|
@ -0,0 +1,17 @@
|
|||
import sys

# Echo every line of the given log file except those matching known-benign
# noise: CPU instruction-set probes, symbol-existence checks, and the
# compilation-error-formatting test.
IGNORED_KEYWORDS = (
    'src.c',
    'CheckSymbolExists.c',
    'test_compilation_error_formatting',
)

with open(sys.argv[1]) as log_file:
    for line in log_file.readlines():
        if not any(keyword in line for keyword in IGNORED_KEYWORDS):
            print(line)
|
|
@ -0,0 +1,18 @@
|
|||
#!/bin/bash

# Generate a throw-away CA/cert pair and sanity-check it.
CREATE_TEST_CERT="$(dirname "${BASH_SOURCE[0]}")/create_test_cert.py"
TMP_CERT_DIR=$(python "$CREATE_TEST_CERT")

openssl verify -CAfile "${TMP_CERT_DIR}/ca.pem" "${TMP_CERT_DIR}/cert.pem"

# Point Gloo at the TLS transport and the generated credentials.
export GLOO_DEVICE_TRANSPORT=TCP_TLS
export GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY=${TMP_CERT_DIR}/pkey.key
export GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT=${TMP_CERT_DIR}/cert.pem
export GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE=${TMP_CERT_DIR}/ca.pem

time python test/run_test.py --include distributed/test_c10d_gloo --verbose -- ProcessGroupGlooTest

# Scrub the TLS configuration so later tests use the default transport.
unset GLOO_DEVICE_TRANSPORT \
      GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY \
      GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT \
      GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE
|
|
@ -0,0 +1,71 @@
|
|||
#!/bin/bash

SCRIPT_PARENT_DIR=$(dirname "${BASH_SOURCE[0]}")

# shellcheck source=.ci/pytorch/common.sh
source "$SCRIPT_PARENT_DIR/common.sh"

cd .ci/pytorch/perf_test

echo "Running CPU perf test for PyTorch..."

pip install -q awscli

# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
# More info at https://github.com/aws/aws-cli/issues/2321
aws configure set default.s3.multipart_threshold 5GB
UPSTREAM_DEFAULT_BRANCH="$(git remote show https://github.com/pytorch/pytorch.git | awk '/HEAD branch/ {print $NF}')"

if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
  # On the default branch, remember the current commit so the refreshed
  # baseline can be uploaded under it at the end.
  DEFAULT_BRANCH_COMMIT_ID=$(git log --format="%H" -n 1)
  export DEFAULT_BRANCH_COMMIT_ID
fi

# Walk the upstream default branch backwards until we find the most recent
# commit that has a baseline stored in S3, then download it.
git remote add upstream https://github.com/pytorch/pytorch.git
git fetch upstream
IFS=$'\n'
while IFS='' read -r commit_id; do
  if aws s3 ls s3://ossci-perf-test/pytorch/cpu_runtime/"${commit_id}".json; then
    LATEST_TESTED_COMMIT=${commit_id}
    break
  fi
done < <(git rev-list upstream/"$UPSTREAM_DEFAULT_BRANCH")
aws s3 cp s3://ossci-perf-test/pytorch/cpu_runtime/"${LATEST_TESTED_COMMIT}".json cpu_runtime.json

if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
  # Prepare new baseline file
  cp cpu_runtime.json new_cpu_runtime.json
  python update_commit_hash.py new_cpu_runtime.json "${DEFAULT_BRANCH_COMMIT_ID}"
fi

# Include tests
# shellcheck source=./perf_test/test_cpu_speed_mini_sequence_labeler.sh
. ./test_cpu_speed_mini_sequence_labeler.sh
# shellcheck source=./perf_test/test_cpu_speed_mnist.sh
. ./test_cpu_speed_mnist.sh
# shellcheck source=./perf_test/test_cpu_speed_torch.sh
. ./test_cpu_speed_torch.sh
# shellcheck source=./perf_test/test_cpu_speed_torch_tensor.sh
. ./test_cpu_speed_torch_tensor.sh

# Run tests: default-branch runs refresh the baseline, PR runs only compare.
export TEST_MODE="compare_with_baseline"
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
  export TEST_MODE="compare_and_update"
fi

# Operator tests
run_test test_cpu_speed_torch "$TEST_MODE"
run_test test_cpu_speed_torch_tensor "$TEST_MODE"

# Sample model tests
run_test test_cpu_speed_mini_sequence_labeler 20 "$TEST_MODE"
run_test test_cpu_speed_mnist 20 "$TEST_MODE"

if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
  # This could cause race condition if we are testing the same default branch commit twice,
  # but the chance of them executing this line at the same time is low.
  aws s3 cp new_cpu_runtime.json s3://ossci-perf-test/pytorch/cpu_runtime/"${DEFAULT_BRANCH_COMMIT_ID}".json --acl public-read
fi
|
|
@ -0,0 +1,76 @@
|
|||
#!/bin/bash

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

pushd .ci/pytorch/perf_test

echo "Running GPU perf test for PyTorch..."

# Trying to uninstall PyYAML can cause problem. Workaround according to:
# https://github.com/pypa/pip/issues/5247#issuecomment-415571153
pip install -q awscli --ignore-installed PyYAML

# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
# More info at https://github.com/aws/aws-cli/issues/2321
aws configure set default.s3.multipart_threshold 5GB
UPSTREAM_DEFAULT_BRANCH="$(git remote show https://github.com/pytorch/pytorch.git | awk '/HEAD branch/ {print $NF}')"

if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
  # On the default branch, remember the current commit so the refreshed
  # baseline can be uploaded under it at the end.
  DEFAULT_BRANCH_COMMIT_ID=$(git log --format="%H" -n 1)
  export DEFAULT_BRANCH_COMMIT_ID
fi

# Walk the upstream default branch backwards until we find the most recent
# commit that has a baseline stored in S3, then download it.
git remote add upstream https://github.com/pytorch/pytorch.git
git fetch upstream
IFS=$'\n'
while IFS='' read -r commit_id; do
  if aws s3 ls s3://ossci-perf-test/pytorch/gpu_runtime/"${commit_id}".json; then
    LATEST_TESTED_COMMIT=${commit_id}
    break
  fi
done < <(git rev-list upstream/"$UPSTREAM_DEFAULT_BRANCH")
aws s3 cp s3://ossci-perf-test/pytorch/gpu_runtime/"${LATEST_TESTED_COMMIT}".json gpu_runtime.json

if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
  # Prepare new baseline file
  cp gpu_runtime.json new_gpu_runtime.json
  python update_commit_hash.py new_gpu_runtime.json "${DEFAULT_BRANCH_COMMIT_ID}"
fi

# Include tests
# shellcheck source=./perf_test/test_gpu_speed_mnist.sh
. ./test_gpu_speed_mnist.sh
# shellcheck source=./perf_test/test_gpu_speed_word_language_model.sh
. ./test_gpu_speed_word_language_model.sh
# shellcheck source=./perf_test/test_gpu_speed_cudnn_lstm.sh
. ./test_gpu_speed_cudnn_lstm.sh
# shellcheck source=./perf_test/test_gpu_speed_lstm.sh
. ./test_gpu_speed_lstm.sh
# shellcheck source=./perf_test/test_gpu_speed_mlstm.sh
. ./test_gpu_speed_mlstm.sh

# Run tests: default-branch runs refresh the baseline, PR runs only compare.
TEST_MODE="compare_with_baseline"
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
  TEST_MODE="compare_and_update"
fi

run_test test_gpu_speed_mnist 20 "$TEST_MODE"
run_test test_gpu_speed_word_language_model 20 "$TEST_MODE"
run_test test_gpu_speed_cudnn_lstm 20 "$TEST_MODE"
run_test test_gpu_speed_lstm 20 "$TEST_MODE"
run_test test_gpu_speed_mlstm 20 "$TEST_MODE"

if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
  # This could cause race condition if we are testing the same default branch commit twice,
  # but the chance of them executing this line at the same time is low.
  aws s3 cp new_gpu_runtime.json s3://ossci-perf-test/pytorch/gpu_runtime/"${DEFAULT_BRANCH_COMMIT_ID}".json --acl public-read
fi

popd
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,65 @@
|
|||
#!/bin/bash
# Builds PyTorch on Windows and packages the result.  Must be run from the
# repository root; the actual compile is delegated to
# win-test-helpers/build_pytorch.bat.
#
# If you want to rebuild, run this with REBUILD=1
# If you want to build with CUDA, run this with USE_CUDA=1
# If you want to build without CUDA, run this with USE_CUDA=0

if [[ ! -f setup.py ]]; then
  echo "ERROR: Please run this build script from PyTorch root directory." >&2
  exit 1
fi

SCRIPT_PARENT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )

# shellcheck source=./common.sh
source "$SCRIPT_PARENT_DIR/common.sh"
# shellcheck source=./common-build.sh
source "$SCRIPT_PARENT_DIR/common-build.sh"

IMAGE_COMMIT_ID=$(git rev-parse HEAD)
export IMAGE_COMMIT_ID
export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID}
if [[ ${JOB_NAME} == *"develop"* ]]; then
  export IMAGE_COMMIT_TAG=develop-${IMAGE_COMMIT_TAG}
fi

# Batch helpers need the temp dir in both POSIX and Win32 notations.
export TMP_DIR="${PWD}/build/win_tmp"
TMP_DIR_WIN=$(cygpath -w "${TMP_DIR}")
export TMP_DIR_WIN
# NOTE: the ":-" default also replaces an empty value, so the variable is
# always non-empty here and no emptiness guard is needed around mkdir.
export PYTORCH_FINAL_PACKAGE_DIR=${PYTORCH_FINAL_PACKAGE_DIR:-/c/w/build-results}
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true

# This directory is used only to hold "pytorch_env_restore.bat", called via "setup_pytorch_env.bat"
CI_SCRIPTS_DIR=$TMP_DIR/ci_scripts
mkdir -p "$CI_SCRIPTS_DIR"

# Drop stale scripts from a previous run.  Avoids parsing `ls` (SC2012) and
# tolerates an empty directory.
rm -f "$CI_SCRIPTS_DIR"/* 2>/dev/null || true

export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers

# The grep is EXPECTED to find nothing on a healthy tree (non-zero status),
# so suspend -e/-x around it and inspect the status explicitly.
set +ex
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
PYLONG_API_CHECK=$?
if [[ $PYLONG_API_CHECK -eq 0 ]]; then
  echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"
  echo "because \`sizeof(long) == 4\` and \`sizeof(unsigned long) == 4\`."
  echo "Please include \"torch/csrc/utils/python_numbers.h\" and use the corresponding APIs instead."
  echo "PyLong_FromLong -> THPUtils_packInt32 / THPUtils_packInt64"
  echo "PyLong_AsLong -> THPUtils_unpackInt (32-bit) / THPUtils_unpackLong (64-bit)"
  echo "PyLong_FromUnsignedLong -> THPUtils_packUInt32 / THPUtils_packUInt64"
  echo "PyLong_AsUnsignedLong -> THPUtils_unpackUInt32 / THPUtils_unpackUInt64"
  exit 1
fi
set -ex

"$SCRIPT_HELPERS_DIR"/build_pytorch.bat

assert_git_not_dirty

# In CI (BUILD_ENVIRONMENT set) the packaged 7z archive must exist; for local
# builds its absence is not an error.
if [[ ! -f "${TMP_DIR}/${IMAGE_COMMIT_TAG}.7z" && -n "${BUILD_ENVIRONMENT}" ]]; then
  exit 1
fi
echo "BUILD PASSED"
|
@ -0,0 +1,160 @@
|
|||
if "%DEBUG%" == "1" (
|
||||
set BUILD_TYPE=debug
|
||||
) ELSE (
|
||||
set BUILD_TYPE=release
|
||||
)
|
||||
|
||||
set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocolatey\bin;C:\Program Files\Git\cmd;C:\Program Files\Amazon\AWSCLI;C:\Program Files\Amazon\AWSCLI\bin;%PATH%
|
||||
|
||||
:: This inflates our log size slightly, but it is REALLY useful to be
|
||||
:: able to see what our cl.exe commands are (since you can actually
|
||||
:: just copy-paste them into a local Windows setup to just rebuild a
|
||||
:: single file.)
|
||||
:: log sizes are too long, but leaving this here incase someone wants to use it locally
|
||||
:: set CMAKE_VERBOSE_MAKEFILE=1
|
||||
|
||||
|
||||
set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers
|
||||
|
||||
|
||||
call %INSTALLER_DIR%\install_mkl.bat
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
|
||||
call %INSTALLER_DIR%\install_magma.bat
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
|
||||
call %INSTALLER_DIR%\install_sccache.bat
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
|
||||
:: Miniconda has been installed as part of the Windows AMI with all the dependencies.
|
||||
:: We just need to activate it here
|
||||
call %INSTALLER_DIR%\activate_miniconda3.bat
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
|
||||
:: Override VS env here
|
||||
pushd .
|
||||
if "%VC_VERSION%" == "" (
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
) else (
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=%VC_VERSION%
|
||||
)
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
@echo on
|
||||
popd
|
||||
|
||||
if not "%USE_CUDA%"=="1" goto cuda_build_end
|
||||
|
||||
set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION%
|
||||
|
||||
if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
|
||||
echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
|
||||
exit /b 1
|
||||
)
|
||||
rem version transformer, for example 10.1 to 10_1.
|
||||
if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
|
||||
echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
|
||||
exit /b 1
|
||||
)
|
||||
set VERSION_SUFFIX=%CUDA_VERSION:.=_%
|
||||
set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
|
||||
|
||||
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
|
||||
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
|
||||
set CUDNN_ROOT_DIR=%CUDA_PATH%
|
||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
||||
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
|
||||
|
||||
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
|
||||
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
|
||||
set CUDNN_ROOT_DIR=%CUDA_PATH%
|
||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
||||
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
|
||||
|
||||
:cuda_build_end
|
||||
|
||||
set DISTUTILS_USE_SDK=1
|
||||
set PATH=%TMP_DIR_WIN%\bin;%PATH%
|
||||
|
||||
:: The latest Windows CUDA test is running on AWS G5 runner with A10G GPU
|
||||
if "%TORCH_CUDA_ARCH_LIST%" == "" set TORCH_CUDA_ARCH_LIST=8.6
|
||||
|
||||
:: The default sccache idle timeout is 600, which is too short and leads to intermittent build errors.
|
||||
set SCCACHE_IDLE_TIMEOUT=0
|
||||
set SCCACHE_IGNORE_SERVER_IO_ERROR=1
|
||||
sccache --stop-server
|
||||
sccache --start-server
|
||||
sccache --zero-stats
|
||||
set CC=sccache-cl
|
||||
set CXX=sccache-cl
|
||||
|
||||
set CMAKE_GENERATOR=Ninja
|
||||
|
||||
if "%USE_CUDA%"=="1" (
|
||||
:: randomtemp is used to resolve the intermittent build error related to CUDA.
|
||||
:: code: https://github.com/peterjc123/randomtemp-rust
|
||||
:: issue: https://github.com/pytorch/pytorch/issues/25393
|
||||
::
|
||||
:: CMake requires a single command as CUDA_NVCC_EXECUTABLE, so we push the wrappers
|
||||
:: randomtemp.exe and sccache.exe into a batch file which CMake invokes.
|
||||
curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
echo @"%TMP_DIR_WIN%\bin\randomtemp.exe" "%TMP_DIR_WIN%\bin\sccache.exe" "%CUDA_PATH%\bin\nvcc.exe" %%* > "%TMP_DIR%/bin/nvcc.bat"
|
||||
cat %TMP_DIR%/bin/nvcc.bat
|
||||
set CUDA_NVCC_EXECUTABLE=%TMP_DIR%/bin/nvcc.bat
|
||||
for /F "usebackq delims=" %%n in (`cygpath -m "%CUDA_PATH%\bin\nvcc.exe"`) do set CMAKE_CUDA_COMPILER=%%n
|
||||
set CMAKE_CUDA_COMPILER_LAUNCHER=%TMP_DIR%/bin/randomtemp.exe;%TMP_DIR%\bin\sccache.exe
|
||||
)
|
||||
|
||||
@echo off
|
||||
echo @echo off >> %TMP_DIR_WIN%\ci_scripts\pytorch_env_restore.bat
|
||||
for /f "usebackq tokens=*" %%i in (`set`) do echo set "%%i" >> %TMP_DIR_WIN%\ci_scripts\pytorch_env_restore.bat
|
||||
@echo on
|
||||
|
||||
if "%REBUILD%" == "" (
|
||||
if NOT "%BUILD_ENVIRONMENT%" == "" (
|
||||
:: Create a shortcut to restore pytorch environment
|
||||
echo @echo off >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
|
||||
echo call "%TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
|
||||
echo cd /D "%CD%" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
|
||||
|
||||
aws s3 cp "s3://ossci-windows/Restore PyTorch Environment.lnk" "C:\Users\circleci\Desktop\Restore PyTorch Environment.lnk"
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
)
|
||||
)
|
||||
|
||||
python setup.py bdist_wheel
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
sccache --show-stats
|
||||
python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
|
||||
(
|
||||
if "%BUILD_ENVIRONMENT%"=="" (
|
||||
echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3` in Command Prompt before running Git Bash.
|
||||
) else (
|
||||
if "%USE_CUDA%"=="1" (
|
||||
7z a %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torchgen %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\functorch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\nvfuser && copy /Y "%TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z" "%PYTORCH_FINAL_PACKAGE_DIR%\"
|
||||
) else (
|
||||
7z a %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torchgen %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\functorch && copy /Y "%TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z" "%PYTORCH_FINAL_PACKAGE_DIR%\"
|
||||
)
|
||||
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
|
||||
:: export test times so that potential sharded tests that'll branch off this build will use consistent data
|
||||
python tools/stats/export_test_times.py
|
||||
copy /Y ".pytorch-test-times.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
|
||||
|
||||
:: Also save build/.ninja_log as an artifact
|
||||
copy /Y "build\.ninja_log" "%PYTORCH_FINAL_PACKAGE_DIR%\"
|
||||
)
|
||||
)
|
||||
|
||||
sccache --show-stats --stats-format json | jq .stats > sccache-stats-%BUILD_ENVIRONMENT%-%OUR_GITHUB_JOB_ID%.json
|
||||
sccache --stop-server
|
|
@ -0,0 +1,4 @@
|
|||
REM The first argument should be the CUDA version (e.g. 11.8); that
REM toolkit's bin directory is prepended to PATH.  The echoes below log the
REM pre-existing PATH and CUDA_PATH for debugging.
echo %PATH%
echo %CUDA_PATH%
set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%1\bin;%PATH%
|
|
@ -0,0 +1,19 @@
|
|||
:: Runs the functorch shard of the Windows test suite.
:: Requires SCRIPT_HELPERS_DIR, SHARD_NUMBER and NUM_TEST_SHARDS to be set.
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
:: Exit the batch once there's an error.  "errorlevel 1" catches positive
:: exit codes; "not errorlevel 0" catches negative ones.  The original only
:: checked the negative case, so ordinary failures (exit code 1) slipped by.
if errorlevel 1 (
    echo "setup pytorch env failed"
    echo %errorlevel%
    exit /b
)
if not errorlevel 0 (
    echo "setup pytorch env failed"
    echo %errorlevel%
    exit /b
)

echo "Test functorch"
pushd test
python run_test.py --functorch --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose
popd
if ERRORLEVEL 1 goto fail

:eof
exit /b 0

:fail
exit /b 1
|
|
@ -0,0 +1,26 @@
|
|||
if "%BUILD_ENVIRONMENT%"=="" (
|
||||
set CONDA_PARENT_DIR=%CD%
|
||||
) else (
|
||||
set CONDA_PARENT_DIR=C:\Jenkins
|
||||
)
|
||||
|
||||
|
||||
:: Be conservative here when rolling out the new AMI with conda. This will try
|
||||
:: to install conda as before if it couldn't find the conda installation. This
|
||||
:: can be removed eventually after we gain enough confidence in the AMI
|
||||
if not exist %CONDA_PARENT_DIR%\Miniconda3 (
|
||||
set INSTALL_FRESH_CONDA=1
|
||||
)
|
||||
|
||||
if "%INSTALL_FRESH_CONDA%"=="1" (
|
||||
curl --retry 3 --retry-all-errors -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe --output %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
|
||||
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
)
|
||||
|
||||
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
||||
call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3
|
|
@ -0,0 +1,37 @@
|
|||
if "%CUDA_VERSION%" == "cpu" (
|
||||
echo skip magma installation for cpu builds
|
||||
exit /b 0
|
||||
)
|
||||
|
||||
rem remove dot in cuda_version, fox example 11.1 to 111
|
||||
|
||||
if not "%USE_CUDA%"=="1" (
|
||||
exit /b 0
|
||||
)
|
||||
|
||||
if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
|
||||
echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
set VERSION_SUFFIX=%CUDA_VERSION:.=%
|
||||
set CUDA_SUFFIX=cuda%VERSION_SUFFIX%
|
||||
|
||||
if "%CUDA_SUFFIX%" == "" (
|
||||
echo unknown CUDA version, please set `CUDA_VERSION` higher than 10.2
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%REBUILD%"=="" (
|
||||
if "%BUILD_ENVIRONMENT%"=="" (
|
||||
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z --output %TMP_DIR_WIN%\magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z
|
||||
) else (
|
||||
aws s3 cp s3://ossci-windows/magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z %TMP_DIR_WIN%\magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z --quiet
|
||||
)
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
7z x -aoa %TMP_DIR_WIN%\magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z -o%TMP_DIR_WIN%\magma
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
)
|
||||
set MAGMA_HOME=%TMP_DIR_WIN%\magma
|
|
@ -0,0 +1,14 @@
|
|||
if "%REBUILD%"=="" (
|
||||
if "%BUILD_ENVIRONMENT%"=="" (
|
||||
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z --output %TMP_DIR_WIN%\mkl.7z
|
||||
) else (
|
||||
aws s3 cp s3://ossci-windows/mkl_2020.2.254.7z %TMP_DIR_WIN%\mkl.7z --quiet
|
||||
)
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
7z x -aoa %TMP_DIR_WIN%\mkl.7z -o%TMP_DIR_WIN%\mkl
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
)
|
||||
set CMAKE_INCLUDE_PATH=%TMP_DIR_WIN%\mkl\include
|
||||
set LIB=%TMP_DIR_WIN%\mkl\lib;%LIB%
|
|
@ -0,0 +1,18 @@
|
|||
:: Ensures a working sccache.exe (and sccache-cl.exe) is present under
:: %TMP_DIR_WIN%\bin, (re)downloading it when the installed copy cannot even
:: report its stats.  Skipped entirely on REBUILD.
mkdir %TMP_DIR_WIN%\bin

if NOT "%REBUILD%"=="" goto :eof

:: NOTE: cmd.exe does not reliably support labels inside parenthesized
:: blocks, so the retry loop uses a top-level label (the original nested
:: ":check_sccache" inside an "if (...)" block).
:check_sccache
%TMP_DIR_WIN%\bin\sccache.exe --show-stats || (
    taskkill /im sccache.exe /f /t || ver > nul
    del %TMP_DIR_WIN%\bin\sccache.exe || ver > nul
    del %TMP_DIR_WIN%\bin\sccache-cl.exe || ver > nul
    if "%BUILD_ENVIRONMENT%"=="" (
        curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %TMP_DIR_WIN%\bin\sccache.exe
        curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output %TMP_DIR_WIN%\bin\sccache-cl.exe
    ) else (
        aws s3 cp s3://ossci-windows/sccache.exe %TMP_DIR_WIN%\bin\sccache.exe
        aws s3 cp s3://ossci-windows/sccache-cl.exe %TMP_DIR_WIN%\bin\sccache-cl.exe
    )
    goto :check_sccache
)
|
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/env python3
"""Smoke-test a freshly built PyTorch on Windows.

Each check is a short ``python -c`` one-liner run in a subprocess.  CPU
checks always run; CUDA checks are added when the environment variable
``USE_CUDA`` equals ``"1"``.  When a check fails and the Windows SDK
debugger (cdb.exe) is installed, the failing command is re-run under the
debugger to capture native stack traces, and the script exits with the
failing child's return code.
"""

import os
import subprocess
import sys

# (description, python -c payload) pairs that must pass on every build.
COMMON_TESTS = [
    (
        "Checking that torch is available",
        "import torch",
    ),
    (
        "Checking that MKL is available",
        "import torch; exit(0 if torch.backends.mkl.is_available() else 1)",
    ),
]

# Additional pairs that only make sense for CUDA-enabled builds.
GPU_TESTS = [
    (
        "Checking that CUDA archs are setup correctly",
        "import torch; torch.randn([3,5]).cuda()",
    ),
    (
        "Checking that magma is available",
        "import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)",
    ),
    (
        "Checking that CuDNN is available",
        "import torch; exit(0 if torch.backends.cudnn.is_available() else 1)",
    ),
]


def _rerun_under_debugger(command_args):
    """Re-run a failed check under cdb.exe (if installed) for a stack trace."""
    sdk_root = os.environ.get('WindowsSdkDir', 'C:\\Program Files (x86)\\Windows Kits\\10')
    debugger = os.path.join(sdk_root, 'Debuggers', 'x64', 'cdb.exe')
    if not os.path.exists(debugger):
        return
    # "-o": also debug child processes; "~*g; q": resume all threads, quit.
    command_args = [debugger, "-o", "-c", "~*g; q"] + command_args
    print("Rerunning with traceback enabled")
    print("Command:", " ".join(command_args))
    # check=False: this run is purely diagnostic; we report the original code.
    subprocess.run(command_args, check=False)


def main():
    # .get() avoids the separate membership test of the original version.
    if os.environ.get('USE_CUDA') == '1':
        tests = COMMON_TESTS + GPU_TESTS
    else:
        tests = COMMON_TESTS
    for description, python_commands in tests:
        print(description)
        command_args = ["python", "-c", python_commands]
        print("Command:", " ".join(command_args))
        try:
            subprocess.check_call(command_args)
        except subprocess.CalledProcessError as e:
            _rerun_under_debugger(command_args)
            # Propagate the child's exit code.  sys.exit() is preferred over
            # the site-provided exit() builtin, which may be absent.
            sys.exit(e.returncode)


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,73 @@
|
|||
if exist "%TMP_DIR%/ci_scripts/pytorch_env_restore.bat" (
|
||||
call %TMP_DIR%/ci_scripts/pytorch_env_restore.bat
|
||||
exit /b 0
|
||||
)
|
||||
|
||||
set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocolatey\bin;C:\Program Files\Git\cmd;C:\Program Files\Amazon\AWSCLI;C:\Program Files\Amazon\AWSCLI\bin;%PATH%
|
||||
|
||||
:: Install Miniconda3
|
||||
set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers
|
||||
|
||||
:: Miniconda has been installed as part of the Windows AMI with all the dependencies.
|
||||
:: We just need to activate it here
|
||||
call %INSTALLER_DIR%\activate_miniconda3.bat
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
|
||||
pushd .
|
||||
if "%VC_VERSION%" == "" (
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
) else (
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=%VC_VERSION%
|
||||
)
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
@echo on
|
||||
popd
|
||||
|
||||
set DISTUTILS_USE_SDK=1
|
||||
|
||||
if not "%USE_CUDA%"=="1" goto cuda_build_end
|
||||
|
||||
set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION%
|
||||
|
||||
rem version transformer, for example 10.1 to 10_1.
|
||||
set VERSION_SUFFIX=%CUDA_VERSION:.=_%
|
||||
set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
|
||||
|
||||
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
|
||||
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
|
||||
set CUDNN_ROOT_DIR=%CUDA_PATH%
|
||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
||||
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
|
||||
set NUMBAPRO_CUDALIB=%CUDA_PATH%\bin
|
||||
set NUMBAPRO_LIBDEVICE=%CUDA_PATH%\nvvm\libdevice
|
||||
set NUMBAPRO_NVVM=%CUDA_PATH%\nvvm\bin\nvvm64_32_0.dll
|
||||
|
||||
:cuda_build_end
|
||||
|
||||
set PYTHONPATH=%TMP_DIR_WIN%\build;%PYTHONPATH%
|
||||
|
||||
if NOT "%BUILD_ENVIRONMENT%"=="" (
|
||||
pushd %TMP_DIR_WIN%\build
|
||||
copy /Y %PYTORCH_FINAL_PACKAGE_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %TMP_DIR_WIN%\
|
||||
:: 7z: -aos skips if exists because this .bat can be called multiple times
|
||||
7z x %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z -aos
|
||||
popd
|
||||
) else (
|
||||
xcopy /s %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
|
||||
)
|
||||
|
||||
@echo off
|
||||
echo @echo off >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat
|
||||
for /f "usebackq tokens=*" %%i in (`set`) do echo set "%%i" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat
|
||||
@echo on
|
||||
|
||||
if NOT "%BUILD_ENVIRONMENT%" == "" (
|
||||
:: Create a shortcut to restore pytorch environment
|
||||
echo @echo off >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
|
||||
echo call "%TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
|
||||
echo cd /D "%CD%" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
|
||||
|
||||
aws s3 cp "s3://ossci-windows/Restore PyTorch Environment.lnk" "C:\Users\circleci\Desktop\Restore PyTorch Environment.lnk"
|
||||
)
|
|
@ -0,0 +1,36 @@
|
|||
:: Builds and runs the custom-backend smoke test:
::   1. CMake + Ninja build of test/custom_backend against the built torch
::   2. Python-side tests, then export of a TorchScript module
::   3. C++-side test loading the exported module
:: Requires SCRIPT_HELPERS_DIR and TMP_DIR_WIN to be set by the caller.
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat

:: pybind11 headers are required to compile the extension.
git submodule update --init --recursive third_party/pybind11
cd test\custom_backend

:: Build the custom backend library.
mkdir build
pushd build

echo "Executing CMake for custom_backend test..."

:: Note: Caffe2 does not support MSVC + CUDA + Debug mode (has to be Release mode)
cmake -DCMAKE_PREFIX_PATH=%TMP_DIR_WIN%\build\torch -DCMAKE_BUILD_TYPE=Release -GNinja ..
if ERRORLEVEL 1 exit /b 1

echo "Executing Ninja for custom_backend test..."

ninja -v
if ERRORLEVEL 1 exit /b 1

echo "Ninja succeeded for custom_backend test."

popd

:: Run tests Python-side and export a script module.
python test_custom_backend.py -v
if ERRORLEVEL 1 exit /b 1

python backend.py --export-module-to="build/model.pt"
if ERRORLEVEL 1 exit /b 1

:: Run tests C++-side and load the exported script module.
cd build
:: The test executable needs NvToolsExt and the torch DLLs on PATH.
set PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64;%TMP_DIR_WIN%\build\torch\lib;%PATH%
test_custom_backend.exe model.pt
if ERRORLEVEL 1 exit /b 1
|
|
@ -0,0 +1,41 @@
|
|||
:: Builds and runs the custom-operator smoke test:
::   1. CMake + Ninja build of test/custom_operator against the built torch
::   2. Python-side tests, then export of a TorchScript module
::   3. C++-side test loading the exported module
:: Requires SCRIPT_HELPERS_DIR and TMP_DIR_WIN to be set by the caller.
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat

:: pybind11 headers are required to compile the extension.
git submodule update --init --recursive third_party/pybind11
cd test\custom_operator

:: Build the custom operator library.
mkdir build
pushd build

echo "Executing CMake for custom_operator test..."

:: Note: Caffe2 does not support MSVC + CUDA + Debug mode (has to be Release mode)
cmake -DCMAKE_PREFIX_PATH=%TMP_DIR_WIN%\build\torch -DCMAKE_BUILD_TYPE=Release -GNinja ..
if ERRORLEVEL 1 exit /b 1

echo "Executing Ninja for custom_operator test..."

ninja -v
if ERRORLEVEL 1 exit /b 1

echo "Ninja succeeded for custom_operator test."

popd

:: Run tests Python-side and export a script module.
python test_custom_ops.py -v
if ERRORLEVEL 1 exit /b 1

:: TODO: fix and re-enable this test
:: See https://github.com/pytorch/pytorch/issues/25155
:: python test_custom_classes.py -v
:: if ERRORLEVEL 1 exit /b 1

python model.py --export-script-module="build/model.pt"
if ERRORLEVEL 1 exit /b 1

:: Run tests C++-side and load the exported script module.
cd build
:: The test executable needs NvToolsExt and the torch DLLs on PATH.
set PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64;%TMP_DIR_WIN%\build\torch\lib;%PATH%
test_custom_ops.exe model.pt
if ERRORLEVEL 1 exit /b 1
|
|
@ -0,0 +1,24 @@
|
|||
REM The first argument is the directory containing the python interpreter.
REM Each distributed suite runs in its own process; bail out on the first
REM failure, propagating that suite's exit code.  All invocations use
REM python.exe explicitly (two of them previously omitted the extension).
%1\python.exe test/run_test.py --verbose -i distributed/test_c10d_common
if %errorlevel% neq 0 ( exit /b %errorlevel% )

%1\python.exe test/run_test.py --verbose -i distributed/test_c10d_gloo
if %errorlevel% neq 0 ( exit /b %errorlevel% )

%1\python.exe test/run_test.py --verbose -i distributed/test_c10d_nccl
if %errorlevel% neq 0 ( exit /b %errorlevel% )

%1\python.exe test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
if %errorlevel% neq 0 ( exit /b %errorlevel% )

%1\python.exe test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
if %errorlevel% neq 0 ( exit /b %errorlevel% )

%1\python.exe test/run_test.py --verbose -i distributed/test_data_parallel
if %errorlevel% neq 0 ( exit /b %errorlevel% )

%1\python.exe test/run_test.py --verbose -i distributed/test_store
if %errorlevel% neq 0 ( exit /b %errorlevel% )

%1\python.exe test/run_test.py --verbose -i distributed/test_pg_wrapper
if %errorlevel% neq 0 ( exit /b %errorlevel% )
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue