Forklet of Pytorch

upstream-v2.0 v2.0.1
Jeff Moe 2023-11-08 09:01:59 -07:00
commit e5ae8f15ad
11673 changed files with 3188227 additions and 0 deletions

115
.bazelrc 100644
View File

@ -0,0 +1,115 @@
build --cxxopt=--std=c++17
build --copt=-I.
# Bazel does not support including its cc_library targets as system
# headers. We work around this for generated code
# (e.g. c10/macros/cmake_macros.h) by making the generated directory a
# system include path.
build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
build --copt=-isystem --copt bazel-out/darwin-fastbuild/bin
build --experimental_ui_max_stdouterr_bytes=2048576
# Configuration to disable tty features for environments like CI
build:no-tty --curses no
build:no-tty --progress_report_interval 10
build:no-tty --show_progress_rate_limit 10
# Build with GPU support by default.
build --define=cuda=true
# rules_cuda configuration
build --@rules_cuda//cuda:enable_cuda
build --@rules_cuda//cuda:cuda_targets=sm_52
build --@rules_cuda//cuda:compiler=nvcc
build --repo_env=CUDA_PATH=/usr/local/cuda
# Configuration to build without GPU support
build:cpu-only --define=cuda=false
# define a separate build folder for faster switching between configs
build:cpu-only --platform_suffix=-cpu-only
# See the note on the config-less build for details about why we are
# doing this. We must also do it for the "-cpu-only" platform suffix.
build --copt=-isystem --copt=bazel-out/k8-fastbuild-cpu-only/bin
# rules_cuda configuration
build:cpu-only --@rules_cuda//cuda:enable_cuda=False
# Definition of --config=shell
# interactive shell immediately before execution
build:shell --run_under="//tools/bazel_tools:shellwrap"
# Disable all warnings for external repositories. We don't care about
# their warnings.
build --per_file_copt=^external/@-w
# Set additional warnings to error level.
#
# Implementation notes:
# * we use file extensions to determine if we are using the C++
# compiler or the cuda compiler
# * we use ^// at the start of the regex to only permit matching
# PyTorch files. This excludes external repos.
#
# Note that because this is logically a command-line flag, it is
# considered the word on what warnings are enabled. This has the
# unfortunate consequence of preventing us from disabling an error at
# the target level because those flags will come before these flags in
# the action invocation. Instead we provide per-file exceptions after
# this.
#
# On the bright side, this means we don't have to more broadly apply
# the exceptions to an entire target.
#
# Looking for CUDA flags? We have a cu_library macro that we can edit
# directly. Look in //tools/rules:cu.bzl for details. Editing the
# macro over this has the following advantages:
# * making changes does not require discarding the Bazel analysis
# cache
# * it allows for selective overrides on individual targets since the
# macro-level opts will come earlier than target level overrides
build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=all
# The following warnings come from -Wall. We downgrade them from error
# to warnings here.
#
# sign-compare has a tremendous amount of violations in the
# codebase. It will be a lot of work to fix them, just disable it for
# now.
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-sign-compare
# We intentionally use #pragma unroll, which is compiler specific.
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-error=unknown-pragmas
build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=extra
# The following warnings come from -Wextra. We downgrade them from error
# to warnings here.
#
# unused-parameter-compare has a tremendous amount of violations in the
# codebase. It will be a lot of work to fix them, just disable it for
# now.
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-parameter
# missing-field-parameters has both a large number of violations in
# the codebase, but it also is used pervasively in the Python C
# API. There are a couple of catches though:
# * we use multiple versions of the Python API and hence have
# potentially multiple different versions of each relevant
# struct. They may have different numbers of fields. It will be
# unwieldy to support multiple versions in the same source file.
# * Python itself for many of these structs recommends only
# initializing a subset of the fields. We should respect the API
# usage conventions of our dependencies.
#
# Hence, we just disable this warning altogether. We may want to clean
# up some of the clear-cut cases that could be risky, but we still
# likely want to have this disabled for the most part.
build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-missing-field-initializers
build --per_file_copt='//:aten/src/ATen/RegisterCompositeExplicitAutograd\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterCompositeImplicitAutograd\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterMkldnnCPU\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorCPU\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterQuantizedCPU\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterSparseCPU\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterSparseCsrCPU\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorMeta\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterSparseMeta\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterQuantizedMeta\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:aten/src/ATen/RegisterZeroTensor\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:torch/csrc/lazy/generated/RegisterAutogradLazy\.cpp$'@-Wno-error=unused-function
build --per_file_copt='//:torch/csrc/lazy/generated/RegisterLazy\.cpp$'@-Wno-error=unused-function

1
.bazelversion 100644
View File

@ -0,0 +1 @@
4.2.1

25
.buckconfig.oss 100644
View File

@ -0,0 +1,25 @@
[pt]
is_oss=1
[buildfile]
name = BUCK.oss
includes = //tools/build_defs/select.bzl
[repositories]
bazel_skylib = third_party/bazel-skylib/
ovr_config = .
[download]
in_build = true
[cxx]
cxxflags = -std=c++17
should_remap_host_platform = true
cpp = /usr/bin/clang
cc = /usr/bin/clang
cxx = /usr/bin/clang++
cxxpp = /usr/bin/clang++
ld = /usr/bin/clang++
[project]
default_flavors_mode=all

View File

@ -0,0 +1,14 @@
# Jenkins
The scripts in this directory are the entrypoint for testing Caffe2.
The environment variable `BUILD_ENVIRONMENT` is expected to be set to
the build environment you intend to test. It is a hint for the build
and test scripts to configure Caffe2 a certain way and include/exclude
tests. Docker images, they equal the name of the image itself. For
example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are
built on Jenkins and are used in triggered builds already have this
environment variable set in their manifest. Also see
`./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`.
Our Jenkins installation is located at https://ci.pytorch.org/jenkins/.

View File

@ -0,0 +1,36 @@
set -ex
LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
TEST_DIR="$ROOT_DIR/test"
gtest_reports_dir="${TEST_DIR}/test-reports/cpp"
pytest_reports_dir="${TEST_DIR}/test-reports/python"
# Figure out which Python to use
PYTHON="$(which python)"
if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
PYTHON=$(which "python${BASH_REMATCH[1]}")
fi
if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
# HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
unset HIP_PLATFORM
if which sccache > /dev/null; then
# Save sccache logs to file
sccache --stop-server || true
rm -f ~/sccache_error.log || true
SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
# Report sccache stats for easier debugging
sccache --zero-stats
fi
fi
# /usr/local/caffe2 is where the cpp bits are installed to in cmake-only
# builds. In +python builds the cpp tests are copied to /usr/local/caffe2 so
# that the test code in .ci/test.sh is the same
INSTALL_PREFIX="/usr/local/caffe2"
mkdir -p "$gtest_reports_dir" || true
mkdir -p "$pytest_reports_dir" || true
mkdir -p "$INSTALL_PREFIX" || true

172
.ci/caffe2/test.sh 100755
View File

@ -0,0 +1,172 @@
#!/bin/bash
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
pip install click mock tabulate networkx==2.0
pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
fi
# Skip tests in environments where they are not built/applicable
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
echo 'Skipping tests'
exit 0
fi
if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
# temporary to locate some kernel issues on the CI nodes
export HSAKMT_DEBUG_LEVEL=4
fi
# These additional packages are needed for circleci ROCm builds.
if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
# Need networkx 2.0 because bellmand_ford was moved in 2.1 . Scikit-image by
# defaults installs the most recent networkx version, so we install this lower
# version explicitly before scikit-image pulls it in as a dependency
pip install networkx==2.0
# click - onnx
pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
fi
# Find where cpp tests and Caffe2 itself are installed
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
# For cmake only build we install everything into /usr/local
cpp_test_dir="$INSTALL_PREFIX/cpp_test"
ld_library_path="$INSTALL_PREFIX/lib"
else
# For Python builds we install into python
# cd to /usr first so the python import doesn't get confused by any 'caffe2'
# directory in cwd
python_installation="$(dirname $(dirname $(cd /usr && $PYTHON -c 'import os; import caffe2; print(os.path.realpath(caffe2.__file__))')))"
caffe2_pypath="$python_installation/caffe2"
cpp_test_dir="$python_installation/torch/test"
ld_library_path="$python_installation/torch/lib"
fi
################################################################################
# C++ tests #
################################################################################
# Only run cpp tests in the first shard, don't run cpp tests a second time in the second shard
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
echo "Running C++ tests.."
for test in $(find "$cpp_test_dir" -executable -type f); do
case "$test" in
# skip tests we know are hanging or bad
*/mkl_utils_test|*/aten/integer_divider_test)
continue
;;
*/scalar_tensor_test|*/basic|*/native_test)
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
continue
else
LD_LIBRARY_PATH="$ld_library_path" "$test"
fi
;;
*/*_benchmark)
LD_LIBRARY_PATH="$ld_library_path" "$test" --benchmark_color=false
;;
*)
# Currently, we use a mixture of gtest (caffe2) and Catch2 (ATen). While
# planning to migrate to gtest as the common PyTorch c++ test suite, we
# currently do NOT use the xml test reporter, because Catch doesn't
# support multiple reporters
# c.f. https://github.com/catchorg/Catch2/blob/master/docs/release-notes.md#223
# which means that enabling XML output means you lose useful stdout
# output for Jenkins. It's more important to have useful console
# output than it is to have XML output for Jenkins.
# Note: in the future, if we want to use xml test reporter once we switch
# to all gtest, one can simply do:
LD_LIBRARY_PATH="$ld_library_path" \
"$test" --gtest_output=xml:"$gtest_reports_dir/$(basename $test).xml"
;;
esac
done
fi
################################################################################
# Python tests #
################################################################################
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
exit 0
fi
# If pip is installed as root, we must use sudo.
# CircleCI docker images could install conda as jenkins user, or use the OS's python package.
PIP=$(which pip)
PIP_USER=$(stat --format '%U' $PIP)
CURRENT_USER=$(id -u -n)
if [[ "$PIP_USER" = root && "$CURRENT_USER" != root ]]; then
MAYBE_SUDO=sudo
fi
# Uninstall pre-installed hypothesis and coverage to use an older version as newer
# versions remove the timeout parameter from settings which ideep/conv_transpose_test.py uses
$MAYBE_SUDO pip -q uninstall -y hypothesis
$MAYBE_SUDO pip -q uninstall -y coverage
# "pip install hypothesis==3.44.6" from official server is unreliable on
# CircleCI, so we host a copy on S3 instead
$MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
$MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
$MAYBE_SUDO pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
# Collect additional tests to run (outside caffe2/python)
EXTRA_TESTS=()
# CUDA builds always include NCCL support
if [[ "$BUILD_ENVIRONMENT" == *-cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *-rocm* ]]; then
EXTRA_TESTS+=("$caffe2_pypath/contrib/nccl")
fi
rocm_ignore_test=()
if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then
# Currently these tests are failing on ROCM platform:
# On ROCm, RCCL (distributed) development isn't complete.
# https://github.com/ROCmSoftwarePlatform/rccl
rocm_ignore_test+=("--ignore $caffe2_pypath/python/data_parallel_model_test.py")
# This test has been flaky in ROCm CI (but note the tests are
# cpu-only so should be unrelated to ROCm)
rocm_ignore_test+=("--ignore $caffe2_pypath/python/operator_test/blobs_queue_db_test.py")
# This test is skipped on Jenkins(compiled without MKL) and otherwise known flaky
rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/convfusion_op_test.py")
# This test is skipped on Jenkins(compiled without MKL) and causing segfault on Circle
rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/pool_op_test.py")
fi
echo "Running Python tests.."
# locale setting is required by click package
for loc in "en_US.utf8" "C.UTF-8"; do
if locale -a | grep "$loc" >/dev/null 2>&1; then
export LC_ALL="$loc"
export LANG="$loc"
break;
fi
done
# Some Caffe2 tests fail when run using AVX512 ISA, see https://github.com/pytorch/pytorch/issues/66111
export DNNL_MAX_CPU_ISA=AVX2
# Should still run even in the absence of SHARD_NUMBER
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
# TODO(sdym@meta.com) remove this when the linked issue resolved.
# py is temporary until https://github.com/Teemu/pytest-sugar/issues/241 is fixed
pip install --user py==1.11.0
pip install --user pytest-sugar
# NB: Warnings are disabled because they make it harder to see what
# the actual erroring test is
"$PYTHON" \
-m pytest \
-x \
-v \
--disable-warnings \
--junit-xml="$pytest_reports_dir/result.xml" \
--ignore "$caffe2_pypath/python/test/executor_test.py" \
--ignore "$caffe2_pypath/python/operator_test/matmul_op_test.py" \
--ignore "$caffe2_pypath/python/operator_test/pack_ops_test.py" \
--ignore "$caffe2_pypath/python/mkl/mkl_sbn_speed_test.py" \
--ignore "$caffe2_pypath/python/trt/test_pt_onnx_trt.py" \
${rocm_ignore_test[@]} \
"$caffe2_pypath/python" \
"${EXTRA_TESTS[@]}"
fi

View File

@ -0,0 +1,31 @@
# Docker images for Jenkins
This directory contains everything needed to build the Docker images
that are used in our CI
The Dockerfiles located in subdirectories are parameterized to
conditionally run build stages depending on build arguments passed to
`docker build`. This lets us use only a few Dockerfiles for many
images. The different configurations are identified by a freeform
string that we call a _build environment_. This string is persisted in
each image as the `BUILD_ENVIRONMENT` environment variable.
See `build.sh` for valid build environments (it's the giant switch).
Docker builds are now defined with `.circleci/cimodel/data/simple/docker_definitions.py`
## Contents
* `build.sh` -- dispatch script to launch all builds
* `common` -- scripts used to execute individual Docker build stages
* `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker
## Usage
```bash
# Build a specific image
./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
# Set flags (see build.sh) and build image
sudo bash -c 'PROTOBUF=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
```

View File

@ -0,0 +1 @@
<manifest package="org.pytorch.deps" />

View File

@ -0,0 +1,66 @@
buildscript {
ext {
minSdkVersion = 21
targetSdkVersion = 28
compileSdkVersion = 28
buildToolsVersion = '28.0.3'
coreVersion = "1.2.0"
extJUnitVersion = "1.1.1"
runnerVersion = "1.2.0"
rulesVersion = "1.2.0"
junitVersion = "4.12"
}
repositories {
google()
mavenLocal()
mavenCentral()
jcenter()
}
dependencies {
classpath 'com.android.tools.build:gradle:4.1.2'
classpath 'com.vanniktech:gradle-maven-publish-plugin:0.14.2'
}
}
repositories {
google()
jcenter()
}
apply plugin: 'com.android.library'
android {
compileSdkVersion rootProject.compileSdkVersion
buildToolsVersion rootProject.buildToolsVersion
defaultConfig {
minSdkVersion minSdkVersion
targetSdkVersion targetSdkVersion
}
sourceSets {
main {
manifest.srcFile 'AndroidManifest.xml'
}
}
}
dependencies {
implementation 'com.android.support:appcompat-v7:28.0.0'
implementation 'androidx.appcompat:appcompat:1.0.0'
implementation 'com.facebook.fbjni:fbjni-java-only:0.2.2'
implementation 'com.google.code.findbugs:jsr305:3.0.1'
implementation 'com.facebook.soloader:nativeloader:0.10.4'
implementation 'junit:junit:' + rootProject.junitVersion
implementation 'androidx.test:core:' + rootProject.coreVersion
implementation 'junit:junit:' + rootProject.junitVersion
implementation 'androidx.test:core:' + rootProject.coreVersion
implementation 'androidx.test.ext:junit:' + rootProject.extJUnitVersion
implementation 'androidx.test:rules:' + rootProject.rulesVersion
implementation 'androidx.test:runner:' + rootProject.runnerVersion
}

392
.ci/docker/build.sh 100755
View File

@ -0,0 +1,392 @@
#!/bin/bash
set -ex
image="$1"
shift
if [ -z "${image}" ]; then
echo "Usage: $0 IMAGE"
exit 1
fi
function extract_version_from_image_name() {
eval export $2=$(echo "${image}" | perl -n -e"/$1(\d+(\.\d+)?(\.\d+)?)/ && print \$1")
if [ "x${!2}" = x ]; then
echo "variable '$2' not correctly parsed from image='$image'"
exit 1
fi
}
function extract_all_from_image_name() {
# parts $image into array, splitting on '-'
keep_IFS="$IFS"
IFS="-"
declare -a parts=($image)
IFS="$keep_IFS"
unset keep_IFS
for part in "${parts[@]}"; do
name=$(echo "${part}" | perl -n -e"/([a-zA-Z]+)\d+(\.\d+)?(\.\d+)?/ && print \$1")
vername="${name^^}_VERSION"
# "py" is the odd one out, needs this special case
if [ "x${name}" = xpy ]; then
vername=ANACONDA_PYTHON_VERSION
fi
# skip non-conforming fields such as "pytorch", "linux" or "bionic" without version string
if [ -n "${name}" ]; then
extract_version_from_image_name "${name}" "${vername}"
fi
done
}
# Use the same pre-built XLA test image from PyTorch/XLA
if [[ "$image" == *xla* ]]; then
echo "Using pre-built XLA test image..."
exit 0
fi
if [[ "$image" == *-bionic* ]]; then
UBUNTU_VERSION=18.04
elif [[ "$image" == *-focal* ]]; then
UBUNTU_VERSION=20.04
elif [[ "$image" == *-jammy* ]]; then
UBUNTU_VERSION=22.04
elif [[ "$image" == *ubuntu* ]]; then
extract_version_from_image_name ubuntu UBUNTU_VERSION
elif [[ "$image" == *centos* ]]; then
extract_version_from_image_name centos CENTOS_VERSION
fi
if [ -n "${UBUNTU_VERSION}" ]; then
OS="ubuntu"
elif [ -n "${CENTOS_VERSION}" ]; then
OS="centos"
else
echo "Unable to derive operating system base..."
exit 1
fi
DOCKERFILE="${OS}/Dockerfile"
# When using ubuntu - 22.04, start from Ubuntu docker image, instead of nvidia/cuda docker image.
if [[ "$image" == *cuda* && "$UBUNTU_VERSION" != "22.04" ]]; then
DOCKERFILE="${OS}-cuda/Dockerfile"
elif [[ "$image" == *rocm* ]]; then
DOCKERFILE="${OS}-rocm/Dockerfile"
elif [[ "$image" == *linter* ]]; then
# Use a separate Dockerfile for linter to keep a small image size
DOCKERFILE="linter/Dockerfile"
fi
# CMake 3.18 is needed to support CUDA17 language variant
CMAKE_VERSION=3.18.5
_UCX_COMMIT=31e74cac7bee0ef66bef2af72e7d86d9c282e5ab
_UCC_COMMIT=1c7a7127186e7836f73aafbd7697bbc274a77eee
# It's annoying to rename jobs every time you want to rewrite a
# configuration, so we hardcode everything here rather than do it
# from scratch
case "$image" in
pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7)
CUDA_VERSION=11.6.2
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
;;
pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7)
CUDA_VERSION=11.7.0
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
;;
pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7)
CUDA_VERSION=11.8.0
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
;;
pytorch-linux-focal-py3-clang7-asan)
ANACONDA_PYTHON_VERSION=3.9
CLANG_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
CONDA_CMAKE=yes
;;
pytorch-linux-focal-py3-clang10-onnx)
ANACONDA_PYTHON_VERSION=3.8
CLANG_VERSION=10
PROTOBUF=yes
DB=yes
VISION=yes
CONDA_CMAKE=yes
;;
pytorch-linux-focal-py3-clang7-android-ndk-r19c)
ANACONDA_PYTHON_VERSION=3.7
CLANG_VERSION=7
LLVMDEV=yes
PROTOBUF=yes
ANDROID=yes
ANDROID_NDK_VERSION=r19c
GRADLE_VERSION=6.8.3
NINJA_VERSION=1.9.0
;;
pytorch-linux-bionic-py3.8-clang9)
ANACONDA_PYTHON_VERSION=3.8
CLANG_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
VULKAN_SDK_VERSION=1.2.162.1
SWIFTSHADER=yes
CONDA_CMAKE=yes
;;
pytorch-linux-bionic-py3.11-clang9)
ANACONDA_PYTHON_VERSION=3.11
CLANG_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
VULKAN_SDK_VERSION=1.2.162.1
SWIFTSHADER=yes
CONDA_CMAKE=yes
;;
pytorch-linux-bionic-py3.8-gcc9)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
CONDA_CMAKE=yes
;;
pytorch-linux-focal-rocm-n-1-py3)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=5.3
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
;;
pytorch-linux-focal-rocm-n-py3)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=5.4.2
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
;;
pytorch-linux-focal-py3.8-gcc7)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
CONDA_CMAKE=yes
;;
pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12)
ANACONDA_PYTHON_VERSION=3.8
CUDA_VERSION=11.6
CUDNN_VERSION=8
CLANG_VERSION=12
PROTOBUF=yes
DB=yes
VISION=yes
;;
pytorch-linux-jammy-cuda11.7-cudnn8-py3.8-clang12)
ANACONDA_PYTHON_VERSION=3.8
CUDA_VERSION=11.7
CUDNN_VERSION=8
CLANG_VERSION=12
PROTOBUF=yes
DB=yes
VISION=yes
;;
pytorch-linux-jammy-cuda11.8-cudnn8-py3.8-clang12)
ANACONDA_PYTHON_VERSION=3.8
CUDA_VERSION=11.8
CUDNN_VERSION=8
CLANG_VERSION=12
PROTOBUF=yes
DB=yes
VISION=yes
;;
pytorch-linux-focal-linter)
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
# We will need to update mypy version eventually, but that's for another day. The task
# would be to upgrade mypy to 1.0.0 with Python 3.11
ANACONDA_PYTHON_VERSION=3.9
CONDA_CMAKE=yes
;;
*)
# Catch-all for builds that are not hardcoded.
PROTOBUF=yes
DB=yes
VISION=yes
echo "image '$image' did not match an existing build configuration"
if [[ "$image" == *py* ]]; then
extract_version_from_image_name py ANACONDA_PYTHON_VERSION
fi
if [[ "$image" == *cuda* ]]; then
extract_version_from_image_name cuda CUDA_VERSION
extract_version_from_image_name cudnn CUDNN_VERSION
fi
if [[ "$image" == *rocm* ]]; then
extract_version_from_image_name rocm ROCM_VERSION
NINJA_VERSION=1.9.0
fi
if [[ "$image" == *centos7* ]]; then
NINJA_VERSION=1.10.2
fi
if [[ "$image" == *gcc* ]]; then
extract_version_from_image_name gcc GCC_VERSION
fi
if [[ "$image" == *clang* ]]; then
extract_version_from_image_name clang CLANG_VERSION
fi
if [[ "$image" == *devtoolset* ]]; then
extract_version_from_image_name devtoolset DEVTOOLSET_VERSION
fi
if [[ "$image" == *glibc* ]]; then
extract_version_from_image_name glibc GLIBC_VERSION
fi
if [[ "$image" == *cmake* ]]; then
extract_version_from_image_name cmake CMAKE_VERSION
fi
;;
esac
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
#when using cudnn version 8 install it separately from cuda
if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
if [[ ${CUDNN_VERSION} == 8 ]]; then
IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
fi
fi
# Build image
# TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
# it's no longer needed.
docker build \
--no-cache \
--progress=plain \
--build-arg "BUILD_ENVIRONMENT=${image}" \
--build-arg "PROTOBUF=${PROTOBUF:-}" \
--build-arg "THRIFT=${THRIFT:-}" \
--build-arg "LLVMDEV=${LLVMDEV:-}" \
--build-arg "DB=${DB:-}" \
--build-arg "VISION=${VISION:-}" \
--build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
--build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \
--build-arg "DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" \
--build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
--build-arg "CLANG_VERSION=${CLANG_VERSION}" \
--build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
--build-arg "GCC_VERSION=${GCC_VERSION}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
--build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
--build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
--build-arg "ANDROID=${ANDROID}" \
--build-arg "ANDROID_NDK=${ANDROID_NDK_VERSION}" \
--build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
--build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \
--build-arg "SWIFTSHADER=${SWIFTSHADER}" \
--build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
--build-arg "KATEX=${KATEX:-}" \
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx906}" \
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
--build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
-f $(dirname ${DOCKERFILE})/Dockerfile \
-t "$tmp_tag" \
"$@" \
.
# NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
# for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
# find the correct image. As a result, here we have to replace the
# "$UBUNTU_VERSION" == "18.04-rc"
# with
# "$UBUNTU_VERSION" == "18.04"
UBUNTU_VERSION=$(echo ${UBUNTU_VERSION} | sed 's/-rc$//')
function drun() {
docker run --rm "$tmp_tag" $*
}
if [[ "$OS" == "ubuntu" ]]; then
if !(drun lsb_release -a 2>&1 | grep -qF Ubuntu); then
echo "OS=ubuntu, but:"
drun lsb_release -a
exit 1
fi
if !(drun lsb_release -a 2>&1 | grep -qF "$UBUNTU_VERSION"); then
echo "UBUNTU_VERSION=$UBUNTU_VERSION, but:"
drun lsb_release -a
exit 1
fi
fi
if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
if !(drun python --version 2>&1 | grep -qF "Python $ANACONDA_PYTHON_VERSION"); then
echo "ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION, but:"
drun python --version
exit 1
fi
fi
if [ -n "$GCC_VERSION" ]; then
if !(drun gcc --version 2>&1 | grep -q " $GCC_VERSION\\W"); then
echo "GCC_VERSION=$GCC_VERSION, but:"
drun gcc --version
exit 1
fi
fi
if [ -n "$CLANG_VERSION" ]; then
if !(drun clang --version 2>&1 | grep -qF "clang version $CLANG_VERSION"); then
echo "CLANG_VERSION=$CLANG_VERSION, but:"
drun clang --version
exit 1
fi
fi
if [ -n "$KATEX" ]; then
if !(drun katex --version); then
echo "KATEX=$KATEX, but:"
drun katex --version
exit 1
fi
fi

View File

@ -0,0 +1,60 @@
#!/bin/bash
set -ex
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*)
}
# If UPSTREAM_BUILD_ID is set (see trigger job), then we can
# use it to tag this build with the same ID used to tag all other
# base image builds. Also, we can try and pull the previous
# image first, to avoid rebuilding layers that haven't changed.
#until we find a way to reliably reuse previous build, this last_tag is not in use
# last_tag="$(( CIRCLE_BUILD_NUM - 1 ))"
tag="${DOCKER_TAG}"
registry="308535385114.dkr.ecr.us-east-1.amazonaws.com"
image="${registry}/pytorch/${IMAGE_NAME}"
login() {
aws ecr get-authorization-token --region us-east-1 --output text --query 'authorizationData[].authorizationToken' |
base64 -d |
cut -d: -f2 |
docker login -u AWS --password-stdin "$1"
}
# Only run these steps if not on github actions
if [[ -z "${GITHUB_ACTIONS}" ]]; then
# Retry on timeouts (can happen on job stampede).
retry login "${registry}"
# Logout on exit
trap "docker logout ${registry}" EXIT
fi
# Try to pull the previous image (perhaps we can reuse some layers)
# if [ -n "${last_tag}" ]; then
# docker pull "${image}:${last_tag}" || true
# fi
# Build new image
./build.sh ${IMAGE_NAME} -t "${image}:${tag}"
# Only push if `DOCKER_SKIP_PUSH` = false
if [ "${DOCKER_SKIP_PUSH:-true}" = "false" ]; then
# Only push if docker image doesn't exist already.
# ECR image tags are immutable so this will avoid pushing if only just testing if the docker jobs work
# NOTE: The only workflow that should push these images should be the docker-builds.yml workflow
if ! docker manifest inspect "${image}:${tag}" >/dev/null 2>/dev/null; then
docker push "${image}:${tag}"
fi
fi
if [ -z "${DOCKER_SKIP_S3_UPLOAD:-}" ]; then
trap "rm -rf ${IMAGE_NAME}:${tag}.tar" EXIT
docker save -o "${IMAGE_NAME}:${tag}.tar" "${image}:${tag}"
aws s3 cp "${IMAGE_NAME}:${tag}.tar" "s3://ossci-linux-build/pytorch/base/${IMAGE_NAME}:${tag}.tar" --acl public-read
fi

View File

@ -0,0 +1,111 @@
ARG CENTOS_VERSION
FROM centos:${CENTOS_VERSION}
ARG CENTOS_VERSION
# Set AMD gpu targets to build for
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
# Install required packages to build Caffe2
# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh
# Update CentOS git version
RUN yum -y remove git
RUN yum -y remove git-*
RUN yum -y install https://packages.endpoint.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm || \
(yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm && \
sed -i "s/packages.endpoint/packages.endpointdev/" /etc/yum.repos.d/endpoint.repo)
RUN yum install -y git
# Install devtoolset
ARG DEVTOOLSET_VERSION
COPY ./common/install_devtoolset.sh install_devtoolset.sh
RUN bash ./install_devtoolset.sh && rm install_devtoolset.sh
ENV BASH_ENV "/etc/profile"
# (optional) Install non-default glibc version
ARG GLIBC_VERSION
COPY ./common/install_glibc.sh install_glibc.sh
RUN if [ -n "${GLIBC_VERSION}" ]; then bash ./install_glibc.sh; fi
RUN rm install_glibc.sh
# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh
# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh install_vision.sh
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh
ENV INSTALLED_VISION ${VISION}
# Install rocm
ARG ROCM_VERSION
COPY ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh
RUN rm install_rocm.sh
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh
RUN rm install_rocm_magma.sh
ENV PATH /opt/rocm/bin:$PATH
ENV PATH /opt/rocm/hcc/bin:$PATH
ENV PATH /opt/rocm/hip/bin:$PATH
ENV PATH /opt/rocm/opencl/bin:$PATH
ENV PATH /opt/rocm/llvm/bin:$PATH
ENV MAGMA_HOME /opt/rocm/magma
ENV LANG en_US.utf8
ENV LC_ALL en_US.utf8
# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh
# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh
# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
USER jenkins
CMD ["bash"]

View File

@ -0,0 +1,32 @@
#!/bin/bash
# Work around bug where devtoolset replaces sudo and breaks it.
if [ -n "$DEVTOOLSET_VERSION" ]; then
export SUDO=/bin/sudo
else
export SUDO=sudo
fi
as_jenkins() {
# NB: unsetting the environment variables works around a conda bug
# https://github.com/conda/conda/issues/6576
# NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
# NB: This must be run from a directory that jenkins has access to,
# works around https://github.com/conda/conda-package-handling/pull/34
$SUDO -H -u jenkins env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" $*
}
conda_install() {
# Ensure that the install command don't upgrade/downgrade Python
# This should be called as
# conda_install pkg1 pkg2 ... [-c channel]
as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION" $*
}
conda_run() {
as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION --no-capture-output $*
}
pip_install() {
as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off $*
}

View File

@ -0,0 +1,109 @@
#!/bin/bash
set -ex
[ -n "${ANDROID_NDK}" ]
_https_amazon_aws=https://ossci-android.s3.amazonaws.com
apt-get update
apt-get install -y --no-install-recommends autotools-dev autoconf unzip
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
pushd /tmp
curl -Os --retry 3 $_https_amazon_aws/android-ndk-${ANDROID_NDK}-linux-x86_64.zip
popd
_ndk_dir=/opt/ndk
mkdir -p "$_ndk_dir"
unzip -qo /tmp/android*.zip -d "$_ndk_dir"
_versioned_dir=$(find "$_ndk_dir/" -mindepth 1 -maxdepth 1 -type d)
mv "$_versioned_dir"/* "$_ndk_dir"/
rmdir "$_versioned_dir"
rm -rf /tmp/*
# Install OpenJDK
# https://hub.docker.com/r/picoded/ubuntu-openjdk-8-jdk/dockerfile/
sudo apt-get update && \
apt-get install -y openjdk-8-jdk && \
apt-get install -y ant && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
rm -rf /var/cache/oracle-jdk8-installer;
# Fix certificate issues, found as of
# https://bugs.launchpad.net/ubuntu/+source/ca-certificates-java/+bug/983302
sudo apt-get update && \
apt-get install -y ca-certificates-java && \
apt-get clean && \
update-ca-certificates -f && \
rm -rf /var/lib/apt/lists/* && \
rm -rf /var/cache/oracle-jdk8-installer;
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
# Installing android sdk
# https://github.com/circleci/circleci-images/blob/staging/android/Dockerfile.m4
_tmp_sdk_zip=/tmp/android-sdk-linux.zip
_android_home=/opt/android/sdk
rm -rf $_android_home
sudo mkdir -p $_android_home
curl --silent --show-error --location --fail --retry 3 --output /tmp/android-sdk-linux.zip $_https_amazon_aws/android-sdk-linux-tools3859397-build-tools2803-2902-platforms28-29.zip
sudo unzip -q $_tmp_sdk_zip -d $_android_home
rm $_tmp_sdk_zip
sudo chmod -R 777 $_android_home
export ANDROID_HOME=$_android_home
export ADB_INSTALL_TIMEOUT=120
export PATH="${ANDROID_HOME}/tools:${ANDROID_HOME}/tools/bin:${ANDROID_HOME}/platform-tools:${PATH}"
echo "PATH:${PATH}"
# Installing Gradle
echo "GRADLE_VERSION:${GRADLE_VERSION}"
_gradle_home=/opt/gradle
sudo rm -rf $gradle_home
sudo mkdir -p $_gradle_home
curl --silent --output /tmp/gradle.zip --retry 3 $_https_amazon_aws/gradle-${GRADLE_VERSION}-bin.zip
sudo unzip -q /tmp/gradle.zip -d $_gradle_home
rm /tmp/gradle.zip
sudo chmod -R 777 $_gradle_home
export GRADLE_HOME=$_gradle_home/gradle-$GRADLE_VERSION
alias gradle="${GRADLE_HOME}/bin/gradle"
export PATH="${GRADLE_HOME}/bin/:${PATH}"
echo "PATH:${PATH}"
gradle --version
mkdir /var/lib/jenkins/gradledeps
cp build.gradle /var/lib/jenkins/gradledeps
cp AndroidManifest.xml /var/lib/jenkins/gradledeps
pushd /var/lib/jenkins
export GRADLE_LOCAL_PROPERTIES=gradledeps/local.properties
rm -f $GRADLE_LOCAL_PROPERTIES
echo "sdk.dir=/opt/android/sdk" >> $GRADLE_LOCAL_PROPERTIES
echo "ndk.dir=/opt/ndk" >> $GRADLE_LOCAL_PROPERTIES
chown -R jenkins /var/lib/jenkins/gradledeps
chgrp -R jenkins /var/lib/jenkins/gradledeps
sudo -H -u jenkins $GRADLE_HOME/bin/gradle -Pandroid.useAndroidX=true -p /var/lib/jenkins/gradledeps -g /var/lib/jenkins/.gradle --refresh-dependencies --debug --stacktrace assemble
chown -R jenkins /var/lib/jenkins/.gradle
chgrp -R jenkins /var/lib/jenkins/.gradle
popd
rm -rf /var/lib/jenkins/.gradle/daemon

View File

@ -0,0 +1,169 @@
#!/bin/bash
set -ex
install_ubuntu() {
# NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
# for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
# find the correct image. As a result, here we have to check for
# "$UBUNTU_VERSION" == "18.04"*
# instead of
# "$UBUNTU_VERSION" == "18.04"
if [[ "$UBUNTU_VERSION" == "18.04"* ]]; then
cmake3="cmake=3.10*"
maybe_libiomp_dev="libiomp-dev"
elif [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
cmake3="cmake=3.16*"
maybe_libiomp_dev=""
elif [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
cmake3="cmake=3.22*"
maybe_libiomp_dev=""
else
cmake3="cmake=3.5*"
maybe_libiomp_dev="libiomp-dev"
fi
if [[ "$CLANG_VERSION" == 12 ]]; then
maybe_libomp_dev="libomp-12-dev"
elif [[ "$CLANG_VERSION" == 10 ]]; then
maybe_libomp_dev="libomp-10-dev"
else
maybe_libomp_dev=""
fi
# TODO: Remove this once nvidia package repos are back online
# Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
# shellcheck disable=SC2046
sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")
# Install common dependencies
apt-get update
# TODO: Some of these may not be necessary
ccache_deps="asciidoc docbook-xml docbook-xsl xsltproc"
deploy_deps="libffi-dev libbz2-dev libreadline-dev libncurses5-dev libncursesw5-dev libgdbm-dev libsqlite3-dev uuid-dev tk-dev"
numpy_deps="gfortran"
apt-get install -y --no-install-recommends \
$ccache_deps \
$numpy_deps \
${deploy_deps} \
${cmake3} \
apt-transport-https \
autoconf \
automake \
build-essential \
ca-certificates \
curl \
git \
libatlas-base-dev \
libc6-dbg \
${maybe_libiomp_dev} \
libyaml-dev \
libz-dev \
libjpeg-dev \
libasound2-dev \
libsndfile-dev \
${maybe_libomp_dev} \
software-properties-common \
wget \
sudo \
vim \
jq \
libtool \
vim \
unzip \
gdb
# Should resolve issues related to various apt package repository cert issues
# see: https://github.com/pytorch/pytorch/issues/65931
apt-get install -y libgnutls30
# cuda-toolkit does not work with gcc-11.2.0 which is default in Ubunutu 22.04
# see: https://github.com/NVlabs/instant-ngp/issues/119
if [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
apt-get install -y g++-10
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 30
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 30
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-10 30
# https://www.spinics.net/lists/libreoffice/msg07549.html
sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/11
wget https://github.com/gcc-mirror/gcc/commit/2b2d97fc545635a0f6aa9c9ee3b017394bc494bf.patch -O noexecpt.patch
sudo patch /usr/include/c++/10/bits/range_access.h noexecpt.patch
fi
# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}
install_centos() {
# Need EPEL for many packages we depend on.
# See http://fedoraproject.org/wiki/EPEL
yum --enablerepo=extras install -y epel-release
ccache_deps="asciidoc docbook-dtds docbook-style-xsl libxslt"
numpy_deps="gcc-gfortran"
# Note: protobuf-c-{compiler,devel} on CentOS are too old to be used
# for Caffe2. That said, we still install them to make sure the build
# system opts to build/use protoc and libprotobuf from third-party.
yum install -y \
$ccache_deps \
$numpy_deps \
autoconf \
automake \
bzip2 \
cmake \
cmake3 \
curl \
gcc \
gcc-c++ \
gflags-devel \
git \
glibc-devel \
glibc-headers \
glog-devel \
hiredis-devel \
libstdc++-devel \
libsndfile-devel \
make \
opencv-devel \
sudo \
wget \
vim \
unzip \
gdb
# Cleanup
yum clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
}
# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
centos)
install_centos
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac
# Install Valgrind separately since the apt-get version is too old.
mkdir valgrind_build && cd valgrind_build
VALGRIND_VERSION=3.20.0
wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
cd valgrind-${VALGRIND_VERSION}
./configure --prefix=/usr/local
make -j6
sudo make install
cd ../../
rm -rf valgrind_build
alias valgrind="/usr/local/bin/valgrind"

View File

@ -0,0 +1,121 @@
#!/bin/bash
set -ex
install_ubuntu() {
echo "Preparing to build sccache from source"
apt-get update
# libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.
# Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``
apt-get install -y cargo
echo "Checking out sccache repo"
git clone https://github.com/pytorch/sccache
cd sccache
echo "Building sccache"
cargo build --release
cp target/release/sccache /opt/cache/bin
echo "Cleaning up"
cd ..
rm -rf sccache
apt-get remove -y cargo rustc
apt-get autoclean && apt-get clean
}
install_binary() {
echo "Downloading sccache binary from S3 repo"
curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache
}
mkdir -p /opt/cache/bin
mkdir -p /opt/cache/lib
sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
export PATH="/opt/cache/bin:$PATH"
# Setup compiler cache
if [ -n "$ROCM_VERSION" ]; then
curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache
else
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
*)
install_binary
;;
esac
fi
chmod a+x /opt/cache/bin/sccache
function write_sccache_stub() {
# Unset LD_PRELOAD for ps because of asan + ps issues
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n exec sccache $(which $1) \"\$@\"\nelse\n exec $(which $1) \"\$@\"\nfi" > "/opt/cache/bin/$1"
chmod a+x "/opt/cache/bin/$1"
}
write_sccache_stub cc
write_sccache_stub c++
write_sccache_stub gcc
write_sccache_stub g++
# NOTE: See specific ROCM_VERSION case below.
if [ "x$ROCM_VERSION" = x ]; then
write_sccache_stub clang
write_sccache_stub clang++
fi
if [ -n "$CUDA_VERSION" ]; then
# TODO: This is a workaround for the fact that PyTorch's FindCUDA
# implementation cannot find nvcc if it is setup this way, because it
# appears to search for the nvcc in PATH, and use its path to infer
# where CUDA is installed. Instead, we install an nvcc symlink outside
# of the PATH, and set CUDA_NVCC_EXECUTABLE so that we make use of it.
write_sccache_stub nvcc
mv /opt/cache/bin/nvcc /opt/cache/lib/
fi
if [ -n "$ROCM_VERSION" ]; then
# ROCm compiler is hcc or clang. However, it is commonly invoked via hipcc wrapper.
# hipcc will call either hcc or clang using an absolute path starting with /opt/rocm,
# causing the /opt/cache/bin to be skipped. We must create the sccache wrappers
# directly under /opt/rocm while also preserving the original compiler names.
# Note symlinks will chain as follows: [hcc or clang++] -> clang -> clang-??
# Final link in symlink chain must point back to original directory.
# Original compiler is moved one directory deeper. Wrapper replaces it.
function write_sccache_stub_rocm() {
OLDCOMP=$1
COMPNAME=$(basename $OLDCOMP)
TOPDIR=$(dirname $OLDCOMP)
WRAPPED="$TOPDIR/original/$COMPNAME"
mv "$OLDCOMP" "$WRAPPED"
printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" > "$OLDCOMP"
chmod a+x "$OLDCOMP"
}
if [[ -e "/opt/rocm/hcc/bin/hcc" ]]; then
# ROCm 3.3 or earlier.
mkdir /opt/rocm/hcc/bin/original
write_sccache_stub_rocm /opt/rocm/hcc/bin/hcc
write_sccache_stub_rocm /opt/rocm/hcc/bin/clang
write_sccache_stub_rocm /opt/rocm/hcc/bin/clang++
# Fix last link in symlink chain, clang points to versioned clang in prior dir
pushd /opt/rocm/hcc/bin/original
ln -s ../$(readlink clang)
popd
elif [[ -e "/opt/rocm/llvm/bin/clang" ]]; then
# ROCm 3.5 and beyond.
mkdir /opt/rocm/llvm/bin/original
write_sccache_stub_rocm /opt/rocm/llvm/bin/clang
write_sccache_stub_rocm /opt/rocm/llvm/bin/clang++
# Fix last link in symlink chain, clang points to versioned clang in prior dir
pushd /opt/rocm/llvm/bin/original
ln -s ../$(readlink clang)
popd
else
echo "Cannot find ROCm compiler."
exit 1
fi
fi

View File

@ -0,0 +1,47 @@
#!/bin/bash
set -ex
if [ -n "$CLANG_VERSION" ]; then
if [[ $CLANG_VERSION == 7 && $UBUNTU_VERSION == 16.04 ]]; then
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
sudo apt-add-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main"
elif [[ $CLANG_VERSION == 9 && $UBUNTU_VERSION == 18.04 ]]; then
sudo apt-get update
# gpg-agent is not available by default on 18.04
sudo apt-get install -y --no-install-recommends gpg-agent
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-${CLANG_VERSION} main"
elif [[ $UBUNTU_VERSION == 22.04 ]]; then
# work around ubuntu apt-get conflicts
sudo apt-get -y -f install
fi
sudo apt-get update
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
# Install dev version of LLVM.
if [ -n "$LLVMDEV" ]; then
sudo apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"-dev
fi
# Use update-alternatives to make this version the default
# TODO: Decide if overriding gcc as well is a good idea
# update-alternatives --install /usr/bin/gcc gcc /usr/bin/clang-"$CLANG_VERSION" 50
# update-alternatives --install /usr/bin/g++ g++ /usr/bin/clang++-"$CLANG_VERSION" 50
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-"$CLANG_VERSION" 50
# clang's packaging is a little messed up (the runtime libs aren't
# added into the linker path), so give it a little help
clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
echo "$clang_lib" > /etc/ld.so.conf.d/clang.conf
ldconfig
# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
fi

View File

@ -0,0 +1,31 @@
#!/bin/bash
set -ex
[ -n "$CMAKE_VERSION" ]
# Remove system cmake install so it won't get used instead
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
apt-get remove cmake -y
;;
centos)
yum remove cmake -y
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac
# Turn 3.6.3 into v3.6
path=$(echo "${CMAKE_VERSION}" | sed -e 's/\([0-9].[0-9]\+\).*/v\1/')
file="cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz"
# Download and install specific CMake version in /usr/local
pushd /tmp
curl -Os --retry 3 "https://cmake.org/files/${path}/${file}"
tar -C /usr/local --strip-components 1 --no-same-owner -zxf cmake-*.tar.gz
rm -f cmake-*.tar.gz
popd

View File

@ -0,0 +1,98 @@
#!/bin/bash
set -ex
# Optionally install conda
if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
BASE_URL="https://repo.anaconda.com/miniconda"
MAJOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 1)
case "$MAJOR_PYTHON_VERSION" in
2)
CONDA_FILE="Miniconda2-latest-Linux-x86_64.sh"
;;
3)
CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
;;
*)
echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
exit 1
;;
esac
mkdir -p /opt/conda
chown jenkins:jenkins /opt/conda
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
pushd /tmp
wget -q "${BASE_URL}/${CONDA_FILE}"
# NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
as_jenkins bash "${CONDA_FILE}" -b -f -p "/opt/conda"
popd
# NB: Don't do this, rely on the rpath to get it right
#echo "/opt/conda/lib" > /etc/ld.so.conf.d/conda-python.conf
#ldconfig
sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
export PATH="/opt/conda/bin:$PATH"
# Ensure we run conda in a directory that jenkins has write access to
pushd /opt/conda
# Prevent conda from updating to 4.14.0, which causes docker build failures
# See https://hud.pytorch.org/pytorch/pytorch/commit/754d7f05b6841e555cea5a4b2c505dd9e0baec1d
# Uncomment the below when resolved to track the latest conda update
# as_jenkins conda update -y -n base conda
# Install correct Python version
as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION"
# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"
if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
# TODO: Stop using `-c malfet`
conda_install numpy=1.23.5 ${CONDA_COMMON_DEPS} llvmdev=8.0.0 -c malfet
elif [ "$ANACONDA_PYTHON_VERSION" = "3.10" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
elif [ "$ANACONDA_PYTHON_VERSION" = "3.9" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
conda_install numpy=1.19.2 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
elif [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
else
# Install `typing-extensions` for 3.7
conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS} typing-extensions
fi
# Use conda cmake in some cases. Conda cmake will be newer than our supported
# min version (3.5 for xenial and 3.10 for bionic), so we only do it in those
# following builds that we know should use conda. Specifically, Ubuntu bionic
# and focal cannot find conda mkl with stock cmake, so we need a cmake from conda
if [ -n "${CONDA_CMAKE}" ]; then
conda_install cmake
fi
# Magma package names are concatenation of CUDA major and minor ignoring revision
# I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
if [ -n "$CUDA_VERSION" ]; then
conda_install magma-cuda$(TMP=${CUDA_VERSION/./};echo ${TMP%.*[0-9]}) -c pytorch
fi
# Install some other packages, including those needed for Python test reporting
pip_install -r /opt/conda/requirements-ci.txt
# Update scikit-learn to a python-3.8 compatible version
if [[ $(python -c "import sys; print(int(sys.version_info >= (3, 8)))") == "1" ]]; then
pip_install -U scikit-learn
else
# Pinned scikit-learn due to https://github.com/scikit-learn/scikit-learn/issues/14485 (affects gcc 5.5 only)
pip_install scikit-learn==0.20.3
fi
popd
fi

View File

@ -0,0 +1,27 @@
#!/bin/bash
if [[ ${CUDNN_VERSION} == 8 ]]; then
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
if [[ ${CUDA_VERSION:0:4} == "11.7" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.5.0.96_cuda11-archive"
curl --retry 3 -OLs https://ossci-linux.s3.amazonaws.com/${CUDNN_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_NAME}.tar.xz
else
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz
fi
tar xf ${CUDNN_NAME}.tar.xz
cp -a ${CUDNN_NAME}/include/* /usr/include/
cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
cp -a ${CUDNN_NAME}/include/* /usr/include/x86_64-linux-gnu/
cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
cp -a ${CUDNN_NAME}/lib/* /usr/lib/x86_64-linux-gnu/
cd ..
rm -rf tmp_cudnn
ldconfig
fi

View File

@ -0,0 +1,49 @@
#!/bin/bash
set -ex
install_ubuntu() {
apt-get update
apt-get install -y --no-install-recommends \
libhiredis-dev \
libleveldb-dev \
liblmdb-dev \
libsnappy-dev
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}
install_centos() {
# Need EPEL for many packages we depend on.
# See http://fedoraproject.org/wiki/EPEL
yum --enablerepo=extras install -y epel-release
yum install -y \
hiredis-devel \
leveldb-devel \
lmdb-devel \
snappy-devel
# Cleanup
yum clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
}
# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
centos)
install_centos
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac

View File

@ -0,0 +1,10 @@
#!/bin/bash
set -ex
[ -n "$DEVTOOLSET_VERSION" ]
yum install -y centos-release-scl
yum install -y devtoolset-$DEVTOOLSET_VERSION
echo "source scl_source enable devtoolset-$DEVTOOLSET_VERSION" > "/etc/profile.d/devtoolset-$DEVTOOLSET_VERSION.sh"

View File

@ -0,0 +1,25 @@
#!/bin/bash
set -ex
if [ -n "$KATEX" ]; then
apt-get update
# Ignore error if gpg-agent doesn't exist (for Ubuntu 16.04)
apt-get install -y gpg-agent || :
curl --retry 3 -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -
sudo apt-get install -y nodejs
curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
apt-get update
apt-get install -y --no-install-recommends yarn
yarn global add katex --prefix /usr/local
sudo apt-get -y install doxygen
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
fi

View File

@ -0,0 +1,27 @@
#!/bin/bash
set -ex
if [ -n "$GCC_VERSION" ]; then
# Need the official toolchain repo to get alternate packages
add-apt-repository ppa:ubuntu-toolchain-r/test
apt-get update
if [[ "$UBUNTU_VERSION" == "16.04" && "${GCC_VERSION:0:1}" == "5" ]]; then
apt-get install -y g++-5=5.4.0-6ubuntu1~16.04.12
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 50
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 50
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-5 50
else
apt-get install -y g++-$GCC_VERSION
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50
fi
# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
fi

View File

@ -0,0 +1,34 @@
#!/bin/bash
set -ex
[ -n "$GLIBC_VERSION" ]
if [[ -n "$CENTOS_VERSION" ]]; then
[ -n "$DEVTOOLSET_VERSION" ]
fi
yum install -y wget sed
mkdir -p /packages && cd /packages
wget -q http://ftp.gnu.org/gnu/glibc/glibc-$GLIBC_VERSION.tar.gz
tar xzf glibc-$GLIBC_VERSION.tar.gz
if [[ "$GLIBC_VERSION" == "2.26" ]]; then
cd glibc-$GLIBC_VERSION
sed -i 's/$name ne "nss_test1"/$name ne "nss_test1" \&\& $name ne "nss_test2"/' scripts/test-installation.pl
cd ..
fi
mkdir -p glibc-$GLIBC_VERSION-build && cd glibc-$GLIBC_VERSION-build
if [[ -n "$CENTOS_VERSION" ]]; then
export PATH=/opt/rh/devtoolset-$DEVTOOLSET_VERSION/root/usr/bin:$PATH
fi
../glibc-$GLIBC_VERSION/configure --prefix=/usr CFLAGS='-Wno-stringop-truncation -Wno-format-overflow -Wno-restrict -Wno-format-truncation -g -O2'
make -j$(nproc)
make install
# Cleanup
rm -rf /packages
rm -rf /var/cache/yum/*
rm -rf /var/lib/rpm/__db.*
yum clean all

View File

@ -0,0 +1,6 @@
#!/bin/bash
set -ex
mkdir -p /usr/local/include
cp jni.h /usr/local/include

View File

@ -0,0 +1,8 @@
#!/bin/bash
set -ex
git clone --branch v1.15 https://github.com/linux-test-project/lcov.git
pushd lcov
sudo make install # will be installed in /usr/local/bin/lcov
popd

View File

@ -0,0 +1,29 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
if [ -n "${UBUNTU_VERSION}" ]; then
apt update
apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5
fi
# Do shallow clone of PyTorch so that we can init lintrunner in Docker build context
git clone https://github.com/pytorch/pytorch.git --depth 1
chown -R jenkins pytorch
pushd pytorch
# Install all linter dependencies
pip_install -r requirements.txt
conda_run lintrunner init
# Cache .lintbin directory as part of the Docker image
cp -r .lintbin /tmp
popd
# Node dependencies required by toc linter job
npm install -g markdown-toc
# Cleaning up
rm -rf pytorch

View File

@ -0,0 +1,13 @@
#!/bin/bash
set -ex
[ -n "$NINJA_VERSION" ]
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"
pushd /tmp
wget --no-verbose --output-document=ninja-linux.zip "$url"
unzip ninja-linux.zip -d /usr/local/bin
rm -f ninja-linux.zip
popd

View File

@ -0,0 +1,10 @@
#!/bin/bash
sudo apt-get update
# also install ssh to avoid error of:
# --------------------------------------------------------------------------
# The value of the MCA parameter "plm_rsh_agent" was set to a path
# that could not be found:
# plm_rsh_agent: ssh : rsh
sudo apt-get install -y ssh
sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev

View File

@ -0,0 +1,16 @@
#!/bin/bash
set -ex
OPENSSL=openssl-1.1.1k
wget -q -O "${OPENSSL}.tar.gz" "https://ossci-linux.s3.amazonaws.com/${OPENSSL}.tar.gz"
tar xf "${OPENSSL}.tar.gz"
cd "${OPENSSL}"
./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
# NOTE: openssl install errors out when built with the -j option
make -j6; make install_sw
# Link the ssl libraries to the /usr/lib folder.
sudo ln -s /opt/openssl/lib/lib* /usr/lib
cd ..
rm -rf "${OPENSSL}"

View File

@ -0,0 +1,56 @@
#!/bin/bash
set -ex
# This function installs protobuf 3.17
install_protobuf_317() {
pb_dir="/usr/temp_pb_install_dir"
mkdir -p $pb_dir
# On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
# else it will fail with
# g++: error: ./../lib64/crti.o: No such file or directory
ln -s /usr/lib64 "$pb_dir/lib64"
curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
# -j6 to balance memory usage and speed.
# naked `-j` seems to use too much memory.
pushd "$pb_dir" && ./configure && make -j6 && make -j6 check && sudo make -j6 install && sudo ldconfig
popd
rm -rf $pb_dir
}
install_ubuntu() {
# Ubuntu 14.04 has cmake 2.8.12 as the default option, so we will
# install cmake3 here and use cmake3.
apt-get update
if [[ "$UBUNTU_VERSION" == 14.04 ]]; then
apt-get install -y --no-install-recommends cmake3
fi
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
install_protobuf_317
}
install_centos() {
install_protobuf_317
}
# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
centos)
install_centos
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac

View File

@ -0,0 +1,146 @@
#!/bin/bash
set -ex
ver() {
printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
}
# Map ROCm version to AMDGPU version
declare -A AMDGPU_VERSIONS=( ["5.0"]="21.50" ["5.1.1"]="22.10.1" ["5.2"]="22.20" )
install_ubuntu() {
apt-get update
if [[ $UBUNTU_VERSION == 18.04 ]]; then
# gpg-agent is not available by default on 18.04
apt-get install -y --no-install-recommends gpg-agent
fi
if [[ $UBUNTU_VERSION == 20.04 ]]; then
# gpg-agent is not available by default on 20.04
apt-get install -y --no-install-recommends gpg-agent
fi
apt-get install -y kmod
apt-get install -y wget
# Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime
apt-get install -y libc++1
apt-get install -y libc++abi1
if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
# Add amdgpu repository
UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
local amdgpu_baseurl
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
else
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu"
fi
echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
fi
ROCM_REPO="ubuntu"
if [[ $(ver $ROCM_VERSION) -lt $(ver 4.2) ]]; then
ROCM_REPO="xenial"
fi
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
ROCM_REPO="${UBUNTU_VERSION_NAME}"
fi
# Add rocm repository
wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
echo "deb [arch=amd64] ${rocm_baseurl} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
apt-get update --allow-insecure-repositories
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
rocm-dev \
rocm-utils \
rocm-libs \
rccl \
rocprofiler-dev \
roctracer-dev
# precompiled miopen kernels added in ROCm 3.5; search for all unversioned packages
# if search fails it will abort this script; use true to avoid case where search fails
MIOPENKERNELS=$(apt-cache search --names-only miopenkernels | awk '{print $1}' | grep -F -v . || true)
if [[ "x${MIOPENKERNELS}" = x ]]; then
echo "miopenkernels package not available"
else
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENKERNELS}
fi
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}
install_centos() {
yum update -y
yum install -y kmod
yum install -y wget
yum install -y openblas-devel
yum install -y epel-release
yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`
if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
# Add amdgpu repository
local amdgpu_baseurl
if [[ $OS_VERSION == 9 ]]; then
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/9.0/main/x86_64"
else
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
else
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
fi
fi
echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
fi
local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo
echo "baseurl=${rocm_baseurl}" >> /etc/yum.repos.d/rocm.repo
echo "enabled=1" >> /etc/yum.repos.d/rocm.repo
echo "gpgcheck=1" >> /etc/yum.repos.d/rocm.repo
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/rocm.repo
yum update -y
yum install -y \
rocm-dev \
rocm-utils \
rocm-libs \
rccl \
rocprofiler-dev \
roctracer-dev
# Cleanup
yum clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
}
# Install Python packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
centos)
install_centos
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac

View File

@ -0,0 +1,29 @@
#!/bin/bash
set -ex
# "install" hipMAGMA into /opt/rocm/magma by copying after build
git clone https://bitbucket.org/icl/magma.git
pushd magma
# Fixes memory leaks of magma found while executing linalg UTs
git checkout 5959b8783e45f1809812ed96ae762f38ee701972
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
export PATH="${PATH}:/opt/rocm/bin"
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
else
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
fi
for arch in $amdgpu_targets; do
echo "DEVCCFLAGS += --amdgpu-target=$arch" >> make.inc
done
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
make -f make.gen.hipMAGMA -j $(nproc)
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION
make testing/testing_dgemm -j $(nproc) MKLROOT=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION
popd
mv magma /opt/rocm

View File

@ -0,0 +1,24 @@
#!/bin/bash
set -ex
[ -n "${SWIFTSHADER}" ]
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
_https_amazon_aws=https://ossci-android.s3.amazonaws.com
# SwiftShader
_swiftshader_dir=/var/lib/jenkins/swiftshader
_swiftshader_file_targz=swiftshader-abe07b943-prebuilt.tar.gz
mkdir -p $_swiftshader_dir
_tmp_swiftshader_targz="/tmp/${_swiftshader_file_targz}"
curl --silent --show-error --location --fail --retry 3 \
--output "${_tmp_swiftshader_targz}" "$_https_amazon_aws/${_swiftshader_file_targz}"
tar -C "${_swiftshader_dir}" -xzf "${_tmp_swiftshader_targz}"
export VK_ICD_FILENAMES="${_swiftshader_dir}/build/Linux/vk_swiftshader_icd.json"

View File

@ -0,0 +1,14 @@
apt-get update
apt-get install -y sudo wget libboost-dev libboost-test-dev libboost-program-options-dev libboost-filesystem-dev libboost-thread-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev
wget https://www-us.apache.org/dist/thrift/0.12.0/thrift-0.12.0.tar.gz
tar -xvf thrift-0.12.0.tar.gz
cd thrift-0.12.0
for file in ./compiler/cpp/Makefile*; do
sed -i 's/\-Werror//' $file
done
./bootstrap.sh
./configure --without-php --without-java --without-python --without-nodejs --without-go --without-ruby
sudo make
sudo make install
cd ..
rm thrift-0.12.0.tar.gz

View File

@ -0,0 +1,48 @@
#!/bin/bash
set -ex
if [[ -d "/usr/local/cuda/" ]]; then
with_cuda=/usr/local/cuda/
else
with_cuda=no
fi
function install_ucx() {
set -ex
git clone --recursive https://github.com/openucx/ucx.git
pushd ucx
git checkout ${UCX_COMMIT}
git submodule update --init --recursive
./autogen.sh
./configure --prefix=$UCX_HOME \
--enable-mt \
--with-cuda=$with_cuda \
--enable-profiling \
--enable-stats
time make -j
sudo make install
popd
rm -rf ucx
}
function install_ucc() {
set -ex
git clone --recursive https://github.com/openucx/ucc.git
pushd ucc
git checkout ${UCC_COMMIT}
git submodule update --init --recursive
./autogen.sh
./configure --prefix=$UCC_HOME --with-ucx=$UCX_HOME --with-cuda=$with_cuda
time make -j
sudo make install
popd
rm -rf ucc
}
install_ucx
install_ucc

View File

@ -0,0 +1,33 @@
#!/bin/bash
set -ex
# Mirror jenkins user in container
# jenkins user as ec2-user should have the same user-id
echo "jenkins:x:1000:1000::/var/lib/jenkins:" >> /etc/passwd
echo "jenkins:x:1000:" >> /etc/group
# Needed on focal or newer
echo "jenkins:*:19110:0:99999:7:::" >>/etc/shadow
# Create $HOME
mkdir -p /var/lib/jenkins
chown jenkins:jenkins /var/lib/jenkins
mkdir -p /var/lib/jenkins/.ccache
chown jenkins:jenkins /var/lib/jenkins/.ccache
# Allow writing to /usr/local (for make install)
chown jenkins:jenkins /usr/local
# Allow sudo
# TODO: Maybe we shouldn't
echo 'jenkins ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/jenkins
# Work around bug where devtoolset replaces sudo and breaks it.
if [ -n "$DEVTOOLSET_VERSION" ]; then
SUDO=/bin/sudo
else
SUDO=sudo
fi
# Test that sudo works
$SUDO -u jenkins $SUDO -v

View File

@ -0,0 +1,45 @@
#!/bin/bash
set -ex
install_ubuntu() {
apt-get update
apt-get install -y --no-install-recommends \
libopencv-dev \
libavcodec-dev
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}
install_centos() {
# Need EPEL for many packages we depend on.
# See http://fedoraproject.org/wiki/EPEL
yum --enablerepo=extras install -y epel-release
yum install -y \
opencv-devel \
ffmpeg-devel
# Cleanup
yum clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
}
# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
centos)
install_centos
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac

View File

@ -0,0 +1,24 @@
#!/bin/bash
set -ex
[ -n "${VULKAN_SDK_VERSION}" ]
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
_vulkansdk_dir=/var/lib/jenkins/vulkansdk
_tmp_vulkansdk_targz=/tmp/vulkansdk.tar.gz
curl \
--silent \
--show-error \
--location \
--fail \
--retry 3 \
--output "${_tmp_vulkansdk_targz}" "https://ossci-android.s3.amazonaws.com/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"
mkdir -p "${_vulkansdk_dir}"
tar -C "${_vulkansdk_dir}" -xzf "${_tmp_vulkansdk_targz}" --strip-components 1
rm -rf "${_tmp_vulkansdk_targz}"

1143
.ci/docker/java/jni.h 100644

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,34 @@
ARG UBUNTU_VERSION
FROM ubuntu:${UBUNTU_VERSION}
ARG UBUNTU_VERSION
ENV DEBIAN_FRONTEND noninteractive
# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh
# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh
# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
# Note that Docker build forbids copying file outside the build context
COPY ./common/install_linter.sh install_linter.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_linter.sh
RUN rm install_linter.sh common_utils.sh
USER jenkins
CMD ["bash"]

View File

@ -0,0 +1,260 @@
# Python dependencies required for unit tests
#awscli==1.6 #this breaks some platforms
#Description: AWS command line interface
#Pinned versions: 1.6
#test that import:
boto3==1.19.12
#Description: AWS SDK for python
#Pinned versions: 1.19.12, 1.16.34
#test that import:
click
#Description: Command Line Interface Creation Kit
#Pinned versions:
#test that import:
coremltools==5.0b5
#Description: Apple framework for ML integration
#Pinned versions: 5.0b5
#test that import:
#dataclasses #this breaks some platforms
#Description: Provides decorators for auto adding special methods to user classes
#Pinned versions:
#test that import:
expecttest==0.1.3
#Description: method for writing tests where test framework auto populates
# the expected output based on previous runs
#Pinned versions: 0.1.3
#test that import:
flatbuffers==2.0
#Description: cross platform serialization library
#Pinned versions: 2.0
#test that import:
hypothesis==5.35.1
# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
#Description: advanced library for generating parametrized tests
#Pinned versions: 3.44.6, 4.53.2
#test that import: test_xnnpack_integration.py, test_pruning_op.py, test_nn.py
junitparser==2.1.1
#Description: unitparser handles JUnit/xUnit Result XML files
#Pinned versions: 2.1.1
#test that import:
librosa>=0.6.2 ; python_version < "3.11"
#Description: A python package for music and audio analysis
#Pinned versions: >=0.6.2
#test that import: test_spectral_ops.py
#mkl #this breaks linux-bionic-rocm4.5-py3.7
#Description: Intel oneAPI Math Kernel Library
#Pinned versions:
#test that import: test_profiler.py, test_public_bindings.py, test_testing.py,
#test_nn.py, test_mkldnn.py, test_jit.py, test_fx_experimental.py,
#test_autograd.py
#mkl-devel
# see mkl
#mock # breaks ci/circleci: docker-pytorch-linux-xenial-py3-clang5-android-ndk-r19c
#Description: A testing library that allows you to replace parts of your
#system under test with mock objects
#Pinned versions:
#test that import: test_module_init.py, test_modules.py, test_nn.py,
#test_testing.py
#MonkeyType # breaks pytorch-xla-linux-bionic-py3.7-clang8
#Description: collects runtime types of function arguments and return
#values, and can automatically generate stub files
#Pinned versions:
#test that import:
mypy==0.960
# Pin MyPy version because new errors are likely to appear with each release
#Description: linter
#Pinned versions: 0.960
#test that import: test_typing.py, test_type_hints.py
networkx==2.6.3
#Description: creation, manipulation, and study of
#the structure, dynamics, and functions of complex networks
#Pinned versions: 2.6.3 (latest version that works with Python 3.7+)
#test that import: functorch
#ninja
#Description: build system. Note that it install from
#here breaks things so it is commented out
#Pinned versions: 1.10.0.post1
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
numba==0.49.0 ; python_version < "3.9"
numba==0.54.1 ; python_version == "3.9"
numba==0.55.2 ; python_version == "3.10"
#Description: Just-In-Time Compiler for Numerical Functions
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
#test that import: test_numba_integration.py
#For numba issue see https://github.com/pytorch/pytorch/issues/51511
#numpy
#Description: Provides N-dimensional arrays and linear algebra
#Pinned versions: 1.20
#test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
#test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
#test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
#test_spectral_ops.py, test_sort_and_select.py, test_shape_ops.py,
#test_segment_reductions.py, test_reductions.py, test_pruning_op.py,
#test_overrides.py, test_numpy_interop.py, test_numba_integration.py
#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
#test_binary_ufuncs.py
#onnxruntime
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
#Pinned versions: 1.9.0
#test that import:
opt-einsum==3.3
#Description: Python library to optimize tensor contraction order, used in einsum
#Pinned versions: 3.3
#test that import: test_linalg.py
#pillow
#Description: Python Imaging Library fork
#Pinned versions:
#test that import:
protobuf==3.20.2
#Description: Googles data interchange format
#Pinned versions: 3.20.1
#test that import: test_tensorboard.py
psutil
#Description: information on running processes and system utilization
#Pinned versions:
#test that import: test_profiler.py, test_openmp.py, test_dataloader.py
pytest
#Description: testing framework
#Pinned versions:
#test that import: test_typing.py, test_cpp_extensions_aot.py, run_test.py
pytest-xdist
#Description: plugin for running pytest in parallel
#Pinned versions:
#test that import:
pytest-shard
#Description: plugin spliting up tests in pytest
#Pinned versions:
#test that import:
pytest-flakefinder==1.1.0
#Description: plugin for rerunning tests a fixed number of times in pytest
#Pinned versions: 1.1.0
#test that import:
pytest-rerunfailures
#Description: plugin for rerunning failure tests in pytest
#Pinned versions:
#test that import:
#pytest-benchmark
#Description: fixture for benchmarking code
#Pinned versions: 3.2.3
#test that import:
#pytest-sugar
#Description: shows failures and errors instantly
#Pinned versions:
#test that import:
xdoctest==1.1.0
#Description: runs doctests in pytest
#Pinned versions: 1.1.0
#test that import:
pygments==2.12.0
#Description: support doctest highlighting
#Pinned versions: 2.12.0
#test that import: the doctests
#PyYAML
#Description: data serialization format
#Pinned versions:
#test that import:
#requests
#Description: HTTP library
#Pinned versions:
#test that import: test_type_promotion.py
#rich
#Description: rich text and beautiful formatting in the terminal
#Pinned versions: 10.9.0
#test that import:
scikit-image
#Description: image processing routines
#Pinned versions:
#test that import: test_nn.py
#scikit-learn
#Description: machine learning package
#Pinned versions: 0.20.3
#test that import:
scipy==1.6.3 ; python_version < "3.10"
scipy==1.8.1 ; python_version == "3.10"
scipy==1.9.3 ; python_version == "3.11"
# Pin SciPy because of failing distribution tests (see #60347)
#Description: scientific python
#Pinned versions: 1.6.3
#test that import: test_unary_ufuncs.py, test_torch.py,test_tensor_creation_ops.py
#test_spectral_ops.py, test_sparse_csr.py, test_reductions.py,test_nn.py
#test_linalg.py, test_binary_ufuncs.py
#tabulate
#Description: Pretty-print tabular data
#Pinned versions:
#test that import:
tb-nightly
#Description: TensorBoard
#Pinned versions:
#test that import:
#typing-extensions
#Description: type hints for python
#Pinned versions:
#test that import:
#virtualenv
#Description: virtual environment for python
#Pinned versions:
#test that import:
unittest-xml-reporting<=3.2.0,>=2.0.0
#Description: saves unit test results to xml
#Pinned versions:
#test that import:
lintrunner==0.9.2
#Description: all about linters
#Pinned versions: 0.9.2
#test that import:
rockset==1.0.3
#Description: queries Rockset
#Pinned versions: 1.0.3
#test that import:
ghstack==0.7.1
#Description: ghstack tool
#Pinned versions: 0.7.1
#test that import:

View File

@ -0,0 +1,132 @@
ARG UBUNTU_VERSION
ARG CUDA_VERSION
ARG IMAGE_NAME
FROM ${IMAGE_NAME}
ARG UBUNTU_VERSION
ARG CUDA_VERSION
ENV DEBIAN_FRONTEND noninteractive
# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh
# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh
# Install katex
ARG KATEX
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
ARG CONDA_CMAKE
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh
# Install clang
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh
# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh install_vision.sh
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh
ENV INSTALLED_VISION ${VISION}
# (optional) Install UCC
ARG UCX_COMMIT
ARG UCC_COMMIT
ENV UCX_COMMIT $UCX_COMMIT
ENV UCC_COMMIT $UCC_COMMIT
ENV UCX_HOME /usr
ENV UCC_HOME /usr
ADD ./common/install_ucc.sh install_ucc.sh
RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
RUN rm install_ucc.sh
COPY ./common/install_openssl.sh install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
RUN bash ./install_openssl.sh
ENV OPENSSL_DIR /opt/openssl
# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh
# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
# See https://github.com/pytorch/pytorch/issues/82174
# TODO(sdym@fb.com):
# check if this is needed after full off Xenial migration
ENV CARGO_NET_GIT_FETCH_WITH_CLI true
RUN bash ./install_cache.sh && rm install_cache.sh
ENV CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
# Add jni.h for java host build
COPY ./common/install_jni.sh install_jni.sh
COPY ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
# Install Open MPI for CUDA
COPY ./common/install_openmpi.sh install_openmpi.sh
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
RUN rm install_openmpi.sh
# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
# AWS specific CUDA build guidance
ENV TORCH_CUDA_ARCH_LIST Maxwell
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
ENV CUDA_PATH /usr/local/cuda
# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
# Install CUDNN
ARG CUDNN_VERSION
ARG CUDA_VERSION
COPY ./common/install_cudnn.sh install_cudnn.sh
RUN if [ "${CUDNN_VERSION}" -eq 8 ]; then bash install_cudnn.sh; fi
RUN rm install_cudnn.sh
# Delete /usr/local/cuda-11.X/cuda-11.X symlinks
RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi
USER jenkins
CMD ["bash"]

View File

@ -0,0 +1 @@
*.sh

View File

@ -0,0 +1,102 @@
ARG UBUNTU_VERSION
FROM ubuntu:${UBUNTU_VERSION}
ARG UBUNTU_VERSION
ENV DEBIAN_FRONTEND noninteractive
# Set AMD gpu targets to build for
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh
# Install clang
ARG LLVMDEV
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh
# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh
# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh
# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh install_vision.sh
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh
ENV INSTALLED_VISION ${VISION}
# Install rocm
ARG ROCM_VERSION
COPY ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh
RUN rm install_rocm.sh
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh
RUN rm install_rocm_magma.sh
ENV PATH /opt/rocm/bin:$PATH
ENV PATH /opt/rocm/hcc/bin:$PATH
ENV PATH /opt/rocm/hip/bin:$PATH
ENV PATH /opt/rocm/opencl/bin:$PATH
ENV PATH /opt/rocm/llvm/bin:$PATH
ENV MAGMA_HOME /opt/rocm/magma
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh
# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh
# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
USER jenkins
CMD ["bash"]

View File

@ -0,0 +1,165 @@
ARG UBUNTU_VERSION
FROM ubuntu:${UBUNTU_VERSION}
ARG UBUNTU_VERSION
ENV DEBIAN_FRONTEND noninteractive
ARG CLANG_VERSION
# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh
# Install clang
ARG LLVMDEV
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh
# (optional) Install thrift.
ARG THRIFT
COPY ./common/install_thrift.sh install_thrift.sh
RUN if [ -n "${THRIFT}" ]; then bash ./install_thrift.sh; fi
RUN rm install_thrift.sh
ENV INSTALLED_THRIFT ${THRIFT}
# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh
# Install katex
ARG KATEX
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh
# Install lcov for C++ code coverage
COPY ./common/install_lcov.sh install_lcov.sh
RUN bash ./install_lcov.sh && rm install_lcov.sh
# Install cuda and cudnn
ARG CUDA_VERSION
RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/common/install_cuda.sh -O install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
# (optional) Install UCC
ARG UCX_COMMIT
ARG UCC_COMMIT
ENV UCX_COMMIT $UCX_COMMIT
ENV UCC_COMMIT $UCC_COMMIT
ENV UCX_HOME /usr
ENV UCC_HOME /usr
ADD ./common/install_ucc.sh install_ucc.sh
RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
RUN rm install_ucc.sh
# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh install_vision.sh
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh
ENV INSTALLED_VISION ${VISION}
# (optional) Install Android NDK
ARG ANDROID
ARG ANDROID_NDK
ARG GRADLE_VERSION
COPY ./common/install_android.sh install_android.sh
COPY ./android/AndroidManifest.xml AndroidManifest.xml
COPY ./android/build.gradle build.gradle
RUN if [ -n "${ANDROID}" ]; then bash ./install_android.sh; fi
RUN rm install_android.sh
RUN rm AndroidManifest.xml
RUN rm build.gradle
ENV INSTALLED_ANDROID ${ANDROID}
# (optional) Install Vulkan SDK
ARG VULKAN_SDK_VERSION
COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh
RUN if [ -n "${VULKAN_SDK_VERSION}" ]; then bash ./install_vulkan_sdk.sh; fi
RUN rm install_vulkan_sdk.sh
# (optional) Install swiftshader
ARG SWIFTSHADER
COPY ./common/install_swiftshader.sh install_swiftshader.sh
RUN if [ -n "${SWIFTSHADER}" ]; then bash ./install_swiftshader.sh; fi
RUN rm install_swiftshader.sh
# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh
# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh
COPY ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
ENV OPENSSL_DIR /opt/openssl
RUN rm install_openssl.sh
# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
# Add jni.h for java host build
COPY ./common/install_jni.sh install_jni.sh
COPY ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
# Install Open MPI for CUDA
COPY ./common/install_openmpi.sh install_openmpi.sh
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
RUN rm install_openmpi.sh
# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
# AWS specific CUDA build guidance
ENV TORCH_CUDA_ARCH_LIST Maxwell
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
ENV CUDA_PATH /usr/local/cuda
USER jenkins
CMD ["bash"]

14
.ci/onnx/README.md 100644
View File

@ -0,0 +1,14 @@
# Jenkins
The scripts in this directory are the entrypoint for testing ONNX exporter.
The environment variable `BUILD_ENVIRONMENT` is expected to be set to
the build environment you intend to test. It is a hint for the build
and test scripts to configure Caffe2 a certain way and include/exclude
tests. Docker images, they equal the name of the image itself. For
example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are
built on Jenkins and are used in triggered builds already have this
environment variable set in their manifest. Also see
`./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`.
Our Jenkins installation is located at https://ci.pytorch.org/jenkins/.

19
.ci/onnx/common.sh 100644
View File

@ -0,0 +1,19 @@
set -ex
LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
TEST_DIR="$ROOT_DIR/test"
pytest_reports_dir="${TEST_DIR}/test-reports/python"
# Figure out which Python to use
PYTHON="$(which python)"
if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
PYTHON=$(which "python${BASH_REMATCH[1]}")
fi
if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
# HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
unset HIP_PLATFORM
fi
mkdir -p "$pytest_reports_dir" || true

74
.ci/onnx/test.sh 100755
View File

@ -0,0 +1,74 @@
#!/bin/bash
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
pip install click mock tabulate networkx==2.0
pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
fi
# Skip tests in environments where they are not built/applicable
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
echo 'Skipping tests'
exit 0
fi
if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
# temporary to locate some kernel issues on the CI nodes
export HSAKMT_DEBUG_LEVEL=4
fi
# These additional packages are needed for circleci ROCm builds.
if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
# Need networkx 2.0 because bellmand_ford was moved in 2.1 . Scikit-image by
# defaults installs the most recent networkx version, so we install this lower
# version explicitly before scikit-image pulls it in as a dependency
pip install networkx==2.0
# click - onnx
pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
fi
################################################################################
# Python tests #
################################################################################
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
exit 0
fi
# If pip is installed as root, we must use sudo.
# CircleCI docker images could install conda as jenkins user, or use the OS's python package.
PIP=$(which pip)
PIP_USER=$(stat --format '%U' $PIP)
CURRENT_USER=$(id -u -n)
if [[ "$PIP_USER" = root && "$CURRENT_USER" != root ]]; then
MAYBE_SUDO=sudo
fi
# Uninstall pre-installed hypothesis and coverage to use an older version as newer
# versions remove the timeout parameter from settings which ideep/conv_transpose_test.py uses
$MAYBE_SUDO pip -q uninstall -y hypothesis
$MAYBE_SUDO pip -q uninstall -y coverage
# "pip install hypothesis==3.44.6" from official server is unreliable on
# CircleCI, so we host a copy on S3 instead
$MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
$MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
$MAYBE_SUDO pip -q install hypothesis==4.57.1
##############
# ONNX tests #
##############
if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
pip install -q --user --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)"
pip install -q --user transformers==4.25.1
pip install -q --user ninja flatbuffers==2.0 numpy==1.22.4 onnxruntime==1.14.0 beartype==0.10.4
# TODO: change this when onnx 1.13.1 is released.
pip install --no-use-pep517 'onnx @ git+https://github.com/onnx/onnx@e192ba01e438d22ca2dedd7956e28e3551626c91'
# TODO: change this when onnx-script is on testPypi
pip install 'onnx-script @ git+https://github.com/microsoft/onnx-script@a71e35bcd72537bf7572536ee57250a0c0488bf6'
# numba requires numpy <= 1.20, onnxruntime requires numpy >= 1.21.
# We don't actually need it for our tests, but it's imported if it's present, so uninstall.
pip uninstall -q --yes numba
# JIT C++ extensions require ninja, so put it into PATH.
export PATH="/var/lib/jenkins/.local/bin:$PATH"
"$ROOT_DIR/scripts/onnx/test.sh"
fi

View File

@ -0,0 +1,4 @@
source-path=SCRIPTDIR
# we'd like to enable --external-sources here but can't
# https://github.com/koalaman/shellcheck/issues/1818

View File

@ -0,0 +1,42 @@
This directory contains scripts for our continuous integration.
One important thing to keep in mind when reading the scripts here is
that they are all based off of Docker images, which we build for each of
the various system configurations we want to run on Jenkins. This means
it is very easy to run these tests yourself:
1. Figure out what Docker image you want. The general template for our
images look like:
``registry.pytorch.org/pytorch/pytorch-$BUILD_ENVIRONMENT:$DOCKER_VERSION``,
where ``$BUILD_ENVIRONMENT`` is one of the build environments
enumerated in
[pytorch-dockerfiles](https://github.com/pytorch/pytorch/blob/master/.ci/docker/build.sh). The dockerfile used by jenkins can be found under the `.ci` [directory](https://github.com/pytorch/pytorch/blob/master/.ci/docker)
2. Run ``docker run -it -u jenkins $DOCKER_IMAGE``, clone PyTorch and
run one of the scripts in this directory.
The Docker images are designed so that any "reasonable" build commands
will work; if you look in [build.sh](build.sh) you will see that it is a
very simple script. This is intentional. Idiomatic build instructions
should work inside all of our Docker images. You can tweak the commands
however you need (e.g., in case you want to rebuild with DEBUG, or rerun
the build with higher verbosity, etc.).
We have to do some work to make this so. Here is a summary of the
mechanisms we use:
- We install binaries to directories like `/usr/local/bin` which
are automatically part of your PATH.
- We add entries to the PATH using Docker ENV variables (so
they apply when you enter Docker) and `/etc/environment` (so they
continue to apply even if you sudo), instead of modifying
`PATH` in our build scripts.
- We use `/etc/ld.so.conf.d` to register directories containing
shared libraries, instead of modifying `LD_LIBRARY_PATH` in our
build scripts.
- We reroute well known paths like `/usr/bin/gcc` to alternate
implementations with `update-alternatives`, instead of setting
`CC` and `CXX` in our implementations.

View File

@ -0,0 +1,42 @@
#!/bin/bash
# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
echo "Clang version:"
clang --version
python tools/stats/export_test_times.py
# detect_leaks=0: Python is very leaky, so we need suppress it
# symbolize=1: Gives us much better errors when things go wrong
export ASAN_OPTIONS=detect_leaks=0:detect_stack_use_after_return=1:symbolize=1:detect_odr_violation=0
if [ -n "$(which conda)" ]; then
export CMAKE_PREFIX_PATH=/opt/conda
fi
# TODO: Make the ASAN flags a centralized env var and unify with USE_ASAN option
CC="clang" CXX="clang++" LDSHARED="clang --shared" \
CFLAGS="-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -fsanitize-address-use-after-scope -shared-libasan" \
USE_ASAN=1 USE_CUDA=0 USE_MKLDNN=0 \
python setup.py bdist_wheel
pip_install_whl "$(echo dist/*.whl)"
# Test building via the sdist source tarball
python setup.py sdist
mkdir -p /tmp/tmp
pushd /tmp/tmp
tar zxf "$(dirname "${BASH_SOURCE[0]}")/../../dist/"*.tar.gz
cd torch-*
python setup.py build --cmake-only
popd
print_sccache_stats
assert_git_not_dirty

View File

@ -0,0 +1,34 @@
#!/usr/bin/env bash
# DO NOT ADD 'set -x' not to reveal CircleCI secret context environment variables
set -eu -o pipefail
# This script uses linux host toolchain + mobile build options in order to
# build & test mobile libtorch without having to setup Android/iOS
# toolchain/simulator.
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
# Install torch & torchvision - used to download & trace test model.
# Ideally we should use the libtorch built on the PR so that backward
# incompatible changes won't break this script - but it will significantly slow
# down mobile CI jobs.
# Here we install nightly instead of stable so that we have an option to
# temporarily skip mobile CI jobs on BC-breaking PRs until they are in nightly.
retry pip install --pre torch torchvision \
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html \
--progress-bar off
# Run end-to-end process of building mobile library, linking into the predictor
# binary, and running forward pass with a real model.
if [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-static* ]]; then
TEST_CUSTOM_BUILD_STATIC=1 test/mobile/custom_build/build.sh
elif [[ "$BUILD_ENVIRONMENT" == *-mobile-lightweight-dispatch* ]]; then
test/mobile/lightweight_dispatch/build.sh
else
TEST_DEFAULT_BUILD=1 test/mobile/custom_build/build.sh
fi
print_sccache_stats

View File

@ -0,0 +1,29 @@
#!/bin/bash
# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
echo "Clang version:"
clang --version
python tools/stats/export_test_times.py
if [ -n "$(which conda)" ]; then
export CMAKE_PREFIX_PATH=/opt/conda
fi
CC="clang" CXX="clang++" LDSHARED="clang --shared" \
CFLAGS="-fsanitize=thread" \
USE_TSAN=1 USE_CUDA=0 USE_MKLDNN=0 \
python setup.py bdist_wheel
pip_install_whl "$(echo dist/*.whl)"
print_sccache_stats
assert_git_not_dirty

View File

@ -0,0 +1,318 @@
#!/bin/bash
set -ex
# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
if [[ "$BUILD_ENVIRONMENT" == *-clang7-asan* ]]; then
exec "$(dirname "${BASH_SOURCE[0]}")/build-asan.sh" "$@"
fi
if [[ "$BUILD_ENVIRONMENT" == *-clang7-tsan* ]]; then
exec "$(dirname "${BASH_SOURCE[0]}")/build-tsan.sh" "$@"
fi
if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
fi
echo "Python version:"
python --version
echo "GCC version:"
gcc --version
echo "CMake version:"
cmake --version
echo "Environment variables:"
env
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
echo "NVCC version:"
nvcc --version
fi
if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *cuda11.3* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
# TODO: there is a linking issue when building with UCC using clang,
# disable it for now and to be fix later.
export USE_UCC=1
export USE_SYSTEM_UCC=1
fi
fi
if [[ ${BUILD_ENVIRONMENT} == *"caffe2"* ]]; then
echo "Caffe2 build is ON"
export BUILD_CAFFE2=ON
fi
if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
export ATEN_THREADING=TBB
export USE_TBB=1
elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
export ATEN_THREADING=NATIVE
fi
# Enable LLVM dependency for TensorExpr testing
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
export USE_LLVM=/opt/rocm/llvm
export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm
else
export USE_LLVM=/opt/llvm
export LLVM_DIR=/opt/llvm/lib/cmake/llvm
fi
if ! which conda; then
# In ROCm CIs, we are doing cross compilation on build machines with
# intel cpu and later run tests on machines with amd cpu.
# Also leave out two builds to make sure non-mkldnn builds still work.
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
export USE_MKLDNN=1
else
export USE_MKLDNN=0
fi
else
export CMAKE_PREFIX_PATH=/opt/conda
fi
if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then
POSSIBLE_JAVA_HOMES=()
POSSIBLE_JAVA_HOMES+=(/usr/local)
POSSIBLE_JAVA_HOMES+=(/usr/lib/jvm/java-8-openjdk-amd64)
POSSIBLE_JAVA_HOMES+=(/Library/Java/JavaVirtualMachines/*.jdk/Contents/Home)
# Add the Windows-specific JNI
POSSIBLE_JAVA_HOMES+=("$PWD/.circleci/windows-jni/")
for JH in "${POSSIBLE_JAVA_HOMES[@]}" ; do
if [[ -e "$JH/include/jni.h" ]] ; then
# Skip if we're not on Windows but haven't found a JAVA_HOME
if [[ "$JH" == "$PWD/.circleci/windows-jni/" && "$OSTYPE" != "msys" ]] ; then
break
fi
echo "Found jni.h under $JH"
export JAVA_HOME="$JH"
export BUILD_JNI=ON
break
fi
done
if [ -z "$JAVA_HOME" ]; then
echo "Did not find jni.h"
fi
fi
# Use special scripts for Android builds
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
export ANDROID_NDK=/opt/ndk
build_args=()
if [[ "${BUILD_ENVIRONMENT}" == *-arm-v7a* ]]; then
build_args+=("-DANDROID_ABI=armeabi-v7a")
elif [[ "${BUILD_ENVIRONMENT}" == *-arm-v8a* ]]; then
build_args+=("-DANDROID_ABI=arm64-v8a")
elif [[ "${BUILD_ENVIRONMENT}" == *-x86_32* ]]; then
build_args+=("-DANDROID_ABI=x86")
elif [[ "${BUILD_ENVIRONMENT}" == *-x86_64* ]]; then
build_args+=("-DANDROID_ABI=x86_64")
fi
if [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
build_args+=("-DUSE_VULKAN=ON")
fi
build_args+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
exec ./scripts/build_android.sh "${build_args[@]}" "$@"
fi
if [[ "$BUILD_ENVIRONMENT" != *android* && "$BUILD_ENVIRONMENT" == *vulkan* ]]; then
export USE_VULKAN=1
# shellcheck disable=SC1091
source /var/lib/jenkins/vulkansdk/setup-env.sh
fi
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
# hcc used to run out of memory, silently exiting without stopping
# the build process, leaving undefined symbols in the shared lib,
# causing undefined symbol errors when later running tests.
# We used to set MAX_JOBS to 4 to avoid, but this is no longer an issue.
if [ -z "$MAX_JOBS" ]; then
export MAX_JOBS=$(($(nproc) - 1))
fi
if [[ -n "$CI" && -z "$PYTORCH_ROCM_ARCH" ]]; then
# Set ROCM_ARCH to gfx906 for CI builds, if user doesn't override.
echo "Limiting PYTORCH_ROCM_ARCH to gfx906 for CI builds"
export PYTORCH_ROCM_ARCH="gfx906"
fi
# hipify sources
python tools/amd_build/build_amd.py
fi
# sccache will fail for CUDA builds if all cores are used for compiling
# gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
if [ -z "$MAX_JOBS" ]; then
if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then
export MAX_JOBS=$(($(nproc) - 1))
fi
fi
# TORCH_CUDA_ARCH_LIST must be passed from an environment variable
if [[ "$BUILD_ENVIRONMENT" == *cuda* && -z "$TORCH_CUDA_ARCH_LIST" ]]; then
echo "TORCH_CUDA_ARCH_LIST must be defined"
exit 1
fi
if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
export CC=clang
export CXX=clang++
fi
if [[ "${BUILD_ENVIRONMENT}" == *no-ops* ]]; then
export USE_PER_OPERATOR_HEADERS=0
fi
if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
export USE_PRECOMPILED_HEADERS=1
fi
if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build* ]]; then
export USE_GLOO_WITH_OPENSSL=ON
fi
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi
if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
set -e
get_bazel
# Leave 1 CPU free and use only up to 80% of memory to reduce the change of crashing
# the runner
BAZEL_MEM_LIMIT="--local_ram_resources=HOST_RAM*.8"
BAZEL_CPU_LIMIT="--local_cpu_resources=HOST_CPUS-1"
tools/bazel build --config=no-tty "${BAZEL_MEM_LIMIT}" "${BAZEL_CPU_LIMIT}" //...
# Build torch, the Python module, and tests for CPU-only
tools/bazel build --config=no-tty "${BAZEL_MEM_LIMIT}" "${BAZEL_CPU_LIMIT}" --config=cpu-only :torch :_C.so :all_tests
else
# check that setup.py would fail with bad arguments
echo "The next three invocations are expected to fail with invalid command error messages."
( ! get_exit_code python setup.py bad_argument )
( ! get_exit_code python setup.py clean] )
( ! get_exit_code python setup.py clean bad_argument )
if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then
# rocm builds fail when WERROR=1
# XLA test build fails when WERROR=1
# set only when building other architectures
# or building non-XLA tests.
if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
"$BUILD_ENVIRONMENT" != *xla* ]]; then
WERROR=1 python setup.py bdist_wheel
else
python setup.py bdist_wheel
fi
pip_install_whl "$(echo dist/*.whl)"
# TODO: I'm not sure why, but somehow we lose verbose commands
set -x
assert_git_not_dirty
# Copy ninja build logs to dist folder
mkdir -p dist
if [ -f build/.ninja_log ]; then
cp build/.ninja_log dist
fi
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
# remove sccache wrappers post-build; runtime compilation of MIOpen kernels does not yet fully support them
sudo rm -f /opt/cache/bin/cc
sudo rm -f /opt/cache/bin/c++
sudo rm -f /opt/cache/bin/gcc
sudo rm -f /opt/cache/bin/g++
pushd /opt/rocm/llvm/bin
if [[ -d original ]]; then
sudo mv original/clang .
sudo mv original/clang++ .
fi
sudo rm -rf original
popd
fi
CUSTOM_TEST_ARTIFACT_BUILD_DIR=${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}
CUSTOM_TEST_USE_ROCM=$([[ "$BUILD_ENVIRONMENT" == *rocm* ]] && echo "ON" || echo "OFF")
CUSTOM_TEST_MODULE_PATH="${PWD}/cmake/public"
mkdir -pv "${CUSTOM_TEST_ARTIFACT_BUILD_DIR}"
# Build custom operator tests.
CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
CUSTOM_OP_TEST="$PWD/test/custom_operator"
python --version
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
mkdir -p "$CUSTOM_OP_BUILD"
pushd "$CUSTOM_OP_BUILD"
cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
-DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
make VERBOSE=1
popd
assert_git_not_dirty
# Build jit hook tests
JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
JIT_HOOK_TEST="$PWD/test/jit_hooks"
python --version
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
mkdir -p "$JIT_HOOK_BUILD"
pushd "$JIT_HOOK_BUILD"
cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
-DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
make VERBOSE=1
popd
assert_git_not_dirty
# Build custom backend tests.
CUSTOM_BACKEND_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-backend-build"
CUSTOM_BACKEND_TEST="$PWD/test/custom_backend"
python --version
mkdir -p "$CUSTOM_BACKEND_BUILD"
pushd "$CUSTOM_BACKEND_BUILD"
cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
-DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
make VERBOSE=1
popd
assert_git_not_dirty
else
# Test no-Python build
echo "Building libtorch"
# This is an attempt to mitigate flaky libtorch build OOM error. By default, the build parallelization
# is set to be the number of CPU minus 2. So, let's try a more conservative value here. A 4xlarge has
# 16 CPUs
MAX_JOBS=$(nproc --ignore=4)
export MAX_JOBS
# NB: Install outside of source directory (at the same level as the root
# pytorch folder) so that it doesn't get cleaned away prior to docker push.
BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
mkdir -p ../cpp-build/caffe2
pushd ../cpp-build/caffe2
WERROR=1 VERBOSE=1 DEBUG=1 python "$BUILD_LIBTORCH_PY"
popd
fi
fi
if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]]; then
# export test times so that potential sharded tests that'll branch off this build will use consistent data
# don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
python tools/stats/export_test_times.py
fi
print_sccache_stats

View File

@ -0,0 +1,58 @@
#!/usr/bin/env bash
# This script can also be used to test whether your diff changes any codegen output.
#
# Run it before and after your change:
# .ci/pytorch/codegen-test.sh <baseline_output_dir>
# .ci/pytorch/codegen-test.sh <test_output_dir>
#
# Then run diff to compare the generated files:
# diff -Naur <baseline_output_dir> <test_output_dir>
set -eu -o pipefail
if [ "$#" -eq 0 ]; then
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
OUT="$(dirname "${BASH_SOURCE[0]}")/../../codegen_result"
else
OUT=$1
fi
set -x
rm -rf "$OUT"
# aten codegen
python -m torchgen.gen \
-s aten/src/ATen \
-d "$OUT"/torch/share/ATen
# torch codegen
python -m tools.setup_helpers.generate_code \
--install_dir "$OUT"
# pyi codegen
mkdir -p "$OUT"/pyi/torch/_C
mkdir -p "$OUT"/pyi/torch/nn
python -m tools.pyi.gen_pyi \
--native-functions-path aten/src/ATen/native/native_functions.yaml \
--tags-path aten/src/ATen/native/tags.yaml \
--deprecated-functions-path tools/autograd/deprecated.yaml \
--out "$OUT"/pyi
# autograd codegen (called by torch codegen but can run independently)
python -m tools.autograd.gen_autograd \
"$OUT"/torch/share/ATen/Declarations.yaml \
aten/src/ATen/native/native_functions.yaml \
aten/src/ATen/native/tags.yaml \
"$OUT"/autograd \
tools/autograd
# annotated_fn_args codegen (called by torch codegen but can run independently)
mkdir -p "$OUT"/annotated_fn_args
python -m tools.autograd.gen_annotated_fn_args \
aten/src/ATen/native/native_functions.yaml \
aten/src/ATen/native/tags.yaml \
"$OUT"/annotated_fn_args \
tools/autograd

View File

@ -0,0 +1,58 @@
#!/bin/bash
# Required environment variables:
# $BUILD_ENVIRONMENT (should be set by your Docker image)
if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
# Save the absolute path in case later we chdir (as occurs in the gpu perf test)
script_dir="$( cd "$(dirname "${BASH_SOURCE[0]}")" || exit ; pwd -P )"
if which sccache > /dev/null; then
# Save sccache logs to file
sccache --stop-server > /dev/null 2>&1 || true
rm -f ~/sccache_error.log || true
function sccache_epilogue() {
echo "::group::Sccache Compilation Log"
echo '=================== sccache compilation log ==================='
python "$script_dir/print_sccache_log.py" ~/sccache_error.log 2>/dev/null || true
echo '=========== If your build fails, please take a look at the log above for possible reasons ==========='
sccache --show-stats
sccache --stop-server || true
echo "::endgroup::"
}
# Register the function here so that the error log can be printed even when
# sccache fails to start, i.e. timeout error
trap_add sccache_epilogue EXIT
if [[ -n "${SKIP_SCCACHE_INITIALIZATION:-}" ]]; then
# sccache --start-server seems to hang forever on self hosted runners for GHA
# so let's just go ahead and skip the --start-server altogether since it seems
# as though sccache still gets used even when the sscache server isn't started
# explicitly
echo "Skipping sccache server initialization, setting environment variables"
export SCCACHE_IDLE_TIMEOUT=1200
export SCCACHE_ERROR_LOG=~/sccache_error.log
export RUST_LOG=sccache::server=error
elif [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
else
# increasing SCCACHE_IDLE_TIMEOUT so that extension_backend_test.cpp can build after this PR:
# https://github.com/pytorch/pytorch/pull/16645
SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=1200 RUST_LOG=sccache::server=error sccache --start-server
fi
# Report sccache stats for easier debugging
sccache --zero-stats
fi
if which ccache > /dev/null; then
# Report ccache stats for easier debugging
ccache --zero-stats
ccache --show-stats
function ccache_epilogue() {
ccache --show-stats
}
trap_add ccache_epilogue EXIT
fi
fi

View File

@ -0,0 +1,28 @@
#!/bin/bash
# Common setup for all Jenkins scripts
# shellcheck source=./common_utils.sh
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
set -ex
# Required environment variables:
# $BUILD_ENVIRONMENT (should be set by your Docker image)
# Figure out which Python to use for ROCm
if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
# HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
unset HIP_PLATFORM
export PYTORCH_TEST_WITH_ROCM=1
# temporary to locate some kernel issues on the CI nodes
export HSAKMT_DEBUG_LEVEL=4
# improve rccl performance for distributed tests
export HSA_FORCE_FINE_GRAIN_PCIE=1
fi
# TODO: Renable libtorch testing for MacOS, see https://github.com/pytorch/pytorch/issues/62598
# shellcheck disable=SC2034
BUILD_TEST_LIBTORCH=0
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}

View File

@ -0,0 +1,236 @@
#!/bin/bash
# Common util **functions** that can be sourced in other scripts.
# note: printf is used instead of echo to avoid backslash
# processing and to properly handle values that begin with a '-'.
log() { printf '%s\n' "$*"; }
error() { log "ERROR: $*" >&2; }
fatal() { error "$@"; exit 1; }
retry () {
"$@" || (sleep 10 && "$@") || (sleep 20 && "$@") || (sleep 40 && "$@")
}
# compositional trap taken from https://stackoverflow.com/a/7287873/23845
# appends a command to a trap
#
# - 1st arg: code to add
# - remaining args: names of traps to modify
#
trap_add() {
trap_add_cmd=$1; shift || fatal "${FUNCNAME[0]} usage error"
for trap_add_name in "$@"; do
trap -- "$(
# helper fn to get existing trap command from output
# of trap -p
extract_trap_cmd() { printf '%s\n' "$3"; }
# print existing trap command with newline
eval "extract_trap_cmd $(trap -p "${trap_add_name}")"
# print the new trap command
printf '%s\n' "${trap_add_cmd}"
)" "${trap_add_name}" \
|| fatal "unable to add to trap ${trap_add_name}"
done
}
# set the trace attribute for the above function. this is
# required to modify DEBUG or RETURN traps because functions don't
# inherit them unless the trace attribute is set
declare -f -t trap_add
function assert_git_not_dirty() {
# TODO: we should add an option to `build_amd.py` that reverts the repo to
# an unmodified state.
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]] && [[ "$BUILD_ENVIRONMENT" != *xla* ]] ; then
git_status=$(git status --porcelain)
if [[ $git_status ]]; then
echo "Build left local git repository checkout dirty"
echo "git status --porcelain:"
echo "${git_status}"
exit 1
fi
fi
}
function pip_install_whl() {
# This is used to install PyTorch and other build artifacts wheel locally
# without using any network connection
python3 -mpip install --no-index --no-deps "$@"
}
function pip_install() {
# retry 3 times
# old versions of pip don't have the "--progress-bar" flag
pip install --progress-bar off "$@" || pip install --progress-bar off "$@" || pip install --progress-bar off "$@" ||\
pip install "$@" || pip install "$@" || pip install "$@"
}
function pip_uninstall() {
# uninstall 2 times
pip uninstall -y "$@" || pip uninstall -y "$@"
}
function get_exit_code() {
set +e
"$@"
retcode=$?
set -e
return $retcode
}
function get_bazel() {
if [[ $(uname) == "Darwin" ]]; then
# download bazel version
retry curl https://github.com/bazelbuild/bazel/releases/download/4.2.1/bazel-4.2.1-darwin-x86_64 -Lo tools/bazel
# verify content
echo '74d93848f0c9d592e341e48341c53c87e3cb304a54a2a1ee9cff3df422f0b23c tools/bazel' | shasum -a 256 -c >/dev/null
else
# download bazel version
retry curl https://ossci-linux.s3.amazonaws.com/bazel-4.2.1-linux-x86_64 -o tools/bazel
# verify content
echo '1a4f3a3ce292307bceeb44f459883859c793436d564b95319aacb8af1f20557c tools/bazel' | shasum -a 256 -c >/dev/null
fi
chmod +x tools/bazel
}
function install_monkeytype {
# Install MonkeyType
pip_install MonkeyType
}
function get_pinned_commit() {
cat .github/ci_commit_pins/"${1}".txt
}
function install_torchtext() {
local commit
commit=$(get_pinned_commit text)
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/text.git@${commit}"
}
function install_torchvision() {
local commit
commit=$(get_pinned_commit vision)
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${commit}"
}
function clone_pytorch_xla() {
if [[ ! -d ./xla ]]; then
git clone --recursive -b r2.0 --quiet https://github.com/pytorch/xla.git
pushd xla
# pin the xla hash so that we don't get broken by changes to xla
git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
git submodule sync
git submodule update --init --recursive
popd
fi
}
function install_filelock() {
pip_install filelock
}
function install_triton() {
local commit
if [[ "${TEST_CONFIG}" == *rocm* ]]; then
echo "skipping triton due to rocm"
else
commit=$(get_pinned_commit triton)
if [[ "${BUILD_ENVIRONMENT}" == *gcc7* ]]; then
# Trition needs gcc-9 to build
sudo apt-get install -y g++-9
CXX=g++-9 pip_install --user "git+https://github.com/openai/triton@${commit}#subdirectory=python"
elif [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
# Trition needs <filesystem> which surprisingly is not available with clang-9 toolchain
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
sudo apt-get install -y g++-9
CXX=g++-9 pip_install --user "git+https://github.com/openai/triton@${commit}#subdirectory=python"
else
pip_install --user "git+https://github.com/openai/triton@${commit}#subdirectory=python"
fi
pip_install --user jinja2
fi
}
function setup_torchdeploy_deps(){
conda install -y -n "py_${ANACONDA_PYTHON_VERSION}" "libpython-static=${ANACONDA_PYTHON_VERSION}"
local CC
local CXX
CC="$(which gcc)"
CXX="$(which g++)"
export CC
export CXX
pip install --upgrade pip
}
function checkout_install_torchdeploy() {
local commit
commit=$(get_pinned_commit multipy)
setup_torchdeploy_deps
pushd ..
git clone --recurse-submodules https://github.com/pytorch/multipy.git
pushd multipy
git checkout "${commit}"
python multipy/runtime/example/generate_examples.py
pip install -e .
popd
popd
}
function test_torch_deploy(){
pushd ..
pushd multipy
./multipy/runtime/build/test_deploy
popd
popd
}
function install_huggingface() {
local commit
commit=$(get_pinned_commit huggingface)
pip_install pandas
pip_install scipy
pip_install "git+https://github.com/huggingface/transformers.git@${commit}#egg=transformers"
}
function install_timm() {
local commit
commit=$(get_pinned_commit timm)
pip_install pandas
pip_install scipy
pip_install "git+https://github.com/rwightman/pytorch-image-models@${commit}"
}
function checkout_install_torchbench() {
git clone https://github.com/pytorch/benchmark torchbench
pushd torchbench
git checkout no_torchaudio
if [ "$1" ]; then
python install.py --continue_on_fail models "$@"
else
# Occasionally the installation may fail on one model but it is ok to continue
# to install and test other models
python install.py --continue_on_fail
fi
popd
}
function test_functorch() {
python test/run_test.py --functorch --verbose
}
function print_sccache_stats() {
echo 'PyTorch Build Statistics'
sccache --show-stats
if [[ -n "${OUR_GITHUB_JOB_ID}" ]]; then
sccache --show-stats --stats-format json | jq .stats \
> "sccache-stats-${BUILD_ENVIRONMENT}-${OUR_GITHUB_JOB_ID}.json"
else
echo "env var OUR_GITHUB_JOB_ID not set, will not write sccache stats to json"
fi
}

View File

@ -0,0 +1,96 @@
from datetime import datetime, timedelta
from tempfile import mkdtemp
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography import x509
from cryptography.x509.oid import NameOID
from cryptography.hazmat.primitives import hashes
temp_dir = mkdtemp()
print(temp_dir)
def genrsa(path):
key = rsa.generate_private_key(
public_exponent=65537,
key_size=2048,
)
with open(path, "wb") as f:
f.write(key.private_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PrivateFormat.TraditionalOpenSSL,
encryption_algorithm=serialization.NoEncryption(),
))
return key
def create_cert(path, C, ST, L, O, key):
subject = issuer = x509.Name([
x509.NameAttribute(NameOID.COUNTRY_NAME, C),
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
x509.NameAttribute(NameOID.LOCALITY_NAME, L),
x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
])
cert = x509.CertificateBuilder().subject_name(
subject
).issuer_name(
issuer
).public_key(
key.public_key()
).serial_number(
x509.random_serial_number()
).not_valid_before(
datetime.utcnow()
).not_valid_after(
# Our certificate will be valid for 10 days
datetime.utcnow() + timedelta(days=10)
).add_extension(
x509.BasicConstraints(ca=True, path_length=None), critical=True,
).sign(key, hashes.SHA256())
# Write our certificate out to disk.
with open(path, "wb") as f:
f.write(cert.public_bytes(serialization.Encoding.PEM))
return cert
def create_req(path, C, ST, L, O, key):
csr = x509.CertificateSigningRequestBuilder().subject_name(x509.Name([
# Provide various details about who we are.
x509.NameAttribute(NameOID.COUNTRY_NAME, C),
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
x509.NameAttribute(NameOID.LOCALITY_NAME, L),
x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
])).sign(key, hashes.SHA256())
with open(path, "wb") as f:
f.write(csr.public_bytes(serialization.Encoding.PEM))
return csr
def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
cert = x509.CertificateBuilder().subject_name(
csr_cert.subject
).issuer_name(
ca_cert.subject
).public_key(
csr_cert.public_key()
).serial_number(
x509.random_serial_number()
).not_valid_before(
datetime.utcnow()
).not_valid_after(
# Our certificate will be valid for 10 days
datetime.utcnow() + timedelta(days=10)
# Sign our certificate with our private key
).sign(private_ca_key, hashes.SHA256())
with open(path, "wb") as f:
f.write(cert.public_bytes(serialization.Encoding.PEM))
return cert
ca_key = genrsa(temp_dir + "/ca.key")
ca_cert = create_cert(temp_dir + "/ca.pem", u"US", u"New York", u"New York", u"Gloo Certificate Authority", ca_key)
pkey = genrsa(temp_dir + "/pkey.key")
csr = create_req(temp_dir + "/csr.csr", u"US", u"California", u"San Francisco", u"Gloo Testing Company", pkey)
cert = sign_certificate_request(temp_dir + "/cert.pem", csr, ca_cert, ca_key)

View File

@ -0,0 +1,6 @@
#!/bin/bash
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
docker build -t pytorch .

View File

@ -0,0 +1,10 @@
#!/bin/bash
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
echo "Testing pytorch docs"
cd docs
pip_install -r requirements.txt
make doctest

View File

@ -0,0 +1 @@
raise ModuleNotFoundError("Sorry PyTorch, but our NumPy is in the other folder")

View File

@ -0,0 +1,11 @@
#!/bin/bash
if [ -z "${BUILD_ENVIRONMENT}" ] || [[ "${BUILD_ENVIRONMENT}" == *-build* ]]; then
# shellcheck source=./macos-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/macos-build.sh"
fi
if [ -z "${BUILD_ENVIRONMENT}" ] || [[ "${BUILD_ENVIRONMENT}" == *-test* ]]; then
# shellcheck source=./macos-test.sh
source "$(dirname "${BASH_SOURCE[0]}")/macos-test.sh"
fi

View File

@ -0,0 +1,80 @@
#!/bin/bash
# shellcheck disable=SC2034
# shellcheck source=./macos-common.sh
source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
# Build PyTorch
if [ -z "${CI}" ]; then
export DEVELOPER_DIR=/Applications/Xcode9.app/Contents/Developer
fi
# This helper function wraps calls to binaries with sccache, but only if they're not already wrapped with sccache.
# For example, `clang` will be `sccache clang`, but `sccache clang` will not become `sccache sccache clang`.
# The way this is done is by detecting the command of the parent pid of the current process and checking whether
# that is sccache, and wrapping sccache around the process if its parent were not already sccache.
function write_sccache_stub() {
output=$1
binary=$(basename "${output}")
printf "#!/bin/sh\nif [ \$(ps auxc \$(ps auxc -o ppid \$\$ | grep \$\$ | rev | cut -d' ' -f1 | rev) | tr '\\\\n' ' ' | rev | cut -d' ' -f2 | rev) != sccache ]; then\n exec sccache %s \"\$@\"\nelse\n exec %s \"\$@\"\nfi" "$(which "${binary}")" "$(which "${binary}")" > "${output}"
chmod a+x "${output}"
}
if which sccache > /dev/null; then
# Create temp directory for sccache shims
tmp_dir=$(mktemp -d)
trap 'rm -rfv ${tmp_dir}' EXIT
write_sccache_stub "${tmp_dir}/clang++"
write_sccache_stub "${tmp_dir}/clang"
export PATH="${tmp_dir}:$PATH"
fi
cross_compile_arm64() {
# Cross compilation for arm64
# Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
# that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
USE_DISTRIBUTED=0 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_MKLDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
}
compile_x86_64() {
USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel
}
build_lite_interpreter() {
echo "Testing libtorch (lite interpreter)."
CPP_BUILD="$(pwd)/../cpp_build"
# Ensure the removal of the tmp directory
trap 'rm -rfv ${CPP_BUILD}' EXIT
rm -rf "${CPP_BUILD}"
mkdir -p "${CPP_BUILD}/caffe2"
# It looks libtorch need to be built in "${CPP_BUILD}/caffe2 folder.
BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
pushd "${CPP_BUILD}/caffe2" || exit
VERBOSE=1 DEBUG=1 python "${BUILD_LIBTORCH_PY}"
popd || exit
"${CPP_BUILD}/caffe2/build/bin/test_lite_interpreter_runtime"
}
if [[ ${BUILD_ENVIRONMENT} = *arm64* ]]; then
cross_compile_arm64
elif [[ ${BUILD_ENVIRONMENT} = *lite-interpreter* ]]; then
export BUILD_LITE_INTERPRETER=1
build_lite_interpreter
else
compile_x86_64
fi
if which sccache > /dev/null; then
print_sccache_stats
fi
python tools/stats/export_test_times.py
assert_git_not_dirty

View File

@ -0,0 +1,14 @@
#!/bin/bash
# Common prelude for macos-build.sh and macos-test.sh
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
sysctl -a | grep machdep.cpu
# These are required for both the build job and the test job.
# In the latter to test cpp extensions.
export MACOSX_DEPLOYMENT_TARGET=10.9
export CXX=clang++
export CC=clang

View File

@ -0,0 +1,186 @@
#!/bin/bash
# shellcheck disable=SC2034
# shellcheck source=./macos-common.sh
source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
if [[ -n "$CONDA_ENV" ]]; then
# Use binaries under conda environment
export PATH="$CONDA_ENV/bin":$PATH
fi
# Test that OpenMP is enabled for non-arm64 build
if [[ ${BUILD_ENVIRONMENT} != *arm64* ]]; then
pushd test
if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then
echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False"
exit 1
fi
popd
fi
setup_test_python() {
# The CircleCI worker hostname doesn't resolve to an address.
# This environment variable makes ProcessGroupGloo default to
# using the address associated with the loopback interface.
export GLOO_SOCKET_IFNAME=lo0
echo "Ninja version: $(ninja --version)"
# Increase default limit on open file handles from 256 to 1024
ulimit -n 1024
}
test_python_all() {
setup_test_python
time python test/run_test.py --verbose --exclude-jit-executor
assert_git_not_dirty
}
test_python_shard() {
if [[ -z "$NUM_TEST_SHARDS" ]]; then
echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
exit 1
fi
setup_test_python
time python test/run_test.py --verbose --exclude-jit-executor --exclude-distributed-tests --shard "$1" "$NUM_TEST_SHARDS"
assert_git_not_dirty
}
test_libtorch() {
# C++ API
if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then
# NB: Install outside of source directory (at the same level as the root
# pytorch folder) so that it doesn't get cleaned away prior to docker push.
# But still clean it before we perform our own build.
echo "Testing libtorch"
CPP_BUILD="$PWD/../cpp-build"
rm -rf "$CPP_BUILD"
mkdir -p "$CPP_BUILD"/caffe2
BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
pushd "$CPP_BUILD"/caffe2
VERBOSE=1 DEBUG=1 python "$BUILD_LIBTORCH_PY"
popd
python tools/download_mnist.py --quiet -d test/cpp/api/mnist
# Unfortunately it seems like the test can't load from miniconda3
# without these paths being set
export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$PWD/miniconda3/lib"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/miniconda3/lib"
TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" "$CPP_BUILD"/caffe2/bin/test_api
assert_git_not_dirty
fi
}
print_cmake_info() {
CMAKE_EXEC=$(which cmake)
echo "$CMAKE_EXEC"
CONDA_INSTALLATION_DIR=$(dirname "$CMAKE_EXEC")
# Print all libraries under cmake rpath for debugging
ls -la "$CONDA_INSTALLATION_DIR/../lib"
export CMAKE_EXEC
# Explicitly add conda env lib folder to cmake rpath to address the flaky issue
# where cmake dependencies couldn't be found. This seems to point to how conda
# links $CMAKE_EXEC to its package cache when cloning a new environment
install_name_tool -add_rpath @executable_path/../lib "${CMAKE_EXEC}" || true
# Adding the rpath will invalidate cmake signature, so signing it again here
# to trust the executable. EXC_BAD_ACCESS (SIGKILL (Code Signature Invalid))
# with an exit code 137 otherwise
codesign -f -s - "${CMAKE_EXEC}" || true
}
test_custom_backend() {
print_cmake_info
echo "Testing custom backends"
pushd test/custom_backend
rm -rf build && mkdir build
pushd build
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
make VERBOSE=1
popd
# Run Python tests and export a lowered module.
python test_custom_backend.py -v
python backend.py --export-module-to=model.pt
# Run C++ tests using the exported module.
build/test_custom_backend ./model.pt
rm -f ./model.pt
popd
assert_git_not_dirty
}
test_custom_script_ops() {
print_cmake_info
echo "Testing custom script operators"
pushd test/custom_operator
# Build the custom operator library.
rm -rf build && mkdir build
pushd build
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
make VERBOSE=1
popd
# Run tests Python-side and export a script module.
python test_custom_ops.py -v
python model.py --export-script-module=model.pt
# Run tests C++-side and load the exported script module.
build/test_custom_ops ./model.pt
popd
assert_git_not_dirty
}
test_jit_hooks() {
print_cmake_info
echo "Testing jit hooks in cpp"
pushd test/jit_hooks
# Build the custom operator library.
rm -rf build && mkdir build
pushd build
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
make VERBOSE=1
popd
# Run tests Python-side and export a script module.
python model.py --export-script-module=model
# Run tests C++-side and load the exported script module.
build/test_jit_hooks ./model
popd
assert_git_not_dirty
}
if [[ "${TEST_CONFIG}" == *functorch* ]]; then
test_functorch
elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then
test_python_shard "${SHARD_NUMBER}"
if [[ "${SHARD_NUMBER}" == 1 ]]; then
test_libtorch
test_custom_script_ops
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
test_jit_hooks
test_custom_backend
fi
else
test_python_all
test_libtorch
test_custom_script_ops
test_jit_hooks
test_custom_backend
fi

View File

@ -0,0 +1,49 @@
#!/bin/bash
# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
echo "Testing pytorch"
# Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
# OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api
time python test/run_test.py --verbose -i distributed/test_c10d_common
time python test/run_test.py --verbose -i distributed/test_c10d_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_nccl
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
time python test/run_test.py --verbose -i distributed/test_store
time python test/run_test.py --verbose -i distributed/test_pg_wrapper
time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
# FSDP tests
for f in test/distributed/fsdp/*.py ; do time python test/run_test.py --verbose -i "${f#*/}" ; done
# ShardedTensor tests
time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_megatron_prototype
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_chunk
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_elementwise_ops
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_embedding
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_embedding_bag
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_binary_cmp
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_init
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_linear
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_math_ops
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_matrix_ops
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_softmax
time python test/run_test.py --verbose -i distributed/_shard/sharded_optim/test_sharded_optim
time python test/run_test.py --verbose -i distributed/_shard/test_partial_tensor
time python test/run_test.py --verbose -i distributed/_shard/test_replicated_tensor
# Other tests
time python test/run_test.py --verbose -i test_cuda_primary_ctx
time python test/run_test.py --verbose -i test_optim -- -k optimizers_with_varying_tensors
assert_git_not_dirty

View File

@ -0,0 +1,22 @@
#!/bin/bash
set -e
run_test () {
rm -rf test_tmp/ && mkdir test_tmp/ && cd test_tmp/
"$@"
cd .. && rm -rf test_tmp/
}
get_runtime_of_command () {
TIMEFORMAT=%R
# runtime=$( { time ($@ &> /dev/null); } 2>&1 1>/dev/null)
runtime=$( { time "$@"; } 2>&1 1>/dev/null)
if [[ $runtime == *"Error"* ]]; then
exit 1
fi
runtime=${runtime#+++ $@}
runtime=$(python -c "print($runtime)")
echo "$runtime"
}

View File

@ -0,0 +1,79 @@
import sys
import json
import math
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--test-name', dest='test_name', action='store',
required=True, help='test name')
parser.add_argument('--sample-stats', dest='sample_stats', action='store',
required=True, help='stats from sample')
parser.add_argument('--update', action='store_true',
help='whether to update baseline using stats from sample')
args = parser.parse_args()
test_name = args.test_name
if 'cpu' in test_name:
backend = 'cpu'
elif 'gpu' in test_name:
backend = 'gpu'
data_file_path = '../{}_runtime.json'.format(backend)
with open(data_file_path) as data_file:
data = json.load(data_file)
if test_name in data:
mean = float(data[test_name]['mean'])
sigma = float(data[test_name]['sigma'])
else:
# Let the test pass if baseline number doesn't exist
mean = sys.maxsize
sigma = 0.001
print("population mean: ", mean)
print("population sigma: ", sigma)
# Let the test pass if baseline number is NaN (which happened in
# the past when we didn't have logic for catching NaN numbers)
if math.isnan(mean) or math.isnan(sigma):
mean = sys.maxsize
sigma = 0.001
sample_stats_data = json.loads(args.sample_stats)
sample_mean = float(sample_stats_data['mean'])
sample_sigma = float(sample_stats_data['sigma'])
print("sample mean: ", sample_mean)
print("sample sigma: ", sample_sigma)
if math.isnan(sample_mean):
raise Exception('''Error: sample mean is NaN''')
elif math.isnan(sample_sigma):
raise Exception('''Error: sample sigma is NaN''')
z_value = (sample_mean - mean) / sigma
print("z-value: ", z_value)
if z_value >= 3:
raise Exception('''\n
z-value >= 3, there is high chance of perf regression.\n
To reproduce this regression, run
`cd .ci/pytorch/perf_test/ && bash {}.sh` on your local machine
and compare the runtime before/after your code change.
'''.format(test_name))
else:
print("z-value < 3, no perf regression detected.")
if args.update:
print("We will use these numbers as new baseline.")
new_data_file_path = '../new_{}_runtime.json'.format(backend)
with open(new_data_file_path) as new_data_file:
new_data = json.load(new_data_file)
new_data[test_name] = {}
new_data[test_name]['mean'] = sample_mean
new_data[test_name]['sigma'] = max(sample_sigma, sample_mean * 0.1)
with open(new_data_file_path, 'w') as new_data_file:
json.dump(new_data, new_data_file, indent=4)

View File

@ -0,0 +1,16 @@
import sys
import json
import numpy
sample_data_list = sys.argv[1:]
sample_data_list = [float(v.strip()) for v in sample_data_list]
sample_mean = numpy.mean(sample_data_list)
sample_sigma = numpy.std(sample_data_list)
data = {
'mean': sample_mean,
'sigma': sample_sigma,
}
print(json.dumps(data))

View File

@ -0,0 +1,43 @@
#!/bin/bash
set -e
. ./common.sh
test_cpu_speed_mini_sequence_labeler () {
echo "Testing: mini sequence labeler, CPU"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
git clone https://github.com/pytorch/benchmark.git
cd benchmark/
git checkout 726567a455edbfda6199445922a8cfee82535664
cd scripts/mini_sequence_labeler
SAMPLE_ARRAY=()
NUM_RUNS=$1
for (( i=1; i<=NUM_RUNS; i++ )) do
runtime=$(get_runtime_of_command python main.py)
SAMPLE_ARRAY+=("${runtime}")
done
cd ../../..
stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
echo "Runtime stats in seconds:"
echo "$stats"
if [ "$2" == "compare_with_baseline" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
elif [ "$2" == "compare_and_update" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
fi
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
run_test test_cpu_speed_mini_sequence_labeler "$@"
fi

View File

@ -0,0 +1,45 @@
#!/bin/bash
set -e
. ./common.sh
test_cpu_speed_mnist () {
echo "Testing: MNIST, CPU"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
git clone https://github.com/pytorch/examples.git -b perftests
cd examples/mnist
conda install -c pytorch torchvision-cpu
# Download data
python main.py --epochs 0
SAMPLE_ARRAY=()
NUM_RUNS=$1
for (( i=1; i<=NUM_RUNS; i++ )) do
runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log)
echo "$runtime"
SAMPLE_ARRAY+=("${runtime}")
done
cd ../..
stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
echo "Runtime stats in seconds:"
echo "$stats"
if [ "$2" == "compare_with_baseline" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
elif [ "$2" == "compare_and_update" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
fi
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
run_test test_cpu_speed_mnist "$@"
fi

View File

@ -0,0 +1,29 @@
#!/bin/bash
. ./common.sh
test_cpu_speed_torch () {
echo "Testing: torch.*, CPU"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
git clone https://github.com/yf225/perf-tests.git
if [ "$1" == "compare_with_baseline" ]; then
export ARGS=(--compare ../cpu_runtime.json)
elif [ "$1" == "compare_and_update" ]; then
export ARGS=(--compare ../cpu_runtime.json --update ../new_cpu_runtime.json)
elif [ "$1" == "update_only" ]; then
export ARGS=(--update ../new_cpu_runtime.json)
fi
if ! python perf-tests/modules/test_cpu_torch.py "${ARGS[@]}"; then
echo "To reproduce this regression, run \`cd .ci/pytorch/perf_test/ && bash ${FUNCNAME[0]}.sh\` on your local machine and compare the runtime before/after your code change."
exit 1
fi
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
run_test test_cpu_speed_torch "$@"
fi

View File

@ -0,0 +1,29 @@
#!/bin/bash
. ./common.sh
test_cpu_speed_torch_tensor () {
echo "Testing: torch.Tensor.*, CPU"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
git clone https://github.com/yf225/perf-tests.git
if [ "$1" == "compare_with_baseline" ]; then
export ARGS=(--compare ../cpu_runtime.json)
elif [ "$1" == "compare_and_update" ]; then
export ARGS=(--compare ../cpu_runtime.json --update ../new_cpu_runtime.json)
elif [ "$1" == "update_only" ]; then
export ARGS=(--update ../new_cpu_runtime.json)
fi
if ! python perf-tests/modules/test_cpu_torch_tensor.py "${ARGS[@]}"; then
echo "To reproduce this regression, run \`cd .ci/pytorch/perf_test/ && bash ${FUNCNAME[0]}.sh\` on your local machine and compare the runtime before/after your code change."
exit 1
fi
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
run_test test_cpu_speed_torch_tensor "$@"
fi

View File

@ -0,0 +1,44 @@
#!/bin/bash
set -e
. ./common.sh
test_gpu_speed_cudnn_lstm () {
echo "Testing: CuDNN LSTM, GPU"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
git clone https://github.com/pytorch/benchmark.git
cd benchmark/
git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0
cd scripts/
SAMPLE_ARRAY=()
NUM_RUNS=$1
for (( i=1; i<=NUM_RUNS; i++ )) do
runtime=$(get_runtime_of_command python cudnn_lstm.py --skip-cpu-governor-check)
echo "$runtime"
SAMPLE_ARRAY+=("${runtime}")
done
cd ../..
stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
echo "Runtime stats in seconds:"
echo "$stats"
if [ "$2" == "compare_with_baseline" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
elif [ "$2" == "compare_and_update" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
fi
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
run_test test_gpu_speed_cudnn_lstm "$@"
fi

View File

@ -0,0 +1,44 @@
#!/bin/bash
set -e
. ./common.sh
test_gpu_speed_lstm () {
echo "Testing: LSTM, GPU"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
git clone https://github.com/pytorch/benchmark.git
cd benchmark/
git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0
cd scripts/
SAMPLE_ARRAY=()
NUM_RUNS=$1
for (( i=1; i<=NUM_RUNS; i++ )) do
runtime=$(get_runtime_of_command python lstm.py --skip-cpu-governor-check)
echo "$runtime"
SAMPLE_ARRAY+=("${runtime}")
done
cd ../..
stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
echo "Runtime stats in seconds:"
echo "$stats"
if [ "$2" == "compare_with_baseline" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
elif [ "$2" == "compare_and_update" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
fi
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
run_test test_gpu_speed_lstm "$@"
fi

View File

@ -0,0 +1,44 @@
#!/bin/bash
set -e
. ./common.sh
test_gpu_speed_mlstm () {
echo "Testing: MLSTM, GPU"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
git clone https://github.com/pytorch/benchmark.git
cd benchmark/
git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0
cd scripts/
SAMPLE_ARRAY=()
NUM_RUNS=$1
for (( i=1; i<=NUM_RUNS; i++ )) do
runtime=$(get_runtime_of_command python mlstm.py --skip-cpu-governor-check)
echo "$runtime"
SAMPLE_ARRAY+=("${runtime}")
done
cd ../..
stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
echo "Runtime stats in seconds:"
echo "$stats"
if [ "$2" == "compare_with_baseline" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
elif [ "$2" == "compare_and_update" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
fi
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
run_test test_gpu_speed_mlstm "$@"
fi

View File

@ -0,0 +1,48 @@
#!/bin/bash
set -e
. ./common.sh
test_gpu_speed_mnist () {
echo "Testing: MNIST, GPU"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
git clone https://github.com/pytorch/examples.git -b perftests
cd examples/mnist
conda install -c pytorch torchvision
# Download data
python main.py --epochs 0
SAMPLE_ARRAY=()
NUM_RUNS=$1
# Needs warm up to get accurate number
python main.py --epochs 1 --no-log
for (( i=1; i<=NUM_RUNS; i++ )) do
runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log)
echo "$runtime"
SAMPLE_ARRAY+=("${runtime}")
done
cd ../..
stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
echo "Runtime stats in seconds:"
echo "$stats"
if [ "$2" == "compare_with_baseline" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
elif [ "$2" == "compare_and_update" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
fi
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
run_test test_gpu_speed_mnist "$@"
fi

View File

@ -0,0 +1,53 @@
#!/bin/bash
set -e
. ./common.sh
test_gpu_speed_word_language_model () {
echo "Testing: word language model on Wikitext-2, GPU"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
git clone https://github.com/pytorch/examples.git -b perftests
cd examples/word_language_model
cd data/wikitext-2
# Reduce dataset size, so that we can have more runs per test
sed -n '1,200p' test.txt > test_tmp.txt
sed -n '1,1000p' train.txt > train_tmp.txt
sed -n '1,200p' valid.txt > valid_tmp.txt
mv test_tmp.txt test.txt
mv train_tmp.txt train.txt
mv valid_tmp.txt valid.txt
cd ../..
SAMPLE_ARRAY=()
NUM_RUNS=$1
for (( i=1; i<=NUM_RUNS; i++ )) do
runtime=$(get_runtime_of_command python main.py --cuda --epochs 1)
echo "$runtime"
SAMPLE_ARRAY+=("${runtime}")
done
cd ../..
stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
echo "Runtime stats in seconds:"
echo "$stats"
if [ "$2" == "compare_with_baseline" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
elif [ "$2" == "compare_and_update" ]; then
python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
fi
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
run_test test_gpu_speed_word_language_model "$@"
fi

View File

@ -0,0 +1,13 @@
import sys
import json
data_file_path = sys.argv[1]
commit_hash = sys.argv[2]
with open(data_file_path) as data_file:
data = json.load(data_file)
data['commit'] = commit_hash
with open(data_file_path, 'w') as data_file:
json.dump(data, data_file)

View File

@ -0,0 +1,17 @@
import sys
log_file_path = sys.argv[1]
with open(log_file_path) as f:
lines = f.readlines()
for line in lines:
# Ignore errors from CPU instruction set, symbol existing testing,
# or compilation error formatting
ignored_keywords = [
'src.c',
'CheckSymbolExists.c',
'test_compilation_error_formatting',
]
if all([keyword not in line for keyword in ignored_keywords]):
print(line)

View File

@ -0,0 +1,18 @@
#!/bin/bash
CREATE_TEST_CERT="$(dirname "${BASH_SOURCE[0]}")/create_test_cert.py"
TMP_CERT_DIR=$(python "$CREATE_TEST_CERT")
openssl verify -CAfile "${TMP_CERT_DIR}/ca.pem" "${TMP_CERT_DIR}/cert.pem"
export GLOO_DEVICE_TRANSPORT=TCP_TLS
export GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY=${TMP_CERT_DIR}/pkey.key
export GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT=${TMP_CERT_DIR}/cert.pem
export GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE=${TMP_CERT_DIR}/ca.pem
time python test/run_test.py --include distributed/test_c10d_gloo --verbose -- ProcessGroupGlooTest
unset GLOO_DEVICE_TRANSPORT
unset GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY
unset GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT
unset GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE

View File

@ -0,0 +1,71 @@
#!/bin/bash
SCRIPT_PARENT_DIR=$(dirname "${BASH_SOURCE[0]}")
# shellcheck source=.ci/pytorch/common.sh
source "$SCRIPT_PARENT_DIR/common.sh"
cd .ci/pytorch/perf_test
echo "Running CPU perf test for PyTorch..."
pip install -q awscli
# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
# More info at https://github.com/aws/aws-cli/issues/2321
aws configure set default.s3.multipart_threshold 5GB
UPSTREAM_DEFAULT_BRANCH="$(git remote show https://github.com/pytorch/pytorch.git | awk '/HEAD branch/ {print $NF}')"
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
# Get current default branch commit hash
DEFAULT_BRANCH_COMMIT_ID=$(git log --format="%H" -n 1)
export DEFAULT_BRANCH_COMMIT_ID
fi
# Find the default branch commit to test against
git remote add upstream https://github.com/pytorch/pytorch.git
git fetch upstream
IFS=$'\n'
while IFS='' read -r commit_id; do
if aws s3 ls s3://ossci-perf-test/pytorch/cpu_runtime/"${commit_id}".json; then
LATEST_TESTED_COMMIT=${commit_id}
break
fi
done < <(git rev-list upstream/"$UPSTREAM_DEFAULT_BRANCH")
aws s3 cp s3://ossci-perf-test/pytorch/cpu_runtime/"${LATEST_TESTED_COMMIT}".json cpu_runtime.json
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
# Prepare new baseline file
cp cpu_runtime.json new_cpu_runtime.json
python update_commit_hash.py new_cpu_runtime.json "${DEFAULT_BRANCH_COMMIT_ID}"
fi
# Include tests
# shellcheck source=./perf_test/test_cpu_speed_mini_sequence_labeler.sh
. ./test_cpu_speed_mini_sequence_labeler.sh
# shellcheck source=./perf_test/test_cpu_speed_mnist.sh
. ./test_cpu_speed_mnist.sh
# shellcheck source=./perf_test/test_cpu_speed_torch.sh
. ./test_cpu_speed_torch.sh
# shellcheck source=./perf_test/test_cpu_speed_torch_tensor.sh
. ./test_cpu_speed_torch_tensor.sh
# Run tests
export TEST_MODE="compare_with_baseline"
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
export TEST_MODE="compare_and_update"
fi
# Operator tests
run_test test_cpu_speed_torch ${TEST_MODE}
run_test test_cpu_speed_torch_tensor ${TEST_MODE}
# Sample model tests
run_test test_cpu_speed_mini_sequence_labeler 20 ${TEST_MODE}
run_test test_cpu_speed_mnist 20 ${TEST_MODE}
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
# This could cause race condition if we are testing the same default branch commit twice,
# but the chance of them executing this line at the same time is low.
aws s3 cp new_cpu_runtime.json s3://ossci-perf-test/pytorch/cpu_runtime/"${DEFAULT_BRANCH_COMMIT_ID}".json --acl public-read
fi

View File

@ -0,0 +1,76 @@
#!/bin/bash
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
pushd .ci/pytorch/perf_test
echo "Running GPU perf test for PyTorch..."
# Trying to uninstall PyYAML can cause problem. Workaround according to:
# https://github.com/pypa/pip/issues/5247#issuecomment-415571153
pip install -q awscli --ignore-installed PyYAML
# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
# More info at https://github.com/aws/aws-cli/issues/2321
aws configure set default.s3.multipart_threshold 5GB
UPSTREAM_DEFAULT_BRANCH="$(git remote show https://github.com/pytorch/pytorch.git | awk '/HEAD branch/ {print $NF}')"
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
# Get current default branch commit hash
DEFAULT_BRANCH_COMMIT_ID=$(git log --format="%H" -n 1)
export DEFAULT_BRANCH_COMMIT_ID
fi
# Find the default branch commit to test against
git remote add upstream https://github.com/pytorch/pytorch.git
git fetch upstream
IFS=$'\n'
while IFS='' read -r commit_id; do
if aws s3 ls s3://ossci-perf-test/pytorch/gpu_runtime/"${commit_id}".json; then
LATEST_TESTED_COMMIT=${commit_id}
break
fi
done < <(git rev-list upstream/"$UPSTREAM_DEFAULT_BRANCH")
aws s3 cp s3://ossci-perf-test/pytorch/gpu_runtime/"${LATEST_TESTED_COMMIT}".json gpu_runtime.json
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
# Prepare new baseline file
cp gpu_runtime.json new_gpu_runtime.json
python update_commit_hash.py new_gpu_runtime.json "${DEFAULT_BRANCH_COMMIT_ID}"
fi
# Include tests
# shellcheck source=./perf_test/test_gpu_speed_mnist.sh
. ./test_gpu_speed_mnist.sh
# shellcheck source=./perf_test/test_gpu_speed_word_language_model.sh
. ./test_gpu_speed_word_language_model.sh
# shellcheck source=./perf_test/test_gpu_speed_cudnn_lstm.sh
. ./test_gpu_speed_cudnn_lstm.sh
# shellcheck source=./perf_test/test_gpu_speed_lstm.sh
. ./test_gpu_speed_lstm.sh
# shellcheck source=./perf_test/test_gpu_speed_mlstm.sh
. ./test_gpu_speed_mlstm.sh
# Run tests
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
run_test test_gpu_speed_mnist 20 compare_and_update
run_test test_gpu_speed_word_language_model 20 compare_and_update
run_test test_gpu_speed_cudnn_lstm 20 compare_and_update
run_test test_gpu_speed_lstm 20 compare_and_update
run_test test_gpu_speed_mlstm 20 compare_and_update
else
run_test test_gpu_speed_mnist 20 compare_with_baseline
run_test test_gpu_speed_word_language_model 20 compare_with_baseline
run_test test_gpu_speed_cudnn_lstm 20 compare_with_baseline
run_test test_gpu_speed_lstm 20 compare_with_baseline
run_test test_gpu_speed_mlstm 20 compare_with_baseline
fi
if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
# This could cause race condition if we are testing the same default branch commit twice,
# but the chance of them executing this line at the same time is low.
aws s3 cp new_gpu_runtime.json s3://ossci-perf-test/pytorch/gpu_runtime/"${DEFAULT_BRANCH_COMMIT_ID}".json --acl public-read
fi
popd

1007
.ci/pytorch/test.sh 100755

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,65 @@
#!/bin/bash
# If you want to rebuild, run this with REBUILD=1
# If you want to build with CUDA, run this with USE_CUDA=1
# If you want to build without CUDA, run this with USE_CUDA=0
if [ ! -f setup.py ]; then
echo "ERROR: Please run this build script from PyTorch root directory."
exit 1
fi
SCRIPT_PARENT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
# shellcheck source=./common.sh
source "$SCRIPT_PARENT_DIR/common.sh"
# shellcheck source=./common-build.sh
source "$SCRIPT_PARENT_DIR/common-build.sh"
IMAGE_COMMIT_ID=$(git rev-parse HEAD)
export IMAGE_COMMIT_ID
export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID}
if [[ ${JOB_NAME} == *"develop"* ]]; then
export IMAGE_COMMIT_TAG=develop-${IMAGE_COMMIT_TAG}
fi
export TMP_DIR="${PWD}/build/win_tmp"
TMP_DIR_WIN=$(cygpath -w "${TMP_DIR}")
export TMP_DIR_WIN
export PYTORCH_FINAL_PACKAGE_DIR=${PYTORCH_FINAL_PACKAGE_DIR:-/c/w/build-results}
if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
fi
# This directory is used only to hold "pytorch_env_restore.bat", called via "setup_pytorch_env.bat"
CI_SCRIPTS_DIR=$TMP_DIR/ci_scripts
mkdir -p "$CI_SCRIPTS_DIR"
if [ -n "$(ls "$CI_SCRIPTS_DIR"/*)" ]; then
rm "$CI_SCRIPTS_DIR"/*
fi
export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
set +ex
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
PYLONG_API_CHECK=$?
if [[ $PYLONG_API_CHECK == 0 ]]; then
echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"
echo "because \`sizeof(long) == 4\` and \`sizeof(unsigned long) == 4\`."
echo "Please include \"torch/csrc/utils/python_numbers.h\" and use the correspoding APIs instead."
echo "PyLong_FromLong -> THPUtils_packInt32 / THPUtils_packInt64"
echo "PyLong_AsLong -> THPUtils_unpackInt (32-bit) / THPUtils_unpackLong (64-bit)"
echo "PyLong_FromUnsignedLong -> THPUtils_packUInt32 / THPUtils_packUInt64"
echo "PyLong_AsUnsignedLong -> THPUtils_unpackUInt32 / THPUtils_unpackUInt64"
exit 1
fi
set -ex
"$SCRIPT_HELPERS_DIR"/build_pytorch.bat
assert_git_not_dirty
if [ ! -f "${TMP_DIR}"/"${IMAGE_COMMIT_TAG}".7z ] && [ ! "${BUILD_ENVIRONMENT}" == "" ]; then
exit 1
fi
echo "BUILD PASSED"

View File

@ -0,0 +1,160 @@
if "%DEBUG%" == "1" (
set BUILD_TYPE=debug
) ELSE (
set BUILD_TYPE=release
)
set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocolatey\bin;C:\Program Files\Git\cmd;C:\Program Files\Amazon\AWSCLI;C:\Program Files\Amazon\AWSCLI\bin;%PATH%
:: This inflates our log size slightly, but it is REALLY useful to be
:: able to see what our cl.exe commands are (since you can actually
:: just copy-paste them into a local Windows setup to just rebuild a
:: single file.)
:: log sizes are too long, but leaving this here incase someone wants to use it locally
:: set CMAKE_VERBOSE_MAKEFILE=1
set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers
call %INSTALLER_DIR%\install_mkl.bat
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
call %INSTALLER_DIR%\install_magma.bat
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
call %INSTALLER_DIR%\install_sccache.bat
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
:: Miniconda has been installed as part of the Windows AMI with all the dependencies.
:: We just need to activate it here
call %INSTALLER_DIR%\activate_miniconda3.bat
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
:: Override VS env here
pushd .
if "%VC_VERSION%" == "" (
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64
) else (
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=%VC_VERSION%
)
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
@echo on
popd
if not "%USE_CUDA%"=="1" goto cuda_build_end
set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION%
if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
exit /b 1
)
rem version transformer, for example 10.1 to 10_1.
if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
exit /b 1
)
set VERSION_SUFFIX=%CUDA_VERSION:.=_%
set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
set CUDNN_ROOT_DIR=%CUDA_PATH%
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
set CUDNN_ROOT_DIR=%CUDA_PATH%
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
:cuda_build_end
set DISTUTILS_USE_SDK=1
set PATH=%TMP_DIR_WIN%\bin;%PATH%
:: The latest Windows CUDA test is running on AWS G5 runner with A10G GPU
if "%TORCH_CUDA_ARCH_LIST%" == "" set TORCH_CUDA_ARCH_LIST=8.6
:: The default sccache idle timeout is 600, which is too short and leads to intermittent build errors.
set SCCACHE_IDLE_TIMEOUT=0
set SCCACHE_IGNORE_SERVER_IO_ERROR=1
sccache --stop-server
sccache --start-server
sccache --zero-stats
set CC=sccache-cl
set CXX=sccache-cl
set CMAKE_GENERATOR=Ninja
if "%USE_CUDA%"=="1" (
:: randomtemp is used to resolve the intermittent build error related to CUDA.
:: code: https://github.com/peterjc123/randomtemp-rust
:: issue: https://github.com/pytorch/pytorch/issues/25393
::
:: CMake requires a single command as CUDA_NVCC_EXECUTABLE, so we push the wrappers
:: randomtemp.exe and sccache.exe into a batch file which CMake invokes.
curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
echo @"%TMP_DIR_WIN%\bin\randomtemp.exe" "%TMP_DIR_WIN%\bin\sccache.exe" "%CUDA_PATH%\bin\nvcc.exe" %%* > "%TMP_DIR%/bin/nvcc.bat"
cat %TMP_DIR%/bin/nvcc.bat
set CUDA_NVCC_EXECUTABLE=%TMP_DIR%/bin/nvcc.bat
for /F "usebackq delims=" %%n in (`cygpath -m "%CUDA_PATH%\bin\nvcc.exe"`) do set CMAKE_CUDA_COMPILER=%%n
set CMAKE_CUDA_COMPILER_LAUNCHER=%TMP_DIR%/bin/randomtemp.exe;%TMP_DIR%\bin\sccache.exe
)
@echo off
echo @echo off >> %TMP_DIR_WIN%\ci_scripts\pytorch_env_restore.bat
for /f "usebackq tokens=*" %%i in (`set`) do echo set "%%i" >> %TMP_DIR_WIN%\ci_scripts\pytorch_env_restore.bat
@echo on
if "%REBUILD%" == "" (
if NOT "%BUILD_ENVIRONMENT%" == "" (
:: Create a shortcut to restore pytorch environment
echo @echo off >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
echo call "%TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
echo cd /D "%CD%" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
aws s3 cp "s3://ossci-windows/Restore PyTorch Environment.lnk" "C:\Users\circleci\Desktop\Restore PyTorch Environment.lnk"
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
)
)
python setup.py bdist_wheel
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
sccache --show-stats
python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
(
if "%BUILD_ENVIRONMENT%"=="" (
echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3` in Command Prompt before running Git Bash.
) else (
if "%USE_CUDA%"=="1" (
7z a %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torchgen %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\functorch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\nvfuser && copy /Y "%TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z" "%PYTORCH_FINAL_PACKAGE_DIR%\"
) else (
7z a %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torchgen %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\functorch && copy /Y "%TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z" "%PYTORCH_FINAL_PACKAGE_DIR%\"
)
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
:: export test times so that potential sharded tests that'll branch off this build will use consistent data
python tools/stats/export_test_times.py
copy /Y ".pytorch-test-times.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
:: Also save build/.ninja_log as an artifact
copy /Y "build\.ninja_log" "%PYTORCH_FINAL_PACKAGE_DIR%\"
)
)
sccache --show-stats --stats-format json | jq .stats > sccache-stats-%BUILD_ENVIRONMENT%-%OUR_GITHUB_JOB_ID%.json
sccache --stop-server

View File

@ -0,0 +1,4 @@
REM The first argument should the CUDA version
echo %PATH%
echo %CUDA_PATH%
set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%1\bin;%PATH%

View File

@ -0,0 +1,19 @@
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
:: exit the batch once there's an error
if not errorlevel 0 (
echo "setup pytorch env failed"
echo %errorlevel%
exit /b
)
echo "Test functorch"
pushd test
python run_test.py --functorch --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose
popd
if ERRORLEVEL 1 goto fail
:eof
exit /b 0
:fail
exit /b 1

View File

@ -0,0 +1,26 @@
if "%BUILD_ENVIRONMENT%"=="" (
set CONDA_PARENT_DIR=%CD%
) else (
set CONDA_PARENT_DIR=C:\Jenkins
)
:: Be conservative here when rolling out the new AMI with conda. This will try
:: to install conda as before if it couldn't find the conda installation. This
:: can be removed eventually after we gain enough confidence in the AMI
if not exist %CONDA_PARENT_DIR%\Miniconda3 (
set INSTALL_FRESH_CONDA=1
)
if "%INSTALL_FRESH_CONDA%"=="1" (
curl --retry 3 --retry-all-errors -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe --output %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
)
:: Activate conda so that we can use its commands, i.e. conda, python, pip
call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3

View File

@ -0,0 +1,37 @@
if "%CUDA_VERSION%" == "cpu" (
echo skip magma installation for cpu builds
exit /b 0
)
rem remove dot in cuda_version, fox example 11.1 to 111
if not "%USE_CUDA%"=="1" (
exit /b 0
)
if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
exit /b 1
)
set VERSION_SUFFIX=%CUDA_VERSION:.=%
set CUDA_SUFFIX=cuda%VERSION_SUFFIX%
if "%CUDA_SUFFIX%" == "" (
echo unknown CUDA version, please set `CUDA_VERSION` higher than 10.2
exit /b 1
)
if "%REBUILD%"=="" (
if "%BUILD_ENVIRONMENT%"=="" (
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z --output %TMP_DIR_WIN%\magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z
) else (
aws s3 cp s3://ossci-windows/magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z %TMP_DIR_WIN%\magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z --quiet
)
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
7z x -aoa %TMP_DIR_WIN%\magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z -o%TMP_DIR_WIN%\magma
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
)
set MAGMA_HOME=%TMP_DIR_WIN%\magma

View File

@ -0,0 +1,14 @@
if "%REBUILD%"=="" (
if "%BUILD_ENVIRONMENT%"=="" (
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z --output %TMP_DIR_WIN%\mkl.7z
) else (
aws s3 cp s3://ossci-windows/mkl_2020.2.254.7z %TMP_DIR_WIN%\mkl.7z --quiet
)
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
7z x -aoa %TMP_DIR_WIN%\mkl.7z -o%TMP_DIR_WIN%\mkl
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
)
set CMAKE_INCLUDE_PATH=%TMP_DIR_WIN%\mkl\include
set LIB=%TMP_DIR_WIN%\mkl\lib;%LIB%

View File

@ -0,0 +1,18 @@
mkdir %TMP_DIR_WIN%\bin
if "%REBUILD%"=="" (
:check_sccache
%TMP_DIR_WIN%\bin\sccache.exe --show-stats || (
taskkill /im sccache.exe /f /t || ver > nul
del %TMP_DIR_WIN%\bin\sccache.exe || ver > nul
del %TMP_DIR_WIN%\bin\sccache-cl.exe || ver > nul
if "%BUILD_ENVIRONMENT%"=="" (
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %TMP_DIR_WIN%\bin\sccache.exe
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output %TMP_DIR_WIN%\bin\sccache-cl.exe
) else (
aws s3 cp s3://ossci-windows/sccache.exe %TMP_DIR_WIN%\bin\sccache.exe
aws s3 cp s3://ossci-windows/sccache-cl.exe %TMP_DIR_WIN%\bin\sccache-cl.exe
)
goto :check_sccache
)
)

View File

@ -0,0 +1,55 @@
#!/usr/bin/env python3
import subprocess
import os
COMMON_TESTS = [
(
"Checking that torch is available",
"import torch",
),
(
"Checking that MKL is available",
"import torch; exit(0 if torch.backends.mkl.is_available() else 1)",
),
]
GPU_TESTS = [
(
"Checking that CUDA archs are setup correctly",
"import torch; torch.randn([3,5]).cuda()",
),
(
"Checking that magma is available",
"import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)",
),
(
"Checking that CuDNN is available",
"import torch; exit(0 if torch.backends.cudnn.is_available() else 1)",
),
]
if __name__ == "__main__":
if 'USE_CUDA' in os.environ and os.environ['USE_CUDA'] == '1':
TESTS = COMMON_TESTS + GPU_TESTS
else:
TESTS = COMMON_TESTS
for description, python_commands in TESTS:
print(description)
command_args = ["python", "-c", python_commands]
command_string = " ".join(command_args)
print("Command:", command_string)
try:
subprocess.check_call(command_args)
except subprocess.CalledProcessError as e:
sdk_root = os.environ.get('WindowsSdkDir', 'C:\\Program Files (x86)\\Windows Kits\\10')
debugger = os.path.join(sdk_root, 'Debuggers', 'x64', 'cdb.exe')
if os.path.exists(debugger):
command_args = [debugger, "-o", "-c", "~*g; q"] + command_args
command_string = " ".join(command_args)
print("Reruning with traceback enabled")
print("Command:", command_string)
subprocess.run(command_args, check=False)
exit(e.returncode)

View File

@ -0,0 +1,73 @@
if exist "%TMP_DIR%/ci_scripts/pytorch_env_restore.bat" (
call %TMP_DIR%/ci_scripts/pytorch_env_restore.bat
exit /b 0
)
set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocolatey\bin;C:\Program Files\Git\cmd;C:\Program Files\Amazon\AWSCLI;C:\Program Files\Amazon\AWSCLI\bin;%PATH%
:: Install Miniconda3
set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers
:: Miniconda has been installed as part of the Windows AMI with all the dependencies.
:: We just need to activate it here
call %INSTALLER_DIR%\activate_miniconda3.bat
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
pushd .
if "%VC_VERSION%" == "" (
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64
) else (
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=%VC_VERSION%
)
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
@echo on
popd
set DISTUTILS_USE_SDK=1
if not "%USE_CUDA%"=="1" goto cuda_build_end
set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION%
rem version transformer, for example 10.1 to 10_1.
set VERSION_SUFFIX=%CUDA_VERSION:.=_%
set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
set CUDNN_ROOT_DIR=%CUDA_PATH%
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
set NUMBAPRO_CUDALIB=%CUDA_PATH%\bin
set NUMBAPRO_LIBDEVICE=%CUDA_PATH%\nvvm\libdevice
set NUMBAPRO_NVVM=%CUDA_PATH%\nvvm\bin\nvvm64_32_0.dll
:cuda_build_end
set PYTHONPATH=%TMP_DIR_WIN%\build;%PYTHONPATH%
if NOT "%BUILD_ENVIRONMENT%"=="" (
pushd %TMP_DIR_WIN%\build
copy /Y %PYTORCH_FINAL_PACKAGE_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %TMP_DIR_WIN%\
:: 7z: -aos skips if exists because this .bat can be called multiple times
7z x %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z -aos
popd
) else (
xcopy /s %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
)
@echo off
echo @echo off >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat
for /f "usebackq tokens=*" %%i in (`set`) do echo set "%%i" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat
@echo on
if NOT "%BUILD_ENVIRONMENT%" == "" (
:: Create a shortcut to restore pytorch environment
echo @echo off >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
echo call "%TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
echo cd /D "%CD%" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
aws s3 cp "s3://ossci-windows/Restore PyTorch Environment.lnk" "C:\Users\circleci\Desktop\Restore PyTorch Environment.lnk"
)

View File

@ -0,0 +1,36 @@
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
git submodule update --init --recursive third_party/pybind11
cd test\custom_backend
:: Build the custom backend library.
mkdir build
pushd build
echo "Executing CMake for custom_backend test..."
:: Note: Caffe2 does not support MSVC + CUDA + Debug mode (has to be Release mode)
cmake -DCMAKE_PREFIX_PATH=%TMP_DIR_WIN%\build\torch -DCMAKE_BUILD_TYPE=Release -GNinja ..
if ERRORLEVEL 1 exit /b 1
echo "Executing Ninja for custom_backend test..."
ninja -v
if ERRORLEVEL 1 exit /b 1
echo "Ninja succeeded for custom_backend test."
popd
:: Run tests Python-side and export a script module.
python test_custom_backend.py -v
if ERRORLEVEL 1 exit /b 1
python backend.py --export-module-to="build/model.pt"
if ERRORLEVEL 1 exit /b 1
:: Run tests C++-side and load the exported script module.
cd build
set PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64;%TMP_DIR_WIN%\build\torch\lib;%PATH%
test_custom_backend.exe model.pt
if ERRORLEVEL 1 exit /b 1

View File

@ -0,0 +1,41 @@
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
git submodule update --init --recursive third_party/pybind11
cd test\custom_operator
:: Build the custom operator library.
mkdir build
pushd build
echo "Executing CMake for custom_operator test..."
:: Note: Caffe2 does not support MSVC + CUDA + Debug mode (has to be Release mode)
cmake -DCMAKE_PREFIX_PATH=%TMP_DIR_WIN%\build\torch -DCMAKE_BUILD_TYPE=Release -GNinja ..
if ERRORLEVEL 1 exit /b 1
echo "Executing Ninja for custom_operator test..."
ninja -v
if ERRORLEVEL 1 exit /b 1
echo "Ninja succeeded for custom_operator test."
popd
:: Run tests Python-side and export a script module.
python test_custom_ops.py -v
if ERRORLEVEL 1 exit /b 1
:: TODO: fix and re-enable this test
:: See https://github.com/pytorch/pytorch/issues/25155
:: python test_custom_classes.py -v
:: if ERRORLEVEL 1 exit /b 1
python model.py --export-script-module="build/model.pt"
if ERRORLEVEL 1 exit /b 1
:: Run tests C++-side and load the exported script module.
cd build
set PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64;%TMP_DIR_WIN%\build\torch\lib;%PATH%
test_custom_ops.exe model.pt
if ERRORLEVEL 1 exit /b 1

View File

@ -0,0 +1,24 @@
REM The first argument should lead to the python interpreter
%1\python.exe test/run_test.py --verbose -i distributed/test_c10d_common
if %errorlevel% neq 0 ( exit /b %errorlevel% )
%1\python.exe test/run_test.py --verbose -i distributed/test_c10d_gloo
if %errorlevel% neq 0 ( exit /b %errorlevel% )
%1\python.exe test/run_test.py --verbose -i distributed/test_c10d_nccl
if %errorlevel% neq 0 ( exit /b %errorlevel% )
%1\python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
if %errorlevel% neq 0 ( exit /b %errorlevel% )
%1\python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
if %errorlevel% neq 0 ( exit /b %errorlevel% )
%1\python.exe test/run_test.py --verbose -i distributed/test_data_parallel
if %errorlevel% neq 0 ( exit /b %errorlevel% )
%1\python.exe test/run_test.py --verbose -i distributed/test_store
if %errorlevel% neq 0 ( exit /b %errorlevel% )
%1\python.exe test/run_test.py --verbose -i distributed/test_pg_wrapper
if %errorlevel% neq 0 ( exit /b %errorlevel% )

Some files were not shown because too many files have changed in this diff Show More