Forklet of Pytorch

2023-11-08 09:01:59 -07:00 · 2023-11-08 09:01:59 -07:00 · e5ae8f15ad
commit e5ae8f15ad
11673 changed files with 3188227 additions and 0 deletions
--- a/.bazelrc
+++ b/.bazelrc
@ -0,0 +1,115 @@
+build --cxxopt=--std=c++17
+build --copt=-I.
+# Bazel does not support including its cc_library targets as system
+# headers. We work around this for generated code
+# (e.g. c10/macros/cmake_macros.h) by making the generated directory a
+# system include path.
+build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
+build --copt=-isystem --copt bazel-out/darwin-fastbuild/bin
+build --experimental_ui_max_stdouterr_bytes=2048576
+
+# Configuration to disable tty features for environments like CI
+build:no-tty --curses no
+build:no-tty --progress_report_interval 10
+build:no-tty --show_progress_rate_limit 10
+
+# Build with GPU support by default.
+build --define=cuda=true
+# rules_cuda configuration
+build --@rules_cuda//cuda:enable_cuda
+build --@rules_cuda//cuda:cuda_targets=sm_52
+build --@rules_cuda//cuda:compiler=nvcc
+build --repo_env=CUDA_PATH=/usr/local/cuda
+
+# Configuration to build without GPU support
+build:cpu-only --define=cuda=false
+# define a separate build folder for faster switching between configs
+build:cpu-only --platform_suffix=-cpu-only
+# See the note on the config-less build for details about why we are
+# doing this. We must also do it for the "-cpu-only" platform suffix.
+build --copt=-isystem --copt=bazel-out/k8-fastbuild-cpu-only/bin
+# rules_cuda configuration
+build:cpu-only --@rules_cuda//cuda:enable_cuda=False
+
+# Definition of --config=shell
+# interactive shell immediately before execution
+build:shell --run_under="//tools/bazel_tools:shellwrap"
+
+# Disable all warnings for external repositories. We don't care about
+# their warnings.
+build --per_file_copt=^external/@-w
+
+# Set additional warnings to error level.
+#
+# Implementation notes:
+#  * we use file extensions to determine if we are using the C++
+#    compiler or the cuda compiler
+#  * we use ^// at the start of the regex to only permit matching
+#    PyTorch files. This excludes external repos.
+#
+# Note that because this is logically a command-line flag, it is
+# considered the final word on which warnings are enabled. This has the
+# unfortunate consequence of preventing us from disabling an error at
+# the target level because those flags will come before these flags in
+# the action invocation. Instead we provide per-file exceptions after
+# this.
+#
+# On the bright side, this means we don't have to more broadly apply
+# the exceptions to an entire target.
+#
+# Looking for CUDA flags? We have a cu_library macro that we can edit
+# directly. Look in //tools/rules:cu.bzl for details. Editing the
+# macro over this has the following advantages:
+#  * making changes does not require discarding the Bazel analysis
+#    cache
+#  * it allows for selective overrides on individual targets since the
+#    macro-level opts will come earlier than target level overrides
+
+build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=all
+# The following warnings come from -Wall. We downgrade them from error
+# to warnings here.
+#
+# sign-compare has a tremendous amount of violations in the
+# codebase. It will be a lot of work to fix them, just disable it for
+# now.
+build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-sign-compare
+# We intentionally use #pragma unroll, which is compiler specific.
+build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-error=unknown-pragmas
+
+build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=extra
+# The following warnings come from -Wextra. We downgrade them from error
+# to warnings here.
+#
+# unused-parameter has a tremendous number of violations in the
+# codebase. It will be a lot of work to fix them, just disable it for
+# now.
+build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-parameter
+# missing-field-initializers not only has a large number of violations
+# in the codebase, but the pattern it flags is also used pervasively in
+# the Python C API. There are a couple of catches though:
+# * we use multiple versions of the Python API and hence have
+#   potentially multiple different versions of each relevant
+#   struct. They may have different numbers of fields. It will be
+#   unwieldy to support multiple versions in the same source file.
+# * Python itself for many of these structs recommends only
+#   initializing a subset of the fields. We should respect the API
+#   usage conventions of our dependencies.
+#
+# Hence, we just disable this warning altogether. We may want to clean
+# up some of the clear-cut cases that could be risky, but we still
+# likely want to have this disabled for the most part.
+build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-missing-field-initializers
+
+build --per_file_copt='//:aten/src/ATen/RegisterCompositeExplicitAutograd\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterCompositeImplicitAutograd\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterMkldnnCPU\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorCPU\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterQuantizedCPU\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterSparseCPU\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterSparseCsrCPU\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorMeta\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterSparseMeta\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterQuantizedMeta\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:aten/src/ATen/RegisterZeroTensor\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:torch/csrc/lazy/generated/RegisterAutogradLazy\.cpp$'@-Wno-error=unused-function
+build --per_file_copt='//:torch/csrc/lazy/generated/RegisterLazy\.cpp$'@-Wno-error=unused-function
--- a/.bazelversion
+++ b/.bazelversion
@ -0,0 +1 @@
+4.2.1
--- a/.buckconfig.oss
+++ b/.buckconfig.oss
@ -0,0 +1,25 @@
+[pt]
+  is_oss=1
+
+[buildfile]
+  name = BUCK.oss
+  includes = //tools/build_defs/select.bzl
+
+[repositories]
+  bazel_skylib = third_party/bazel-skylib/
+  ovr_config = .
+
+[download]
+  in_build = true
+
+[cxx]
+  cxxflags = -std=c++17
+  should_remap_host_platform = true
+  cpp = /usr/bin/clang
+  cc = /usr/bin/clang
+  cxx = /usr/bin/clang++
+  cxxpp = /usr/bin/clang++
+  ld = /usr/bin/clang++
+
+[project]
+  default_flavors_mode=all
--- a/.ci/caffe2/README.md
+++ b/.ci/caffe2/README.md
@ -0,0 +1,14 @@
+# Jenkins
+
+The scripts in this directory are the entrypoint for testing Caffe2.
+
+The environment variable `BUILD_ENVIRONMENT` is expected to be set to
+the build environment you intend to test. It is a hint for the build
+and test scripts to configure Caffe2 a certain way and include/exclude
+tests. For Docker images, it equals the name of the image itself. For
+example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are
+built on Jenkins and are used in triggered builds already have this
+environment variable set in their manifest. Also see
+`./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`.
+
+Our Jenkins installation is located at https://ci.pytorch.org/jenkins/.
--- a/.ci/caffe2/common.sh
+++ b/.ci/caffe2/common.sh
@ -0,0 +1,36 @@
+set -ex
+
+LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
+TEST_DIR="$ROOT_DIR/test"
+gtest_reports_dir="${TEST_DIR}/test-reports/cpp"
+pytest_reports_dir="${TEST_DIR}/test-reports/python"
+
+# Figure out which Python to use
+PYTHON="$(which python)"
+if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
+  PYTHON=$(which "python${BASH_REMATCH[1]}")
+fi
+
+if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
+    # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
+    unset HIP_PLATFORM
+    if which sccache > /dev/null; then
+        # Save sccache logs to file
+        sccache --stop-server || true
+        rm -f ~/sccache_error.log || true
+        SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
+
+        # Report sccache stats for easier debugging
+        sccache --zero-stats
+    fi
+fi
+
+# /usr/local/caffe2 is where the cpp bits are installed to in cmake-only
+# builds. In +python builds the cpp tests are copied to /usr/local/caffe2 so
+# that the test code in .ci/test.sh is the same
+INSTALL_PREFIX="/usr/local/caffe2"
+
+mkdir -p "$gtest_reports_dir" || true
+mkdir -p "$pytest_reports_dir" || true
+mkdir -p "$INSTALL_PREFIX" || true
--- a/.ci/caffe2/test.sh
+++ b/.ci/caffe2/test.sh
@ -0,0 +1,172 @@
+#!/bin/bash
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
+  pip install click mock tabulate networkx==2.0
+  pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
+fi
+
+# Skip tests in environments where they are not built/applicable
+if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
+  echo 'Skipping tests'
+  exit 0
+fi
+if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
+  # temporary to locate some kernel issues on the CI nodes
+  export HSAKMT_DEBUG_LEVEL=4
+fi
+# These additional packages are needed for circleci ROCm builds.
+if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
+    # Need networkx 2.0 because bellman_ford was moved in 2.1. Scikit-image by
+    # default installs the most recent networkx version, so we install this lower
+    # version explicitly before scikit-image pulls it in as a dependency
+    pip install networkx==2.0
+    # click - onnx
+    pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
+fi
+
+# Find where cpp tests and Caffe2 itself are installed.
+# Sets cpp_test_dir and ld_library_path in both branches; the Python branch
+# additionally sets python_installation and caffe2_pypath.
+if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
+  # For cmake only build we install everything into /usr/local
+  cpp_test_dir="$INSTALL_PREFIX/cpp_test"
+  ld_library_path="$INSTALL_PREFIX/lib"
+else
+  # For Python builds we install into python
+  # cd to /usr first so the python import doesn't get confused by any 'caffe2'
+  # directory in cwd
+  caffe2_init_file="$(cd /usr && "$PYTHON" -c 'import os; import caffe2; print(os.path.realpath(caffe2.__file__))')"
+  # Two levels up from caffe2/__init__.py is the directory holding the package.
+  # Every expansion is quoted so a path with whitespace is not word-split.
+  python_installation="$(dirname "$(dirname "$caffe2_init_file")")"
+  caffe2_pypath="$python_installation/caffe2"
+  cpp_test_dir="$python_installation/torch/test"
+  ld_library_path="$python_installation/torch/lib"
+fi
+
+################################################################################
+# C++ tests #
+################################################################################
+# Only run cpp tests in the first shard, don't run cpp tests a second time in the second shard
+if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
+  echo "Running C++ tests.."
+  for test in $(find "$cpp_test_dir" -executable -type f); do
+    case "$test" in
+      # skip tests we know are hanging or bad
+      */mkl_utils_test|*/aten/integer_divider_test)
+        continue
+        ;;
+      */scalar_tensor_test|*/basic|*/native_test)
+        if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
+          continue
+        else
+          LD_LIBRARY_PATH="$ld_library_path" "$test"
+        fi
+        ;;
+      */*_benchmark)
+        LD_LIBRARY_PATH="$ld_library_path" "$test" --benchmark_color=false
+        ;;
+      *)
+        # Currently, we use a mixture of gtest (caffe2) and Catch2 (ATen). While
+        # planning to migrate to gtest as the common PyTorch c++ test suite, we
+        # currently do NOT use the xml test reporter, because Catch doesn't
+        # support multiple reporters
+        # c.f. https://github.com/catchorg/Catch2/blob/master/docs/release-notes.md#223
+        # which means that enabling XML output means you lose useful stdout
+        # output for Jenkins.  It's more important to have useful console
+        # output than it is to have XML output for Jenkins.
+        # Note: in the future, if we want to use xml test reporter once we switch
+        # to all gtest, one can simply do:
+        LD_LIBRARY_PATH="$ld_library_path" \
+            "$test" --gtest_output=xml:"$gtest_reports_dir/$(basename $test).xml"
+        ;;
+    esac
+  done
+fi
+
+################################################################################
+# Python tests #
+################################################################################
+if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
+  exit 0
+fi
+
+# If pip is installed as root, we must use sudo.
+# CircleCI docker images could install conda as jenkins user, or use the OS's python package.
+PIP=$(which pip)
+PIP_USER=$(stat --format '%U' $PIP)
+CURRENT_USER=$(id -u -n)
+if [[ "$PIP_USER" = root && "$CURRENT_USER" != root ]]; then
+  MAYBE_SUDO=sudo
+fi
+
+# Uninstall pre-installed hypothesis and coverage to use an older version as newer
+# versions remove the timeout parameter from settings which ideep/conv_transpose_test.py uses
+$MAYBE_SUDO pip -q uninstall -y hypothesis
+$MAYBE_SUDO pip -q uninstall -y coverage
+
+# "pip install hypothesis==3.44.6" from official server is unreliable on
+# CircleCI, so we host a copy on S3 instead
+$MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
+$MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
+$MAYBE_SUDO pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
+
+# Collect additional tests to run (outside caffe2/python)
+EXTRA_TESTS=()
+
+# CUDA builds always include NCCL support
+if [[ "$BUILD_ENVIRONMENT" == *-cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *-rocm* ]]; then
+  EXTRA_TESTS+=("$caffe2_pypath/contrib/nccl")
+fi
+
+# Extra pytest arguments that exclude tests on ROCm builds. Each option and
+# its path are stored as separate array elements so the array can be
+# expanded quoted without relying on word splitting.
+rocm_ignore_test=()
+if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then
+  # Currently these tests are failing on ROCM platform:
+
+  # On ROCm, RCCL (distributed) development isn't complete.
+  # https://github.com/ROCmSoftwarePlatform/rccl
+  rocm_ignore_test+=(--ignore "$caffe2_pypath/python/data_parallel_model_test.py")
+
+  # This test has been flaky in ROCm CI (but note the tests are
+  # cpu-only so should be unrelated to ROCm)
+  rocm_ignore_test+=(--ignore "$caffe2_pypath/python/operator_test/blobs_queue_db_test.py")
+  # This test is skipped on Jenkins(compiled without MKL) and otherwise known flaky
+  rocm_ignore_test+=(--ignore "$caffe2_pypath/python/ideep/convfusion_op_test.py")
+  # This test is skipped on Jenkins(compiled without MKL) and causing segfault on Circle
+  rocm_ignore_test+=(--ignore "$caffe2_pypath/python/ideep/pool_op_test.py")
+fi
+
+echo "Running Python tests.."
+# locale setting is required by click package
+# Use the first of the candidate locales that `locale -a` reports.
+for loc in "en_US.utf8" "C.UTF-8"; do
+  if locale -a | grep "$loc" >/dev/null 2>&1; then
+    export LC_ALL="$loc"
+    export LANG="$loc"
+    break;
+  fi
+done
+
+# Some Caffe2 tests fail when run using AVX512 ISA, see https://github.com/pytorch/pytorch/issues/66111
+export DNNL_MAX_CPU_ISA=AVX2
+
+# Should still run even in the absence of SHARD_NUMBER
+if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
+  # TODO(sdym@meta.com) remove this when the linked issue resolved.
+  # py is temporary until https://github.com/Teemu/pytest-sugar/issues/241 is fixed
+  pip install --user py==1.11.0
+  pip install --user pytest-sugar
+  # NB: Warnings are disabled because they make it harder to see what
+  # the actual erroring test is
+  "$PYTHON" \
+    -m pytest \
+    -x \
+    -v \
+    --disable-warnings \
+    --junit-xml="$pytest_reports_dir/result.xml" \
+    --ignore "$caffe2_pypath/python/test/executor_test.py" \
+    --ignore "$caffe2_pypath/python/operator_test/matmul_op_test.py" \
+    --ignore "$caffe2_pypath/python/operator_test/pack_ops_test.py" \
+    --ignore "$caffe2_pypath/python/mkl/mkl_sbn_speed_test.py" \
+    --ignore "$caffe2_pypath/python/trt/test_pt_onnx_trt.py" \
+    "${rocm_ignore_test[@]}" \
+    "$caffe2_pypath/python" \
+    "${EXTRA_TESTS[@]}"
+fi
--- a/.ci/docker/README.md
+++ b/.ci/docker/README.md
@ -0,0 +1,31 @@
+# Docker images for Jenkins
+
+This directory contains everything needed to build the Docker images
+that are used in our CI.
+
+The Dockerfiles located in subdirectories are parameterized to
+conditionally run build stages depending on build arguments passed to
+`docker build`. This lets us use only a few Dockerfiles for many
+images. The different configurations are identified by a freeform
+string that we call a _build environment_. This string is persisted in
+each image as the `BUILD_ENVIRONMENT` environment variable.
+
+See `build.sh` for valid build environments (it's the giant switch).
+
+Docker builds are now defined with `.circleci/cimodel/data/simple/docker_definitions.py`
+
+## Contents
+
+* `build.sh` -- dispatch script to launch all builds
+* `common` -- scripts used to execute individual Docker build stages
+* `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker
+
+## Usage
+
+```bash
+# Build a specific image
+./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
+
+# Set flags (see build.sh) and build image
+sudo bash -c 'PROTOBUF=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
+```
--- a/.ci/docker/android/AndroidManifest.xml
+++ b/.ci/docker/android/AndroidManifest.xml
@ -0,0 +1 @@
+<manifest package="org.pytorch.deps" />
--- a/.ci/docker/android/build.gradle
+++ b/.ci/docker/android/build.gradle
@ -0,0 +1,66 @@
+buildscript {
+    ext {
+        minSdkVersion = 21
+        targetSdkVersion = 28
+        compileSdkVersion = 28
+        buildToolsVersion = '28.0.3'
+
+        coreVersion = "1.2.0"
+        extJUnitVersion = "1.1.1"
+        runnerVersion = "1.2.0"
+        rulesVersion = "1.2.0"
+        junitVersion = "4.12"
+    }
+
+    repositories {
+        google()
+        mavenLocal()
+        mavenCentral()
+        jcenter()
+    }
+
+    dependencies {
+        classpath 'com.android.tools.build:gradle:4.1.2'
+        classpath 'com.vanniktech:gradle-maven-publish-plugin:0.14.2'
+    }
+}
+
+repositories {
+    google()
+    jcenter()
+}
+
+apply plugin: 'com.android.library'
+
+android {
+    compileSdkVersion rootProject.compileSdkVersion
+    buildToolsVersion rootProject.buildToolsVersion
+
+    defaultConfig {
+        minSdkVersion minSdkVersion
+        targetSdkVersion targetSdkVersion
+    }
+
+    sourceSets {
+        main {
+            manifest.srcFile 'AndroidManifest.xml'
+        }
+    }
+}
+
+// Libraries required by this module. Each artifact is declared exactly once;
+// versions for the junit/androidx.test artifacts come from the ext block of
+// the buildscript section.
+dependencies {
+    implementation 'com.android.support:appcompat-v7:28.0.0'
+    implementation 'androidx.appcompat:appcompat:1.0.0'
+    implementation 'com.facebook.fbjni:fbjni-java-only:0.2.2'
+    implementation 'com.google.code.findbugs:jsr305:3.0.1'
+    implementation 'com.facebook.soloader:nativeloader:0.10.4'
+
+    implementation 'junit:junit:' + rootProject.junitVersion
+    implementation 'androidx.test:core:' + rootProject.coreVersion
+    implementation 'androidx.test.ext:junit:' + rootProject.extJUnitVersion
+    implementation 'androidx.test:rules:' + rootProject.rulesVersion
+    implementation 'androidx.test:runner:' + rootProject.runnerVersion
+}
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@ -0,0 +1,392 @@
+#!/bin/bash
+
+set -ex
+
+image="$1"
+shift
+
+if [ -z "${image}" ]; then
+  echo "Usage: $0 IMAGE"
+  exit 1
+fi
+
+# Extract the version number that directly follows the prefix $1 in the
+# global $image and export it under the variable name given in $2.
+#   $1 - prefix to look for in $image (interpolated into a Perl regex)
+#   $2 - name of the variable to assign and export
+# A version is one to three dot-separated groups of digits.
+# Exits the script with status 1 when no version could be parsed.
+function extract_version_from_image_name() {
+  local _extracted_version
+  _extracted_version=$(echo "${image}" | perl -n -e"/$1(\d+(\.\d+)?(\.\d+)?)/ && print \$1")
+  # Assign through printf -v instead of eval so the parsed text is never
+  # re-interpreted as shell code.
+  printf -v "$2" '%s' "${_extracted_version}"
+  export "$2"
+  if [ "x${!2}" = x ]; then
+    echo "variable '$2' not correctly parsed from image='$image'"
+    exit 1
+  fi
+}
+
+function extract_all_from_image_name() {
+  # parts $image into array, splitting on '-'
+  keep_IFS="$IFS"
+  IFS="-"
+  declare -a parts=($image)
+  IFS="$keep_IFS"
+  unset keep_IFS
+
+  for part in "${parts[@]}"; do
+    name=$(echo "${part}" | perl -n -e"/([a-zA-Z]+)\d+(\.\d+)?(\.\d+)?/ && print \$1")
+    vername="${name^^}_VERSION"
+    # "py" is the odd one out, needs this special case
+    if [ "x${name}" = xpy ]; then
+      vername=ANACONDA_PYTHON_VERSION
+    fi
+    # skip non-conforming fields such as "pytorch", "linux" or "bionic" without version string
+    if [ -n "${name}" ]; then
+      extract_version_from_image_name "${name}" "${vername}"
+    fi
+  done
+}
+
+# Use the same pre-built XLA test image from PyTorch/XLA
+if [[ "$image" == *xla* ]]; then
+  echo "Using pre-built XLA test image..."
+  exit 0
+fi
+
+if [[ "$image" == *-bionic* ]]; then
+  UBUNTU_VERSION=18.04
+elif [[ "$image" == *-focal* ]]; then
+  UBUNTU_VERSION=20.04
+elif [[ "$image" == *-jammy* ]]; then
+  UBUNTU_VERSION=22.04
+elif [[ "$image" == *ubuntu* ]]; then
+  extract_version_from_image_name ubuntu UBUNTU_VERSION
+elif [[ "$image" == *centos* ]]; then
+  extract_version_from_image_name centos CENTOS_VERSION
+fi
+
+if [ -n "${UBUNTU_VERSION}" ]; then
+  OS="ubuntu"
+elif [ -n "${CENTOS_VERSION}" ]; then
+  OS="centos"
+else
+  echo "Unable to derive operating system base..."
+  exit 1
+fi
+
+DOCKERFILE="${OS}/Dockerfile"
+# When using ubuntu - 22.04, start from Ubuntu docker image, instead of nvidia/cuda docker image.
+if [[ "$image" == *cuda* && "$UBUNTU_VERSION" != "22.04" ]]; then
+  DOCKERFILE="${OS}-cuda/Dockerfile"
+elif [[ "$image" == *rocm* ]]; then
+  DOCKERFILE="${OS}-rocm/Dockerfile"
+elif [[ "$image" == *linter* ]]; then
+  # Use a separate Dockerfile for linter to keep a small image size
+  DOCKERFILE="linter/Dockerfile"
+fi
+
+# CMake 3.18 is needed to support CUDA17 language variant
+CMAKE_VERSION=3.18.5
+
+_UCX_COMMIT=31e74cac7bee0ef66bef2af72e7d86d9c282e5ab
+_UCC_COMMIT=1c7a7127186e7836f73aafbd7697bbc274a77eee
+
+# It's annoying to rename jobs every time you want to rewrite a
+# configuration, so we hardcode everything here rather than do it
+# from scratch
+case "$image" in
+  pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7)
+    CUDA_VERSION=11.6.2
+    CUDNN_VERSION=8
+    ANACONDA_PYTHON_VERSION=3.10
+    GCC_VERSION=7
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    UCX_COMMIT=${_UCX_COMMIT}
+    UCC_COMMIT=${_UCC_COMMIT}
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7)
+    CUDA_VERSION=11.7.0
+    CUDNN_VERSION=8
+    ANACONDA_PYTHON_VERSION=3.10
+    GCC_VERSION=7
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    UCX_COMMIT=${_UCX_COMMIT}
+    UCC_COMMIT=${_UCC_COMMIT}
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7)
+    CUDA_VERSION=11.8.0
+    CUDNN_VERSION=8
+    ANACONDA_PYTHON_VERSION=3.10
+    GCC_VERSION=7
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    UCX_COMMIT=${_UCX_COMMIT}
+    UCC_COMMIT=${_UCC_COMMIT}
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-focal-py3-clang7-asan)
+    ANACONDA_PYTHON_VERSION=3.9
+    CLANG_VERSION=7
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-focal-py3-clang10-onnx)
+    ANACONDA_PYTHON_VERSION=3.8
+    CLANG_VERSION=10
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-focal-py3-clang7-android-ndk-r19c)
+    ANACONDA_PYTHON_VERSION=3.7
+    CLANG_VERSION=7
+    LLVMDEV=yes
+    PROTOBUF=yes
+    ANDROID=yes
+    ANDROID_NDK_VERSION=r19c
+    GRADLE_VERSION=6.8.3
+    NINJA_VERSION=1.9.0
+    ;;
+  pytorch-linux-bionic-py3.8-clang9)
+    ANACONDA_PYTHON_VERSION=3.8
+    CLANG_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    VULKAN_SDK_VERSION=1.2.162.1
+    SWIFTSHADER=yes
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-bionic-py3.11-clang9)
+    ANACONDA_PYTHON_VERSION=3.11
+    CLANG_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    VULKAN_SDK_VERSION=1.2.162.1
+    SWIFTSHADER=yes
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-bionic-py3.8-gcc9)
+    ANACONDA_PYTHON_VERSION=3.8
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-focal-rocm-n-1-py3)
+    ANACONDA_PYTHON_VERSION=3.8
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ROCM_VERSION=5.3
+    NINJA_VERSION=1.9.0
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-focal-rocm-n-py3)
+    ANACONDA_PYTHON_VERSION=3.8
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ROCM_VERSION=5.4.2
+    NINJA_VERSION=1.9.0
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-focal-py3.8-gcc7)
+    ANACONDA_PYTHON_VERSION=3.8
+    GCC_VERSION=7
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    CONDA_CMAKE=yes
+    ;;
+  pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12)
+    ANACONDA_PYTHON_VERSION=3.8
+    CUDA_VERSION=11.6
+    CUDNN_VERSION=8
+    CLANG_VERSION=12
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ;;
+  pytorch-linux-jammy-cuda11.7-cudnn8-py3.8-clang12)
+    ANACONDA_PYTHON_VERSION=3.8
+    CUDA_VERSION=11.7
+    CUDNN_VERSION=8
+    CLANG_VERSION=12
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ;;
+  pytorch-linux-jammy-cuda11.8-cudnn8-py3.8-clang12)
+    ANACONDA_PYTHON_VERSION=3.8
+    CUDA_VERSION=11.8
+    CUDNN_VERSION=8
+    CLANG_VERSION=12
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ;;
+  pytorch-linux-focal-linter)
+    # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
+    # We will need to update mypy version eventually, but that's for another day. The task
+    # would be to upgrade mypy to 1.0.0 with Python 3.11
+    ANACONDA_PYTHON_VERSION=3.9
+    CONDA_CMAKE=yes
+    ;;
+  *)
+    # Catch-all for builds that are not hardcoded.
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    echo "image '$image' did not match an existing build configuration"
+    if [[ "$image" == *py* ]]; then
+      extract_version_from_image_name py ANACONDA_PYTHON_VERSION
+    fi
+    if [[ "$image" == *cuda* ]]; then
+      extract_version_from_image_name cuda CUDA_VERSION
+      extract_version_from_image_name cudnn CUDNN_VERSION
+    fi
+    if [[ "$image" == *rocm* ]]; then
+      extract_version_from_image_name rocm ROCM_VERSION
+      NINJA_VERSION=1.9.0
+    fi
+    if [[ "$image" == *centos7* ]]; then
+      NINJA_VERSION=1.10.2
+    fi
+    if [[ "$image" == *gcc* ]]; then
+      extract_version_from_image_name gcc GCC_VERSION
+    fi
+    if [[ "$image" == *clang* ]]; then
+      extract_version_from_image_name clang CLANG_VERSION
+    fi
+    if [[ "$image" == *devtoolset* ]]; then
+      extract_version_from_image_name devtoolset DEVTOOLSET_VERSION
+    fi
+    if [[ "$image" == *glibc* ]]; then
+      extract_version_from_image_name glibc GLIBC_VERSION
+    fi
+    if [[ "$image" == *cmake* ]]; then
+      extract_version_from_image_name cmake CMAKE_VERSION
+    fi
+  ;;
+esac
+
+tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
+
+#when using cudnn version 8 install it separately from cuda
+if [[ "$image" == *cuda*  && ${OS} == "ubuntu" ]]; then
+  IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
+  if [[ ${CUDNN_VERSION} == 8 ]]; then
+    IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
+  fi
+fi
+
+# Build image
+# TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
+# it's no longer needed.
+docker build \
+       --no-cache \
+       --progress=plain \
+       --build-arg "BUILD_ENVIRONMENT=${image}" \
+       --build-arg "PROTOBUF=${PROTOBUF:-}" \
+       --build-arg "THRIFT=${THRIFT:-}" \
+       --build-arg "LLVMDEV=${LLVMDEV:-}" \
+       --build-arg "DB=${DB:-}" \
+       --build-arg "VISION=${VISION:-}" \
+       --build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
+       --build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \
+       --build-arg "DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" \
+       --build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
+       --build-arg "CLANG_VERSION=${CLANG_VERSION}" \
+       --build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
+       --build-arg "GCC_VERSION=${GCC_VERSION}" \
+       --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
+       --build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
+       --build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
+       --build-arg "ANDROID=${ANDROID}" \
+       --build-arg "ANDROID_NDK=${ANDROID_NDK_VERSION}" \
+       --build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
+       --build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \
+       --build-arg "SWIFTSHADER=${SWIFTSHADER}" \
+       --build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \
+       --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
+       --build-arg "KATEX=${KATEX:-}" \
+       --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
+       --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx906}" \
+       --build-arg "IMAGE_NAME=${IMAGE_NAME}" \
+       --build-arg "UCX_COMMIT=${UCX_COMMIT}" \
+       --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
+       --build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
+       -f $(dirname ${DOCKERFILE})/Dockerfile \
+       -t "$tmp_tag" \
+       "$@" \
+       .
+
+# NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
+# for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
+# find the correct image. As a result, here we have to replace the
+#   "$UBUNTU_VERSION" == "18.04-rc"
+# with
+#   "$UBUNTU_VERSION" == "18.04"
+# The suffix is stripped with parameter expansion; a value without a
+# trailing "-rc" is left unchanged.
+UBUNTU_VERSION="${UBUNTU_VERSION%-rc}"
+
+function drun() {
+  docker run --rm "$tmp_tag" $*
+}
+
+if [[ "$OS" == "ubuntu" ]]; then
+
+  if !(drun lsb_release -a 2>&1 | grep -qF Ubuntu); then
+    echo "OS=ubuntu, but:"
+    drun lsb_release -a
+    exit 1
+  fi
+  if !(drun lsb_release -a 2>&1 | grep -qF "$UBUNTU_VERSION"); then
+    echo "UBUNTU_VERSION=$UBUNTU_VERSION, but:"
+    drun lsb_release -a
+    exit 1
+  fi
+fi
+
+if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
+  if !(drun python --version 2>&1 | grep -qF "Python $ANACONDA_PYTHON_VERSION"); then
+    echo "ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION, but:"
+    drun python --version
+    exit 1
+  fi
+fi
+
+if [ -n "$GCC_VERSION" ]; then
+  if !(drun gcc --version 2>&1 | grep -q " $GCC_VERSION\\W"); then
+    echo "GCC_VERSION=$GCC_VERSION, but:"
+    drun gcc --version
+    exit 1
+  fi
+fi
+
+if [ -n "$CLANG_VERSION" ]; then
+  if !(drun clang --version 2>&1 | grep -qF "clang version $CLANG_VERSION"); then
+    echo "CLANG_VERSION=$CLANG_VERSION, but:"
+    drun clang --version
+    exit 1
+  fi
+fi
+
+if [ -n "$KATEX" ]; then
+  if !(drun katex --version); then
+    echo "KATEX=$KATEX, but:"
+    drun katex --version
+    exit 1
+  fi
+fi
--- a/.ci/docker/build_docker.sh
+++ b/.ci/docker/build_docker.sh
@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -ex
+
+# Run the given command; if it fails, retry after a 1 second pause and, if
+# that fails too, once more after a 2 second pause. Returns the status of
+# the last attempt. Arguments are forwarded with "$@" so that arguments
+# containing whitespace or glob characters reach the command intact.
+retry () {
+    "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+}
+
+# If UPSTREAM_BUILD_ID is set (see trigger job), then we can
+# use it to tag this build with the same ID used to tag all other
+# base image builds. Also, we can try and pull the previous
+# image first, to avoid rebuilding layers that haven't changed.
+
+#until we find a way to reliably reuse previous build, this last_tag is not in use
+# last_tag="$(( CIRCLE_BUILD_NUM - 1 ))"
+tag="${DOCKER_TAG}"
+
+
+registry="308535385114.dkr.ecr.us-east-1.amazonaws.com"
+image="${registry}/pytorch/${IMAGE_NAME}"
+
+# Log docker in to the registry given as $1.
+# Fetches an ECR authorization token for us-east-1 with the AWS CLI,
+# base64-decodes it, keeps the second ':'-separated field and feeds that to
+# `docker login` on stdin as the password for user AWS.
+login() {
+  aws ecr get-authorization-token --region us-east-1 --output text --query 'authorizationData[].authorizationToken' |
+    base64 -d |
+    cut -d: -f2 |
+    docker login -u AWS --password-stdin "$1"
+}
+
+
+# Bash keeps a single EXIT trap, so installing a second one silently replaces
+# the first. All exit-time cleanup therefore lives in one handler, driven by
+# the two variables below, which stay empty until the matching resource exists.
+registry_logged_in=""
+image_tarball=""
+cleanup() {
+  if [ -n "${image_tarball}" ]; then
+    rm -rf "${image_tarball}"
+  fi
+  if [ -n "${registry_logged_in}" ]; then
+    docker logout "${registry}"
+  fi
+}
+trap cleanup EXIT
+
+# Only run these steps if not on github actions
+if [[ -z "${GITHUB_ACTIONS}" ]]; then
+  # Retry on timeouts (can happen on job stampede).
+  retry login "${registry}"
+  # Logout on exit
+  registry_logged_in=yes
+fi
+
+# Try to pull the previous image (perhaps we can reuse some layers)
+# if [ -n "${last_tag}" ]; then
+#   docker pull "${image}:${last_tag}" || true
+# fi
+
+# Build new image
+./build.sh "${IMAGE_NAME}" -t "${image}:${tag}"
+
+# Only push if `DOCKER_SKIP_PUSH` = false
+if [ "${DOCKER_SKIP_PUSH:-true}" = "false" ]; then
+  # Only push if docker image doesn't exist already.
+  # ECR image tags are immutable so this will avoid pushing if only just testing if the docker jobs work
+  # NOTE: The only workflow that should push these images should be the docker-builds.yml workflow
+  if ! docker manifest inspect "${image}:${tag}" >/dev/null 2>/dev/null; then
+    docker push "${image}:${tag}"
+  fi
+fi
+
+if [ -z "${DOCKER_SKIP_S3_UPLOAD:-}" ]; then
+  # Record the tarball name before creating it so cleanup removes it on exit.
+  image_tarball="${IMAGE_NAME}:${tag}.tar"
+  docker save -o "${image_tarball}" "${image}:${tag}"
+  aws s3 cp "${image_tarball}" "s3://ossci-linux-build/pytorch/base/${image_tarball}" --acl public-read
+fi
--- a/.ci/docker/centos-rocm/Dockerfile
+++ b/.ci/docker/centos-rocm/Dockerfile
@ -0,0 +1,111 @@
+# CI image for ROCm builds on CentOS. Each install step copies one helper
+# script from ./common, runs it, and removes it again.
+ARG CENTOS_VERSION
+
+FROM centos:${CENTOS_VERSION}
+
+# An ARG declared before FROM is only in scope for the FROM line; it is
+# re-declared here so it is visible to the build steps below.
+ARG CENTOS_VERSION
+
+# Set AMD gpu targets to build for
+ARG PYTORCH_ROCM_ARCH
+ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
+
+# Install required packages to build Caffe2
+
+# Install common dependencies (so that this step can be cached separately)
+COPY ./common/install_base.sh install_base.sh
+RUN bash ./install_base.sh && rm install_base.sh
+
+# Update CentOS git version
+RUN yum -y remove git
+RUN yum -y remove git-*
+# Try the endpoint.com repo first; if that fails, fall back to the
+# endpointdev.com host and rewrite the installed repo file to match.
+RUN yum -y install https://packages.endpoint.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm || \
+    (yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm && \
+    sed -i "s/packages.endpoint/packages.endpointdev/" /etc/yum.repos.d/endpoint.repo)
+RUN yum install -y git
+
+# Install devtoolset
+ARG DEVTOOLSET_VERSION
+COPY ./common/install_devtoolset.sh install_devtoolset.sh
+RUN bash ./install_devtoolset.sh && rm install_devtoolset.sh
+# Non-interactive bash reads the file named by BASH_ENV at startup.
+# NOTE(review): presumably /etc/profile pulls in the /etc/profile.d snippet
+# written by install_devtoolset.sh - confirm.
+ENV BASH_ENV "/etc/profile"
+
+# (optional) Install non-default glibc version
+ARG GLIBC_VERSION
+COPY ./common/install_glibc.sh install_glibc.sh
+RUN if [ -n "${GLIBC_VERSION}" ]; then bash ./install_glibc.sh; fi
+RUN rm install_glibc.sh
+
+# Install user
+COPY ./common/install_user.sh install_user.sh
+RUN bash ./install_user.sh && rm install_user.sh
+
+# Install conda and other packages (e.g., numpy, pytest)
+ARG ANACONDA_PYTHON_VERSION
+ARG CONDA_CMAKE
+ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
+ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
+COPY requirements-ci.txt /opt/conda/requirements-ci.txt
+COPY ./common/install_conda.sh install_conda.sh
+# install_conda.sh sources common_utils.sh from its own directory.
+COPY ./common/common_utils.sh common_utils.sh
+RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
+
+# (optional) Install protobuf for ONNX
+ARG PROTOBUF
+COPY ./common/install_protobuf.sh install_protobuf.sh
+RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
+RUN rm install_protobuf.sh
+ENV INSTALLED_PROTOBUF ${PROTOBUF}
+
+# (optional) Install database packages like LMDB and LevelDB
+ARG DB
+COPY ./common/install_db.sh install_db.sh
+RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
+RUN rm install_db.sh
+ENV INSTALLED_DB ${DB}
+
+# (optional) Install vision packages like OpenCV and ffmpeg
+ARG VISION
+COPY ./common/install_vision.sh install_vision.sh
+RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
+RUN rm install_vision.sh
+ENV INSTALLED_VISION ${VISION}
+
+# Install rocm
+ARG ROCM_VERSION
+COPY ./common/install_rocm.sh install_rocm.sh
+RUN bash ./install_rocm.sh
+RUN rm install_rocm.sh
+COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
+RUN bash ./install_rocm_magma.sh
+RUN rm install_rocm_magma.sh
+# Each ENV prepends, so the last directory listed ends up first in PATH.
+ENV PATH /opt/rocm/bin:$PATH
+ENV PATH /opt/rocm/hcc/bin:$PATH
+ENV PATH /opt/rocm/hip/bin:$PATH
+ENV PATH /opt/rocm/opencl/bin:$PATH
+ENV PATH /opt/rocm/llvm/bin:$PATH
+ENV MAGMA_HOME /opt/rocm/magma
+ENV LANG en_US.utf8
+ENV LC_ALL en_US.utf8
+
+# (optional) Install non-default CMake version
+ARG CMAKE_VERSION
+COPY ./common/install_cmake.sh install_cmake.sh
+RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
+RUN rm install_cmake.sh
+
+# (optional) Install non-default Ninja version
+ARG NINJA_VERSION
+COPY ./common/install_ninja.sh install_ninja.sh
+RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
+RUN rm install_ninja.sh
+
+# Install ccache/sccache (do this last, so we get priority in PATH)
+COPY ./common/install_cache.sh install_cache.sh
+ENV PATH /opt/cache/bin:$PATH
+RUN bash ./install_cache.sh && rm install_cache.sh
+
+# Include BUILD_ENVIRONMENT environment variable in image
+ARG BUILD_ENVIRONMENT
+ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
+
+# Containers started from this image run as the jenkins user by default.
+USER jenkins
+CMD ["bash"]
--- a/.ci/docker/common/common_utils.sh
+++ b/.ci/docker/common/common_utils.sh
@ -0,0 +1,32 @@
+#!/bin/bash
+
+# Work around bug where devtoolset replaces sudo and breaks it.
+if [ -n "$DEVTOOLSET_VERSION" ]; then
+  export SUDO=/bin/sudo
+else
+  export SUDO=sudo
+fi
+
+# Run a command as the jenkins user.
+# Arguments: the command and its arguments, passed through unchanged.
+as_jenkins() {
+  # NB: unsetting the environment variables works around a conda bug
+  # https://github.com/conda/conda/issues/6576
+  # NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
+  # NB: This must be run from a directory that jenkins has access to,
+  # works around https://github.com/conda/conda-package-handling/pull/34
+  # "$@" (rather than unquoted $*) keeps every argument intact, so arguments
+  # containing spaces or glob characters are not re-split or expanded.
+  $SUDO -H -u jenkins env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" "$@"
+}
+
+# Install conda packages into the py_$ANACONDA_PYTHON_VERSION environment.
+# Arguments: package specs and conda options, passed through unchanged.
+conda_install() {
+  # Ensure that the install command don't upgrade/downgrade Python
+  # This should be called as
+  #   conda_install pkg1 pkg2 ... [-c channel]
+  # "$@" keeps each package spec as one word (e.g. a spec containing `*`
+  # is not glob-expanded here).
+  as_jenkins conda install -q -n "py_${ANACONDA_PYTHON_VERSION}" -y python="$ANACONDA_PYTHON_VERSION" "$@"
+}
+
+# Run a command inside the py_$ANACONDA_PYTHON_VERSION conda environment as
+# jenkins, without conda capturing its output.
+# Arguments: the command and its arguments, passed through unchanged.
+conda_run() {
+  as_jenkins conda run -n "py_${ANACONDA_PYTHON_VERSION}" --no-capture-output "$@"
+}
+
+pip_install() {
+  as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off $*
+}
--- a/.ci/docker/common/install_android.sh
+++ b/.ci/docker/common/install_android.sh
@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -ex
+
+[ -n "${ANDROID_NDK}" ]
+
+_https_amazon_aws=https://ossci-android.s3.amazonaws.com
+
+apt-get update
+apt-get install -y --no-install-recommends autotools-dev autoconf unzip
+apt-get autoclean && apt-get clean
+rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+pushd /tmp
+curl -Os --retry 3 $_https_amazon_aws/android-ndk-${ANDROID_NDK}-linux-x86_64.zip
+popd
+_ndk_dir=/opt/ndk
+mkdir -p "$_ndk_dir"
+unzip -qo /tmp/android*.zip -d "$_ndk_dir"
+_versioned_dir=$(find "$_ndk_dir/" -mindepth 1 -maxdepth 1 -type d)
+mv "$_versioned_dir"/* "$_ndk_dir"/
+rmdir "$_versioned_dir"
+rm -rf /tmp/*
+
+# Install OpenJDK
+# https://hub.docker.com/r/picoded/ubuntu-openjdk-8-jdk/dockerfile/
+
+sudo apt-get update && \
+    apt-get install -y openjdk-8-jdk && \
+    apt-get install -y ant && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    rm -rf /var/cache/oracle-jdk8-installer;
+
+# Fix certificate issues, found as of
+# https://bugs.launchpad.net/ubuntu/+source/ca-certificates-java/+bug/983302
+
+sudo apt-get update && \
+    apt-get install -y ca-certificates-java && \
+    apt-get clean && \
+    update-ca-certificates -f && \
+    rm -rf /var/lib/apt/lists/* && \
+    rm -rf /var/cache/oracle-jdk8-installer;
+
+export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
+
+# Installing android sdk
+# https://github.com/circleci/circleci-images/blob/staging/android/Dockerfile.m4
+
+_tmp_sdk_zip=/tmp/android-sdk-linux.zip
+_android_home=/opt/android/sdk
+
+rm -rf $_android_home
+sudo mkdir -p $_android_home
+curl --silent --show-error --location --fail --retry 3 --output /tmp/android-sdk-linux.zip $_https_amazon_aws/android-sdk-linux-tools3859397-build-tools2803-2902-platforms28-29.zip
+sudo unzip -q $_tmp_sdk_zip -d $_android_home
+rm $_tmp_sdk_zip
+
+sudo chmod -R 777 $_android_home
+
+export ANDROID_HOME=$_android_home
+export ADB_INSTALL_TIMEOUT=120
+
+export PATH="${ANDROID_HOME}/tools:${ANDROID_HOME}/tools/bin:${ANDROID_HOME}/platform-tools:${PATH}"
+echo "PATH:${PATH}"
+
+# Installing Gradle
+echo "GRADLE_VERSION:${GRADLE_VERSION}"
+_gradle_home=/opt/gradle
+sudo rm -rf $gradle_home
+sudo mkdir -p $_gradle_home
+
+curl --silent --output /tmp/gradle.zip --retry 3 $_https_amazon_aws/gradle-${GRADLE_VERSION}-bin.zip
+
+sudo unzip -q /tmp/gradle.zip -d $_gradle_home
+rm /tmp/gradle.zip
+
+sudo chmod -R 777 $_gradle_home
+
+export GRADLE_HOME=$_gradle_home/gradle-$GRADLE_VERSION
+alias gradle="${GRADLE_HOME}/bin/gradle"
+
+export PATH="${GRADLE_HOME}/bin/:${PATH}"
+echo "PATH:${PATH}"
+
+gradle --version
+
+mkdir /var/lib/jenkins/gradledeps
+cp build.gradle /var/lib/jenkins/gradledeps
+cp AndroidManifest.xml /var/lib/jenkins/gradledeps
+
+pushd /var/lib/jenkins
+
+export GRADLE_LOCAL_PROPERTIES=gradledeps/local.properties
+rm -f $GRADLE_LOCAL_PROPERTIES
+echo "sdk.dir=/opt/android/sdk" >> $GRADLE_LOCAL_PROPERTIES
+echo "ndk.dir=/opt/ndk" >> $GRADLE_LOCAL_PROPERTIES
+
+chown -R jenkins /var/lib/jenkins/gradledeps
+chgrp -R jenkins /var/lib/jenkins/gradledeps
+
+sudo -H -u jenkins $GRADLE_HOME/bin/gradle -Pandroid.useAndroidX=true -p /var/lib/jenkins/gradledeps -g /var/lib/jenkins/.gradle --refresh-dependencies --debug --stacktrace assemble
+
+chown -R jenkins /var/lib/jenkins/.gradle
+chgrp -R jenkins /var/lib/jenkins/.gradle
+
+popd
+
+rm -rf /var/lib/jenkins/.gradle/daemon
--- a/.ci/docker/common/install_base.sh
+++ b/.ci/docker/common/install_base.sh
@ -0,0 +1,169 @@
+#!/bin/bash
+
+set -ex
+
+install_ubuntu() {
+  # NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
+  # for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
+  # find the correct image. As a result, here we have to check for
+  #   "$UBUNTU_VERSION" == "18.04"*
+  # instead of
+  #   "$UBUNTU_VERSION" == "18.04"
+  if [[ "$UBUNTU_VERSION" == "18.04"* ]]; then
+    cmake3="cmake=3.10*"
+    maybe_libiomp_dev="libiomp-dev"
+  elif [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
+    cmake3="cmake=3.16*"
+    maybe_libiomp_dev=""
+  elif [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
+    cmake3="cmake=3.22*"
+    maybe_libiomp_dev=""
+  else
+    cmake3="cmake=3.5*"
+    maybe_libiomp_dev="libiomp-dev"
+  fi
+
+  if [[ "$CLANG_VERSION" == 12 ]]; then
+    maybe_libomp_dev="libomp-12-dev"
+  elif [[ "$CLANG_VERSION" == 10 ]]; then
+    maybe_libomp_dev="libomp-10-dev"
+  else
+    maybe_libomp_dev=""
+  fi
+
+  # TODO: Remove this once nvidia package repos are back online
+  # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
+  # shellcheck disable=SC2046
+  sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")
+
+  # Install common dependencies
+  apt-get update
+  # TODO: Some of these may not be necessary
+  ccache_deps="asciidoc docbook-xml docbook-xsl xsltproc"
+  deploy_deps="libffi-dev libbz2-dev libreadline-dev libncurses5-dev libncursesw5-dev libgdbm-dev libsqlite3-dev uuid-dev tk-dev"
+  numpy_deps="gfortran"
+  apt-get install -y --no-install-recommends \
+    $ccache_deps \
+    $numpy_deps \
+    ${deploy_deps} \
+    ${cmake3} \
+    apt-transport-https \
+    autoconf \
+    automake \
+    build-essential \
+    ca-certificates \
+    curl \
+    git \
+    libatlas-base-dev \
+    libc6-dbg \
+    ${maybe_libiomp_dev} \
+    libyaml-dev \
+    libz-dev \
+    libjpeg-dev \
+    libasound2-dev \
+    libsndfile-dev \
+    ${maybe_libomp_dev} \
+    software-properties-common \
+    wget \
+    sudo \
+    vim \
+    jq \
+    libtool \
+    vim \
+    unzip \
+    gdb
+
+  # Should resolve issues related to various apt package repository cert issues
+  # see: https://github.com/pytorch/pytorch/issues/65931
+  apt-get install -y libgnutls30
+
+  # cuda-toolkit does not work with gcc-11.2.0 which is default in Ubunutu 22.04
+  # see: https://github.com/NVlabs/instant-ngp/issues/119
+  if [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
+    apt-get install -y g++-10
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 30
+    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 30
+    update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-10 30
+
+    # https://www.spinics.net/lists/libreoffice/msg07549.html
+    sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/11
+    wget https://github.com/gcc-mirror/gcc/commit/2b2d97fc545635a0f6aa9c9ee3b017394bc494bf.patch -O noexecpt.patch
+    sudo patch  /usr/include/c++/10/bits/range_access.h noexecpt.patch
+  fi
+
+  # Cleanup package manager
+  apt-get autoclean && apt-get clean
+  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+}
+
+install_centos() {
+  # Need EPEL for many packages we depend on.
+  # See http://fedoraproject.org/wiki/EPEL
+  yum --enablerepo=extras install -y epel-release
+
+  ccache_deps="asciidoc docbook-dtds docbook-style-xsl libxslt"
+  numpy_deps="gcc-gfortran"
+  # Note: protobuf-c-{compiler,devel} on CentOS are too old to be used
+  # for Caffe2. That said, we still install them to make sure the build
+  # system opts to build/use protoc and libprotobuf from third-party.
+  yum install -y \
+    $ccache_deps \
+    $numpy_deps \
+    autoconf \
+    automake \
+    bzip2 \
+    cmake \
+    cmake3 \
+    curl \
+    gcc \
+    gcc-c++ \
+    gflags-devel \
+    git \
+    glibc-devel \
+    glibc-headers \
+    glog-devel \
+    hiredis-devel \
+    libstdc++-devel \
+    libsndfile-devel \
+    make \
+    opencv-devel \
+    sudo \
+    wget \
+    vim \
+    unzip \
+    gdb
+
+  # Cleanup
+  yum clean all
+  rm -rf /var/cache/yum
+  rm -rf /var/lib/yum/yumdb
+  rm -rf /var/lib/yum/history
+}
+
+# Install base packages depending on the base OS
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+  ubuntu)
+    install_ubuntu
+    ;;
+  centos)
+    install_centos
+    ;;
+  *)
+    echo "Unable to determine OS..."
+    exit 1
+    ;;
+esac
+
+# Install Valgrind separately since the apt-get version is too old.
+mkdir valgrind_build && cd valgrind_build
+VALGRIND_VERSION=3.20.0
+wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
+tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
+cd valgrind-${VALGRIND_VERSION}
+./configure --prefix=/usr/local
+make -j6
+sudo make install
+cd ../../
+rm -rf valgrind_build
+alias valgrind="/usr/local/bin/valgrind"
--- a/.ci/docker/common/install_cache.sh
+++ b/.ci/docker/common/install_cache.sh
@ -0,0 +1,121 @@
+#!/bin/bash
+
+set -ex
+
+install_ubuntu() {
+  echo "Preparing to build sccache from source"
+  apt-get update
+  # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.
+  # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``
+  apt-get install -y cargo
+  echo "Checking out sccache repo"
+  git clone https://github.com/pytorch/sccache
+  cd sccache
+  echo "Building sccache"
+  cargo build --release
+  cp target/release/sccache /opt/cache/bin
+  echo "Cleaning up"
+  cd ..
+  rm -rf sccache
+  apt-get remove -y cargo rustc
+  apt-get autoclean && apt-get clean
+}
+
+# Download a prebuilt sccache binary to /opt/cache/bin/sccache.
+install_binary() {
+  echo "Downloading sccache binary from S3 repo"
+  # --fail makes curl exit non-zero on an HTTP error instead of saving the
+  # server's error page as the "binary".
+  curl --fail --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache
+}
+
+mkdir -p /opt/cache/bin
+mkdir -p /opt/cache/lib
+# Put /opt/cache/bin first in PATH, both in /etc/environment and for the
+# rest of this script.
+sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
+export PATH="/opt/cache/bin:$PATH"
+
+# Setup compiler cache
+if [ -n "$ROCM_VERSION" ]; then
+  # --fail makes curl exit non-zero on an HTTP error instead of saving the
+  # server's error page as the sccache binary.
+  # NOTE(review): this binary is fetched over plain http - consider https.
+  curl --fail --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache
+else
+  ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+  case "$ID" in
+    ubuntu)
+      install_ubuntu
+      ;;
+    *)
+      install_binary
+      ;;
+  esac
+fi
+chmod a+x /opt/cache/bin/sccache
+
+# Write an executable /bin/sh wrapper to /opt/cache/bin/$1.
+# Arguments: $1 - compiler name (e.g. gcc).
+# The generated script execs `sccache <compiler> "$@"` unless its parent
+# process is already named sccache, in which case it execs the compiler
+# directly. `$(which $1)` is expanded now, while the stub is generated, so
+# the resolved compiler path is baked into the stub.
+function write_sccache_stub() {
+  # Unset LD_PRELOAD for ps because of asan + ps issues
+  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
+  printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n  exec sccache $(which $1) \"\$@\"\nelse\n  exec $(which $1) \"\$@\"\nfi" > "/opt/cache/bin/$1"
+  chmod a+x "/opt/cache/bin/$1"
+}
+
+write_sccache_stub cc
+write_sccache_stub c++
+write_sccache_stub gcc
+write_sccache_stub g++
+
+# NOTE: See specific ROCM_VERSION case below.
+if [ "x$ROCM_VERSION" = x ]; then
+  write_sccache_stub clang
+  write_sccache_stub clang++
+fi
+
+if [ -n "$CUDA_VERSION" ]; then
+  # TODO: This is a workaround for the fact that PyTorch's FindCUDA
+  # implementation cannot find nvcc if it is setup this way, because it
+  # appears to search for the nvcc in PATH, and use its path to infer
+  # where CUDA is installed.  Instead, we install an nvcc symlink outside
+  # of the PATH, and set CUDA_NVCC_EXECUTABLE so that we make use of it.
+
+  write_sccache_stub nvcc
+  mv /opt/cache/bin/nvcc /opt/cache/lib/
+fi
+
+# ROCm only: wrap the compilers in place under /opt/rocm.
+if [ -n "$ROCM_VERSION" ]; then
+  # ROCm compiler is hcc or clang. However, it is commonly invoked via hipcc wrapper.
+  # hipcc will call either hcc or clang using an absolute path starting with /opt/rocm,
+  # causing the /opt/cache/bin to be skipped. We must create the sccache wrappers
+  # directly under /opt/rocm while also preserving the original compiler names.
+  # Note symlinks will chain as follows: [hcc or clang++] -> clang -> clang-??
+  # Final link in symlink chain must point back to original directory.
+
+  # Original compiler is moved one directory deeper. Wrapper replaces it.
+  # Arguments: $1 - absolute path of the compiler to wrap.
+  # The `original/` subdirectory must already exist. OLDCOMP, COMPNAME,
+  # TOPDIR and WRAPPED are not declared local, so they remain set globally.
+  function write_sccache_stub_rocm() {
+    OLDCOMP=$1
+    COMPNAME=$(basename $OLDCOMP)
+    TOPDIR=$(dirname $OLDCOMP)
+    WRAPPED="$TOPDIR/original/$COMPNAME"
+    mv "$OLDCOMP" "$WRAPPED"
+    printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" > "$OLDCOMP"
+    chmod a+x "$OLDCOMP"
+  }
+
+  if [[ -e "/opt/rocm/hcc/bin/hcc" ]]; then
+    # ROCm 3.3 or earlier.
+    mkdir /opt/rocm/hcc/bin/original
+    write_sccache_stub_rocm /opt/rocm/hcc/bin/hcc
+    write_sccache_stub_rocm /opt/rocm/hcc/bin/clang
+    write_sccache_stub_rocm /opt/rocm/hcc/bin/clang++
+    # Fix last link in symlink chain, clang points to versioned clang in prior dir
+    pushd /opt/rocm/hcc/bin/original
+    # Creates original/<target of clang> as a link to ../<target of clang>.
+    ln -s ../$(readlink clang)
+    popd
+  elif [[ -e "/opt/rocm/llvm/bin/clang" ]]; then
+    # ROCm 3.5 and beyond.
+    mkdir /opt/rocm/llvm/bin/original
+    write_sccache_stub_rocm /opt/rocm/llvm/bin/clang
+    write_sccache_stub_rocm /opt/rocm/llvm/bin/clang++
+    # Fix last link in symlink chain, clang points to versioned clang in prior dir
+    pushd /opt/rocm/llvm/bin/original
+    # Creates original/<target of clang> as a link to ../<target of clang>.
+    ln -s ../$(readlink clang)
+    popd
+  else
+    echo "Cannot find ROCm compiler."
+    exit 1
+  fi
+fi
--- a/.ci/docker/common/install_clang.sh
+++ b/.ci/docker/common/install_clang.sh
@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Installs clang-$CLANG_VERSION and llvm-$CLANG_VERSION from apt and makes
+# them the default clang/clang++. Does nothing when CLANG_VERSION is empty.
+# Reads: CLANG_VERSION, UBUNTU_VERSION, LLVMDEV.
+
+set -ex
+
+if [ -n "$CLANG_VERSION" ]; then
+
+  # Add the apt.llvm.org repository for the version/distro pairs listed here.
+  if [[ $CLANG_VERSION == 7 && $UBUNTU_VERSION == 16.04 ]]; then
+    wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
+    sudo apt-add-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main"
+  elif [[ $CLANG_VERSION == 9 && $UBUNTU_VERSION == 18.04 ]]; then
+    sudo apt-get update
+    # gpg-agent is not available by default on 18.04
+    sudo apt-get install  -y --no-install-recommends gpg-agent
+    # NOTE(review): --no-check-certificate disables TLS verification while
+    # fetching the repository signing key - confirm this is still required.
+    wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add  -
+    apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-${CLANG_VERSION} main"
+  elif [[ $UBUNTU_VERSION == 22.04 ]]; then
+    # work around ubuntu apt-get conflicts
+    sudo apt-get -y -f install
+  fi
+
+  sudo apt-get update
+  apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
+  apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
+
+  # Install dev version of LLVM.
+  if [ -n "$LLVMDEV" ]; then
+    sudo apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"-dev
+  fi
+
+  # Use update-alternatives to make this version the default
+  # TODO: Decide if overriding gcc as well is a good idea
+  # update-alternatives --install /usr/bin/gcc gcc /usr/bin/clang-"$CLANG_VERSION" 50
+  # update-alternatives --install /usr/bin/g++ g++ /usr/bin/clang++-"$CLANG_VERSION" 50
+  update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50
+  update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-"$CLANG_VERSION" 50
+
+  # clang's packaging is a little messed up (the runtime libs aren't
+  # added into the linker path), so give it a little help
+  # The glob is expanded into an array; "$clang_lib" below is its first
+  # element only.
+  clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
+  echo "$clang_lib" > /etc/ld.so.conf.d/clang.conf
+  ldconfig
+
+  # Cleanup package manager
+  apt-get autoclean && apt-get clean
+  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+fi
--- a/.ci/docker/common/install_cmake.sh
+++ b/.ci/docker/common/install_cmake.sh
@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Replaces the distro cmake with the official binary release
+# $CMAKE_VERSION, unpacked into /usr/local.
+
+set -ex
+
+# Under `set -e` this aborts the script when CMAKE_VERSION is unset or empty.
+[ -n "$CMAKE_VERSION" ]
+
+# Remove system cmake install so it won't get used instead
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+  ubuntu)
+    apt-get remove cmake -y
+    ;;
+  centos)
+    yum remove cmake -y
+    ;;
+  *)
+    echo "Unable to determine OS..."
+    exit 1
+    ;;
+esac
+
+# Turn 3.6.3 into v3.6
+# The pattern is anchored, accepts a multi-digit major version and matches a
+# literal dot, so e.g. 10.2.1 becomes v10.2.
+path=$(echo "${CMAKE_VERSION}" | sed -e 's/^\([0-9]\+\.[0-9]\+\).*/v\1/')
+file="cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz"
+
+# Download and install specific CMake version in /usr/local
+pushd /tmp
+# --fail: exit non-zero on an HTTP error instead of saving the error page.
+curl --fail -Os --retry 3 "https://cmake.org/files/${path}/${file}"
+# Operate on the exact tarball just downloaded rather than any cmake-*.tar.gz
+# that happens to be in /tmp.
+tar -C /usr/local --strip-components 1 --no-same-owner -zxf "${file}"
+rm -f "${file}"
+popd
--- a/.ci/docker/common/install_conda.sh
+++ b/.ci/docker/common/install_conda.sh
@ -0,0 +1,98 @@
+#!/bin/bash
+
+# Installs Miniconda into /opt/conda as the jenkins user and creates the
+# py_$ANACONDA_PYTHON_VERSION environment with the build/test dependencies.
+# Does nothing when ANACONDA_PYTHON_VERSION is empty.
+
+set -ex
+
+# Optionally install conda
+if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
+  BASE_URL="https://repo.anaconda.com/miniconda"
+
+  # Everything before the first dot, e.g. 3.8 -> 3
+  MAJOR_PYTHON_VERSION="${ANACONDA_PYTHON_VERSION%%.*}"
+
+  if [ "$MAJOR_PYTHON_VERSION" = "2" ]; then
+    CONDA_FILE="Miniconda2-latest-Linux-x86_64.sh"
+  elif [ "$MAJOR_PYTHON_VERSION" = "3" ]; then
+    CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
+  else
+    echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
+    exit 1
+  fi
+
+  mkdir -p /opt/conda
+  chown jenkins:jenkins /opt/conda
+
+  # Provides as_jenkins, conda_install and pip_install.
+  source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
+
+  pushd /tmp
+  wget -q "${BASE_URL}/${CONDA_FILE}"
+  # NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
+  as_jenkins bash "${CONDA_FILE}" -b -f -p "/opt/conda"
+  popd
+
+  # NB: Don't do this, rely on the rpath to get it right
+  #echo "/opt/conda/lib" > /etc/ld.so.conf.d/conda-python.conf
+  #ldconfig
+  sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
+  export PATH="/opt/conda/bin:$PATH"
+
+  # Ensure we run conda in a directory that jenkins has write access to
+  pushd /opt/conda
+
+  # Prevent conda from updating to 4.14.0, which causes docker build failures
+  # See https://hud.pytorch.org/pytorch/pytorch/commit/754d7f05b6841e555cea5a4b2c505dd9e0baec1d
+  # Uncomment the below when resolved to track the latest conda update
+  # as_jenkins conda update -y -n base conda
+
+  # Install correct Python version
+  as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION"
+
+  # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
+  CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"
+  # numpy is pinned per Python version; llvm-8 is installed for 3.8+ as it is
+  # required to compile llvmlite-0.30.0 from source.
+  case "$ANACONDA_PYTHON_VERSION" in
+    3.11)
+      # TODO: Stop using `-c malfet`
+      conda_install numpy=1.23.5 ${CONDA_COMMON_DEPS} llvmdev=8.0.0 -c malfet
+      ;;
+    3.10)
+      conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
+      ;;
+    3.9)
+      conda_install numpy=1.19.2 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
+      ;;
+    3.8)
+      conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
+      ;;
+    *)
+      # Install `typing-extensions` for 3.7
+      conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS} typing-extensions
+      ;;
+  esac
+
+  # Use conda cmake in some cases. Conda cmake will be newer than our supported
+  # min version (3.5 for xenial and 3.10 for bionic), so we only do it in those
+  # following builds that we know should use conda. Specifically, Ubuntu bionic
+  # and focal cannot find conda mkl with stock cmake, so we need a cmake from conda
+  if [ -n "${CONDA_CMAKE}" ]; then
+    conda_install cmake
+  fi
+
+  # Magma package names are concatenation of CUDA major and minor ignoring revision
+  # I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
+  if [ -n "$CUDA_VERSION" ]; then
+    # Drop the first dot, then strip a trailing ".<revision>" if present.
+    magma_suffix="${CUDA_VERSION/./}"
+    magma_suffix="${magma_suffix%.*[0-9]}"
+    conda_install "magma-cuda${magma_suffix}" -c pytorch
+  fi
+
+  # Install some other packages, including those needed for Python test reporting
+  pip_install -r /opt/conda/requirements-ci.txt
+
+  # Update scikit-learn to a python-3.8 compatible version
+  if [[ $(python -c "import sys; print(int(sys.version_info >= (3, 8)))") == "1" ]]; then
+    pip_install -U scikit-learn
+  else
+    # Pinned scikit-learn due to https://github.com/scikit-learn/scikit-learn/issues/14485 (affects gcc 5.5 only)
+    pip_install scikit-learn==0.20.3
+  fi
+
+  popd
+fi
--- a/.ci/docker/common/install_cudnn.sh
+++ b/.ci/docker/common/install_cudnn.sh
@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Installs cuDNN 8 headers and libraries into the CUDA and system
+# include/lib directories. Does nothing unless CUDNN_VERSION is 8.
+# The archive is chosen from the first four characters of CUDA_VERSION.
+# This script does not run under `set -e`, so the steps whose failure would
+# otherwise go unnoticed are checked explicitly.
+
+if [[ ${CUDNN_VERSION} == 8 ]]; then
+    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
+    mkdir tmp_cudnn || exit 1
+    cd tmp_cudnn || exit 1
+    if [[ ${CUDA_VERSION:0:4} == "11.7" ]]; then
+        CUDNN_NAME="cudnn-linux-x86_64-8.5.0.96_cuda11-archive"
+        CUDNN_URL_BASE="https://ossci-linux.s3.amazonaws.com"
+    elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
+        CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
+        CUDNN_URL_BASE="https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8"
+    else
+        CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
+        CUDNN_URL_BASE="https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5"
+    fi
+    # --fail: exit non-zero on an HTTP error instead of saving the error page.
+    curl --fail --retry 3 -OLs "${CUDNN_URL_BASE}/${CUDNN_NAME}.tar.xz" || exit 1
+
+    tar xf "${CUDNN_NAME}.tar.xz" || exit 1
+    cp -a "${CUDNN_NAME}"/include/* /usr/include/
+    cp -a "${CUDNN_NAME}"/include/* /usr/local/cuda/include/
+    cp -a "${CUDNN_NAME}"/include/* /usr/include/x86_64-linux-gnu/
+
+    cp -a "${CUDNN_NAME}"/lib/* /usr/local/cuda/lib64/
+    cp -a "${CUDNN_NAME}"/lib/* /usr/lib/x86_64-linux-gnu/
+    cd ..
+    rm -rf tmp_cudnn
+    ldconfig
+fi
--- a/.ci/docker/common/install_db.sh
+++ b/.ci/docker/common/install_db.sh
@ -0,0 +1,49 @@
+#!/bin/bash
+
+set -ex
+
+install_ubuntu() {
+  apt-get update
+  apt-get install -y --no-install-recommends \
+          libhiredis-dev \
+          libleveldb-dev \
+          liblmdb-dev \
+          libsnappy-dev
+
+  # Cleanup
+  apt-get autoclean && apt-get clean
+  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+}
+
+# Install the database development packages from yum (EPEL is enabled
+# first), then remove the yum caches and history.
+install_centos() {
+  # Need EPEL for many packages we depend on.
+  # See http://fedoraproject.org/wiki/EPEL
+  yum --enablerepo=extras install -y epel-release
+
+  yum install -y \
+      hiredis-devel \
+      leveldb-devel \
+      lmdb-devel \
+      snappy-devel
+
+  # Cleanup
+  yum clean all
+  rm -rf /var/cache/yum
+  rm -rf /var/lib/yum/yumdb
+  rm -rf /var/lib/yum/history
+}
+
+# Install base packages depending on the base OS
+# ID is the value of the ID= line in /etc/os-release with quotes stripped.
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+  ubuntu)
+    install_ubuntu
+    ;;
+  centos)
+    install_centos
+    ;;
+  *)
+    # Any other distribution is rejected.
+    echo "Unable to determine OS..."
+    exit 1
+    ;;
+esac
--- a/.ci/docker/common/install_devtoolset.sh
+++ b/.ci/docker/common/install_devtoolset.sh
@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -ex
+
+[ -n "$DEVTOOLSET_VERSION" ]
+
+yum install -y centos-release-scl
+yum install -y devtoolset-$DEVTOOLSET_VERSION
+
+echo "source scl_source enable devtoolset-$DEVTOOLSET_VERSION" > "/etc/profile.d/devtoolset-$DEVTOOLSET_VERSION.sh"
--- a/.ci/docker/common/install_docs_reqs.sh
+++ b/.ci/docker/common/install_docs_reqs.sh
@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Installs the documentation toolchain (nodejs, yarn, katex, doxygen).
+# Does nothing when KATEX is empty.
+
+set -ex
+
+if [ -n "$KATEX" ]; then
+  apt-get update
+  # Ignore error if gpg-agent doesn't exist (for Ubuntu 16.04)
+  apt-get install -y gpg-agent || :
+
+  # NOTE(review): this pipes a remote script straight into a root shell.
+  curl --retry 3 -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -
+  sudo apt-get install -y nodejs
+
+  # Register the yarn apt repository and its signing key.
+  curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
+  echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
+
+  apt-get update
+  apt-get install -y --no-install-recommends yarn
+  yarn global add katex --prefix /usr/local
+
+  sudo apt-get -y install doxygen
+
+  # Cleanup package manager
+  apt-get autoclean && apt-get clean
+  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+fi
--- a/.ci/docker/common/install_gcc.sh
+++ b/.ci/docker/common/install_gcc.sh
@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Installs g++-$GCC_VERSION from the ubuntu-toolchain PPA and registers
+# gcc/g++/gcov with update-alternatives. Does nothing when GCC_VERSION is
+# empty.
+
+set -ex
+
+if [ -n "$GCC_VERSION" ]; then
+
+  # Need the official toolchain repo to get alternate packages
+  add-apt-repository ppa:ubuntu-toolchain-r/test
+  apt-get update
+
+  # Ubuntu 16.04 with a GCC 5.x request gets a pinned g++-5 build; every
+  # other combination installs g++-$GCC_VERSION.
+  if [[ "$UBUNTU_VERSION" == "16.04" && "${GCC_VERSION:0:1}" == "5" ]]; then
+    gxx_package="g++-5=5.4.0-6ubuntu1~16.04.12"
+    tool_suffix="5"
+  else
+    gxx_package="g++-$GCC_VERSION"
+    tool_suffix="$GCC_VERSION"
+  fi
+
+  apt-get install -y "$gxx_package"
+  for tool in gcc g++ gcov; do
+    update-alternatives --install "/usr/bin/${tool}" "${tool}" "/usr/bin/${tool}-${tool_suffix}" 50
+  done
+
+
+  # Cleanup package manager
+  apt-get autoclean && apt-get clean
+  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+fi
--- a/.ci/docker/common/install_glibc.sh
+++ b/.ci/docker/common/install_glibc.sh
@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Builds glibc $GLIBC_VERSION from source and installs it with prefix /usr.
+# Required env: GLIBC_VERSION; DEVTOOLSET_VERSION is also required when
+# CENTOS_VERSION is set.
+
+set -ex
+
+# Under `set -e` these bare tests abort the script when the variable is empty.
+[ -n "$GLIBC_VERSION" ]
+if [[ -n "$CENTOS_VERSION" ]]; then
+  [ -n "$DEVTOOLSET_VERSION" ]
+fi
+
+yum install -y wget sed
+
+mkdir -p /packages && cd /packages
+# NOTE(review): the source tarball is fetched over plain http and is not
+# checksum-verified.
+wget -q http://ftp.gnu.org/gnu/glibc/glibc-$GLIBC_VERSION.tar.gz
+tar xzf glibc-$GLIBC_VERSION.tar.gz
+if [[ "$GLIBC_VERSION" == "2.26" ]]; then
+  cd glibc-$GLIBC_VERSION
+  # For 2.26, extend the nss_test1 exclusion in test-installation.pl to
+  # nss_test2 as well.
+  sed -i 's/$name ne "nss_test1"/$name ne "nss_test1" \&\& $name ne "nss_test2"/' scripts/test-installation.pl
+  cd ..
+fi
+# Configure and build in a separate build directory next to the sources.
+mkdir -p glibc-$GLIBC_VERSION-build && cd glibc-$GLIBC_VERSION-build
+
+if [[ -n "$CENTOS_VERSION" ]]; then
+  # Put the devtoolset binaries first in PATH for the build.
+  export PATH=/opt/rh/devtoolset-$DEVTOOLSET_VERSION/root/usr/bin:$PATH
+fi
+
+../glibc-$GLIBC_VERSION/configure --prefix=/usr CFLAGS='-Wno-stringop-truncation -Wno-format-overflow -Wno-restrict -Wno-format-truncation -g -O2'
+make -j$(nproc)
+make install
+
+# Cleanup
+rm -rf /packages
+rm -rf /var/cache/yum/*
+rm -rf /var/lib/rpm/__db.*
+yum clean all
--- a/.ci/docker/common/install_jni.sh
+++ b/.ci/docker/common/install_jni.sh
@ -0,0 +1,6 @@
+#!/bin/bash
+
+# Copies jni.h from the current directory into /usr/local/include.
+
+set -ex
+
+include_dir=/usr/local/include
+
+mkdir -p "${include_dir}"
+cp jni.h "${include_dir}"
--- a/.ci/docker/common/install_lcov.sh
+++ b/.ci/docker/common/install_lcov.sh
@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -ex
+
+git clone --branch v1.15 https://github.com/linux-test-project/lcov.git
+pushd lcov
+sudo make install   # will be installed in /usr/local/bin/lcov
+popd
--- a/.ci/docker/common/install_linter.sh
+++ b/.ci/docker/common/install_linter.sh
@ -0,0 +1,29 @@
+#!/bin/bash
+
+# Installs the linter toolchain: apt packages (Ubuntu only), the Python
+# requirements of a fresh PyTorch checkout, lintrunner's tools, and
+# markdown-toc.
+
+set -ex
+
+# Provides pip_install and conda_run.
+source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
+
+if [ -n "${UBUNTU_VERSION}" ]; then
+  apt update
+  apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5
+fi
+
+# Do shallow clone of PyTorch so that we can init lintrunner in Docker build context
+git clone https://github.com/pytorch/pytorch.git --depth 1
+# pip_install and conda_run execute as jenkins, so jenkins must own the checkout.
+chown -R jenkins pytorch
+
+pushd pytorch
+# Install all linter dependencies
+pip_install -r requirements.txt
+conda_run lintrunner init
+
+# Cache .lintbin directory as part of the Docker image
+cp -r .lintbin /tmp
+popd
+
+# Node dependencies required by toc linter job
+npm install -g markdown-toc
+
+# Cleaning up
+rm -rf pytorch
--- a/.ci/docker/common/install_ninja.sh
+++ b/.ci/docker/common/install_ninja.sh
@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -ex
+
+[ -n "$NINJA_VERSION" ]
+
+url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"
+
+pushd /tmp
+wget --no-verbose --output-document=ninja-linux.zip "$url"
+unzip ninja-linux.zip -d /usr/local/bin
+rm -f ninja-linux.zip
+popd
--- a/.ci/docker/common/install_openmpi.sh
+++ b/.ci/docker/common/install_openmpi.sh
@ -0,0 +1,10 @@
+#!/bin/bash
+
+sudo apt-get update
+# also install ssh to avoid error of:
+# --------------------------------------------------------------------------
+# The value of the MCA parameter "plm_rsh_agent" was set to a path
+# that could not be found:
+#   plm_rsh_agent: ssh : rsh
+sudo apt-get install -y ssh
+sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
--- a/.ci/docker/common/install_openssl.sh
+++ b/.ci/docker/common/install_openssl.sh
@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Builds OpenSSL from a source tarball, installs it under /opt/openssl and
+# symlinks its libraries into /usr/lib. The source tree and the downloaded
+# tarball are both removed once the install has finished.
+
+set -ex
+
+OPENSSL=openssl-1.1.1k
+
+wget -q -O "${OPENSSL}.tar.gz" "https://ossci-linux.s3.amazonaws.com/${OPENSSL}.tar.gz"
+tar xf "${OPENSSL}.tar.gz"
+cd "${OPENSSL}"
+# The single quotes keep $(LIBRPATH) literal: it is handed to ./config as
+# text rather than being run as a shell command substitution.
+./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
+# NOTE: openssl install errors out when built with the -j option
+make -j6
+make install_sw
+# Link the ssl libraries to the /usr/lib folder.
+sudo ln -s /opt/openssl/lib/lib* /usr/lib
+cd ..
+# Cleanup: the tarball is as disposable as the extracted tree.
+rm -rf "${OPENSSL}" "${OPENSSL}.tar.gz"
--- a/.ci/docker/common/install_protobuf.sh
+++ b/.ci/docker/common/install_protobuf.sh
@ -0,0 +1,56 @@
+#!/bin/bash
+
+set -ex
+
+# This function installs protobuf 3.17
+install_protobuf_317() {
+  pb_dir="/usr/temp_pb_install_dir"
+  mkdir -p $pb_dir
+
+  # On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
+  # else it will fail with
+  #   g++: error: ./../lib64/crti.o: No such file or directory
+  ln -s /usr/lib64 "$pb_dir/lib64"
+
+  curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
+  tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
+  # -j6 to balance memory usage and speed.
+  # naked `-j` seems to use too much memory.
+  pushd "$pb_dir" && ./configure && make -j6 && make -j6 check && sudo make -j6 install && sudo ldconfig
+  popd
+  rm -rf $pb_dir
+}
+
+install_ubuntu() {
+  # Ubuntu 14.04 has cmake 2.8.12 as the default option, so we will
+  # install cmake3 here and use cmake3.
+  apt-get update
+  if [[ "$UBUNTU_VERSION" == 14.04 ]]; then
+    apt-get install -y --no-install-recommends cmake3
+  fi
+
+  # Cleanup
+  apt-get autoclean && apt-get clean
+  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+  install_protobuf_317
+}
+
+# CentOS path: nothing distro-specific to prepare, just build protobuf.
+install_centos() { install_protobuf_317; }
+
+# Install base packages depending on the base OS
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+  ubuntu)
+    install_ubuntu
+    ;;
+  centos)
+    install_centos
+    ;;
+  *)
+    echo "Unable to determine OS..."
+    exit 1
+    ;;
+esac
--- a/.ci/docker/common/install_rocm.sh
+++ b/.ci/docker/common/install_rocm.sh
@ -0,0 +1,146 @@
+#!/bin/bash
+
+set -ex
+
+ver() {
+    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
+}
+
+# Lookup table: ROCm version -> matching AMDGPU version
+declare -A AMDGPU_VERSIONS=(
+    ["5.0"]="21.50"
+    ["5.1.1"]="22.10.1"
+    ["5.2"]="22.20"
+)
+
+# Ubuntu installer: installs prerequisite packages, registers the amdgpu and
+# rocm apt repositories for ROCM_VERSION, installs the ROCm packages plus any
+# unversioned miopenkernels packages, and finally clears apt/temp caches.
+install_ubuntu() {
+    apt-get update
+    # gpg-agent is not available by default on 18.04 or 20.04
+    if [[ $UBUNTU_VERSION == 18.04 || $UBUNTU_VERSION == 20.04 ]]; then
+      apt-get install -y --no-install-recommends gpg-agent
+    fi
+
+    # libc++1 and libc++abi1 are needed to allow torch._C to load at runtime
+    local prereq
+    for prereq in kmod wget libc++1 libc++abi1; do
+        apt-get install -y "${prereq}"
+    done
+
+    if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
+        # Add amdgpu repository
+        UBUNTU_VERSION_NAME=$(grep UBUNTU_CODENAME /etc/os-release | awk -F= '{print $2}')
+        local amdgpu_baseurl
+        # From 5.3 on the repository path uses the ROCm version itself; older
+        # releases go through the AMDGPU_VERSIONS table.
+        if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
+        else
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu"
+        fi
+        echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
+    fi
+
+    # Distribution field of the rocm apt source line, chosen by ROCm version.
+    if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
+        ROCM_REPO="${UBUNTU_VERSION_NAME}"
+    elif [[ $(ver $ROCM_VERSION) -lt $(ver 4.2) ]]; then
+        ROCM_REPO="xenial"
+    else
+        ROCM_REPO="ubuntu"
+    fi
+
+    # Add rocm repository
+    wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
+    local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
+    echo "deb [arch=amd64] ${rocm_baseurl} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
+    apt-get update --allow-insecure-repositories
+
+    local -a rocm_packages=(
+        rocm-dev
+        rocm-utils
+        rocm-libs
+        rccl
+        rocprofiler-dev
+        roctracer-dev
+    )
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated "${rocm_packages[@]}"
+
+    # precompiled miopen kernels added in ROCm 3.5; search for all unversioned packages
+    # if search fails it will abort this script; use true to avoid case where search fails
+    MIOPENKERNELS=$(apt-cache search --names-only miopenkernels | awk '{print $1}' | grep -F -v . || true)
+    if [[ -n "${MIOPENKERNELS}" ]]; then
+      # Left unquoted on purpose: the value may hold several package names.
+      DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENKERNELS}
+    else
+      echo "miopenkernels package not available"
+    fi
+
+    # Cleanup
+    apt-get autoclean && apt-get clean
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+}
+
+install_centos() {
+
+  yum update -y
+  yum install -y kmod
+  yum install -y wget
+  yum install -y openblas-devel
+
+  yum install -y epel-release
+  yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`
+
+  if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
+      # Add amdgpu repository
+      local amdgpu_baseurl
+      if [[ $OS_VERSION == 9 ]]; then
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/9.0/main/x86_64"
+      else
+        if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
+        else
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
+        fi
+      fi
+      echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
+      echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
+      echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
+      echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
+      echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
+      echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
+  fi
+
+  local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
+  echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
+  echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo
+  echo "baseurl=${rocm_baseurl}" >> /etc/yum.repos.d/rocm.repo
+  echo "enabled=1" >> /etc/yum.repos.d/rocm.repo
+  echo "gpgcheck=1" >> /etc/yum.repos.d/rocm.repo
+  echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/rocm.repo
+
+  yum update -y
+
+  yum install -y \
+                   rocm-dev \
+                   rocm-utils \
+                   rocm-libs \
+                   rccl \
+                   rocprofiler-dev \
+                   roctracer-dev
+
+  # Cleanup
+  yum clean all
+  rm -rf /var/cache/yum
+  rm -rf /var/lib/yum/yumdb
+  rm -rf /var/lib/yum/history
+}
+
+# Install Python packages depending on the base OS
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+  ubuntu)
+    install_ubuntu
+    ;;
+  centos)
+    install_centos
+    ;;
+  *)
+    echo "Unable to determine OS..."
+    exit 1
+    ;;
+esac
--- a/.ci/docker/common/install_rocm_magma.sh
+++ b/.ci/docker/common/install_rocm_magma.sh
@ -0,0 +1,29 @@
+#!/bin/bash
+
+# Builds hipMAGMA from a pinned magma commit and moves the resulting tree to
+# /opt/rocm/magma. GPU targets come from PYTORCH_ROCM_ARCH when it is set,
+# otherwise from rocm_agent_enumerator.
+
+set -ex
+
+# "install" hipMAGMA into /opt/rocm/magma by copying after build
+git clone https://bitbucket.org/icl/magma.git
+pushd magma
+# Fixes memory leaks of magma found while executing linalg UTs
+git checkout 5959b8783e45f1809812ed96ae762f38ee701972
+cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
+# Extra link and device-compiler settings appended to the stock make.inc.
+{
+  echo 'LIBDIR += -L$(MKLROOT)/lib'
+  echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib'
+  echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256'
+} >> make.inc
+export PATH="${PATH}:/opt/rocm/bin"
+if [[ -z "$PYTORCH_ROCM_ARCH" ]]; then
+  # No explicit list: use what rocm_agent_enumerator reports, minus gfx000.
+  amdgpu_targets=$(rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs)
+else
+  # PYTORCH_ROCM_ARCH is ';'-separated; the loop below wants spaces.
+  amdgpu_targets=$(echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g')
+fi
+for arch in $amdgpu_targets; do
+  echo "DEVCCFLAGS += --amdgpu-target=$arch" >> make.inc
+done
+# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
+sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
+
+mkl_root=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION
+make -f make.gen.hipMAGMA -j $(nproc)
+LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT=$mkl_root
+make testing/testing_dgemm -j $(nproc) MKLROOT=$mkl_root
+popd
+mv magma /opt/rocm
--- a/.ci/docker/common/install_swiftshader.sh
+++ b/.ci/docker/common/install_swiftshader.sh
@ -0,0 +1,24 @@
+#!/bin/bash
+
+set -ex
+
+[ -n "${SWIFTSHADER}" ]
+
+retry () {
+    $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
+}
+
+_https_amazon_aws=https://ossci-android.s3.amazonaws.com
+
+# SwiftShader
+_swiftshader_dir=/var/lib/jenkins/swiftshader
+_swiftshader_file_targz=swiftshader-abe07b943-prebuilt.tar.gz
+mkdir -p $_swiftshader_dir
+_tmp_swiftshader_targz="/tmp/${_swiftshader_file_targz}"
+
+curl --silent --show-error --location --fail --retry 3 \
+  --output "${_tmp_swiftshader_targz}" "$_https_amazon_aws/${_swiftshader_file_targz}"
+
+tar -C "${_swiftshader_dir}" -xzf "${_tmp_swiftshader_targz}"
+
+export VK_ICD_FILENAMES="${_swiftshader_dir}/build/Linux/vk_swiftshader_icd.json"
--- a/.ci/docker/common/install_thrift.sh
+++ b/.ci/docker/common/install_thrift.sh
@ -0,0 +1,14 @@
+#!/bin/bash
+
+# Builds Apache Thrift 0.12.0 from its source tarball and installs it, with
+# the PHP, Java, Python, Node.js, Go and Ruby bindings disabled.
+
+# Stop at the first failing step and trace execution, like the other
+# install_*.sh scripts; previously a failed download or build was ignored.
+set -ex
+
+apt-get update
+apt-get install -y sudo wget libboost-dev libboost-test-dev libboost-program-options-dev libboost-filesystem-dev libboost-thread-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev
+# Superseded releases are served from archive.apache.org; the former
+# www-us.apache.org mirror host is no longer available.
+wget https://archive.apache.org/dist/thrift/0.12.0/thrift-0.12.0.tar.gz
+tar -xvf thrift-0.12.0.tar.gz
+cd thrift-0.12.0
+# Drop -Werror from the compiler Makefiles.
+for file in ./compiler/cpp/Makefile*; do
+  sed -i 's/\-Werror//' "$file"
+done
+./bootstrap.sh
+./configure --without-php --without-java --without-python --without-nodejs --without-go --without-ruby
+sudo make
+sudo make install
+cd ..
+rm thrift-0.12.0.tar.gz
--- a/.ci/docker/common/install_ucc.sh
+++ b/.ci/docker/common/install_ucc.sh
@ -0,0 +1,48 @@
+#!/bin/bash
+
+set -ex
+
+# Value for the --with-cuda configure option used below: the toolkit path
+# when /usr/local/cuda/ exists, otherwise "no".
+with_cuda=no
+if [[ -d "/usr/local/cuda/" ]]; then
+  with_cuda=/usr/local/cuda/
+fi
+
+# Builds UCX at commit UCX_COMMIT and installs it under UCX_HOME.
+# Globals: UCX_COMMIT (read), UCX_HOME (read), with_cuda (read)
+# The build is capped at one job per CPU: a bare `make -j` places no limit on
+# the number of parallel jobs and can exhaust memory.
+function install_ucx() {
+  set -ex
+  git clone --recursive https://github.com/openucx/ucx.git
+  pushd ucx
+  git checkout "${UCX_COMMIT}"
+  git submodule update --init --recursive
+
+  ./autogen.sh
+  ./configure --prefix="$UCX_HOME"    \
+      --enable-mt                     \
+      --with-cuda="$with_cuda"        \
+      --enable-profiling              \
+      --enable-stats
+  time make -j"$(nproc)"
+  sudo make install
+
+  popd
+  rm -rf ucx
+}
+
+# Builds UCC at commit UCC_COMMIT against the UCX found in UCX_HOME and
+# installs it under UCC_HOME.
+# Globals: UCC_COMMIT (read), UCC_HOME (read), UCX_HOME (read), with_cuda (read)
+# The build is capped at one job per CPU: a bare `make -j` places no limit on
+# the number of parallel jobs and can exhaust memory.
+function install_ucc() {
+  set -ex
+  git clone --recursive https://github.com/openucx/ucc.git
+  pushd ucc
+  git checkout "${UCC_COMMIT}"
+  git submodule update --init --recursive
+
+  ./autogen.sh
+  ./configure --prefix="$UCC_HOME" --with-ucx="$UCX_HOME" --with-cuda="$with_cuda"
+  time make -j"$(nproc)"
+  sudo make install
+
+  popd
+  rm -rf ucc
+}
+
+install_ucx
+install_ucc
--- a/.ci/docker/common/install_user.sh
+++ b/.ci/docker/common/install_user.sh
@ -0,0 +1,33 @@
+#!/bin/bash
+
+# Creates the "jenkins" account (uid/gid 1000) by appending to the account
+# databases directly, prepares its home and ccache directories, grants it
+# passwordless sudo, and finally checks that sudo works for that user.
+
+set -ex
+
+# Mirror jenkins user in container
+# jenkins user as ec2-user should have the same user-id
+echo "jenkins:x:1000:1000::/var/lib/jenkins:" >> /etc/passwd
+echo "jenkins:x:1000:" >> /etc/group
+# Needed on focal or newer
+echo "jenkins:*:19110:0:99999:7:::" >>/etc/shadow
+
+# Create $HOME and the ccache directory inside it, both owned by jenkins
+for owned_dir in /var/lib/jenkins /var/lib/jenkins/.ccache; do
+  mkdir -p "${owned_dir}"
+  chown jenkins:jenkins "${owned_dir}"
+done
+
+# Allow writing to /usr/local (for make install)
+chown jenkins:jenkins /usr/local
+
+# Allow sudo
+# TODO: Maybe we shouldn't
+echo 'jenkins ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/jenkins
+
+# Work around bug where devtoolset replaces sudo and breaks it.
+if [ -z "$DEVTOOLSET_VERSION" ]; then
+  SUDO=sudo
+else
+  SUDO=/bin/sudo
+fi
+
+# Test that sudo works
+$SUDO -u jenkins $SUDO -v
--- a/.ci/docker/common/install_vision.sh
+++ b/.ci/docker/common/install_vision.sh
@ -0,0 +1,45 @@
+#!/bin/bash
+
+set -ex
+
+install_ubuntu() {
+  apt-get update
+  apt-get install -y --no-install-recommends \
+          libopencv-dev \
+          libavcodec-dev
+
+  # Cleanup
+  apt-get autoclean && apt-get clean
+  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+}
+
+install_centos() {
+  # Need EPEL for many packages we depend on.
+  # See http://fedoraproject.org/wiki/EPEL
+  yum --enablerepo=extras install -y epel-release
+
+  yum install -y \
+      opencv-devel \
+      ffmpeg-devel
+
+  # Cleanup
+  yum clean all
+  rm -rf /var/cache/yum
+  rm -rf /var/lib/yum/yumdb
+  rm -rf /var/lib/yum/history
+}
+
+# Install base packages depending on the base OS
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+  ubuntu)
+    install_ubuntu
+    ;;
+  centos)
+    install_centos
+    ;;
+  *)
+    echo "Unable to determine OS..."
+    exit 1
+    ;;
+esac
--- a/.ci/docker/common/install_vulkan_sdk.sh
+++ b/.ci/docker/common/install_vulkan_sdk.sh
@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Downloads the Vulkan SDK tarball for VULKAN_SDK_VERSION and unpacks it into
+# /var/lib/jenkins/vulkansdk, dropping the archive's top-level directory.
+
+set -ex
+
+# Under `set -e` this aborts the script when VULKAN_SDK_VERSION is unset or
+# empty.
+[ -n "${VULKAN_SDK_VERSION}" ]
+
+# Run the given command; on failure retry it up to four more times, pausing
+# 1, 2, 4 and 8 seconds. "$@" (not $*) keeps every argument intact even when
+# it contains whitespace or glob characters.
+retry () {
+    "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
+}
+
+_vulkansdk_dir=/var/lib/jenkins/vulkansdk
+_tmp_vulkansdk_targz=/tmp/vulkansdk.tar.gz
+
+curl \
+  --silent \
+  --show-error \
+  --location \
+  --fail \
+  --retry 3 \
+  --output "${_tmp_vulkansdk_targz}" "https://ossci-android.s3.amazonaws.com/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"
+
+mkdir -p "${_vulkansdk_dir}"
+tar -C "${_vulkansdk_dir}" -xzf "${_tmp_vulkansdk_targz}" --strip-components 1
+rm -rf "${_tmp_vulkansdk_targz}"
--- a/.ci/docker/java/jni.h
+++ b/.ci/docker/java/jni.h
--- a/.ci/docker/linter/Dockerfile
+++ b/.ci/docker/linter/Dockerfile
@ -0,0 +1,34 @@
+# Linter image: Ubuntu base plus the common packages, the jenkins user, a
+# conda environment and an initialised lintrunner.
+ARG UBUNTU_VERSION
+
+FROM ubuntu:${UBUNTU_VERSION}
+
+# Declared again because an ARG placed before FROM is not visible to the
+# instructions of the build stage.
+ARG UBUNTU_VERSION
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install common dependencies (so that this step can be cached separately)
+COPY ./common/install_base.sh install_base.sh
+RUN bash ./install_base.sh && rm install_base.sh
+
+# Install user
+COPY ./common/install_user.sh install_user.sh
+RUN bash ./install_user.sh && rm install_user.sh
+
+# Install conda and other packages (e.g., numpy, pytest)
+ARG ANACONDA_PYTHON_VERSION
+ARG CONDA_CMAKE
+ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
+ENV PATH=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
+COPY requirements-ci.txt /opt/conda/requirements-ci.txt
+COPY ./common/install_conda.sh install_conda.sh
+COPY ./common/common_utils.sh common_utils.sh
+RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
+
+# Note that Docker build forbids copying file outside the build context
+COPY ./common/install_linter.sh install_linter.sh
+COPY ./common/common_utils.sh common_utils.sh
+# Run and clean up in one layer, like the install steps above.
+RUN bash ./install_linter.sh && rm install_linter.sh common_utils.sh
+
+USER jenkins
+CMD ["bash"]
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@ -0,0 +1,260 @@
+# Python dependencies required for unit tests
+
+#awscli==1.6 #this breaks some platforms
+#Description: AWS command line interface
+#Pinned versions: 1.6
+#test that import:
+
+boto3==1.19.12
+#Description: AWS SDK for python
+#Pinned versions: 1.19.12, 1.16.34
+#test that import:
+
+click
+#Description: Command Line Interface Creation Kit
+#Pinned versions:
+#test that import:
+
+coremltools==5.0b5
+#Description: Apple framework for ML integration
+#Pinned versions: 5.0b5
+#test that import:
+
+#dataclasses #this breaks some platforms
+#Description: Provides decorators for auto adding special methods to user classes
+#Pinned versions:
+#test that import:
+
+expecttest==0.1.3
+#Description: method for writing tests where test framework auto populates
+# the expected output based on previous runs
+#Pinned versions: 0.1.3
+#test that import:
+
+flatbuffers==2.0
+#Description: cross platform serialization library
+#Pinned versions: 2.0
+#test that import:
+
+hypothesis==5.35.1
+# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
+#Description: advanced library for generating parametrized tests
+#Pinned versions: 3.44.6, 4.53.2
+#test that import: test_xnnpack_integration.py, test_pruning_op.py, test_nn.py
+
+junitparser==2.1.1
+#Description: junitparser handles JUnit/xUnit Result XML files
+#Pinned versions: 2.1.1
+#test that import:
+
+librosa>=0.6.2 ; python_version < "3.11"
+#Description: A python package for music and audio analysis
+#Pinned versions: >=0.6.2
+#test that import: test_spectral_ops.py
+
+#mkl #this breaks linux-bionic-rocm4.5-py3.7
+#Description: Intel oneAPI Math Kernel Library
+#Pinned versions:
+#test that import: test_profiler.py, test_public_bindings.py, test_testing.py,
+#test_nn.py, test_mkldnn.py, test_jit.py, test_fx_experimental.py,
+#test_autograd.py
+
+#mkl-devel
+# see mkl
+
+#mock # breaks ci/circleci: docker-pytorch-linux-xenial-py3-clang5-android-ndk-r19c
+#Description: A testing library that allows you to replace parts of your
+#system under test with mock objects
+#Pinned versions:
+#test that import: test_module_init.py, test_modules.py, test_nn.py,
+#test_testing.py
+
+#MonkeyType # breaks pytorch-xla-linux-bionic-py3.7-clang8
+#Description: collects runtime types of function arguments and return
+#values, and can automatically generate stub files
+#Pinned versions:
+#test that import:
+
+mypy==0.960
+# Pin MyPy version because new errors are likely to appear with each release
+#Description: linter
+#Pinned versions: 0.960
+#test that import: test_typing.py, test_type_hints.py
+
+networkx==2.6.3
+#Description: creation, manipulation, and study of
+#the structure, dynamics, and functions of complex networks
+#Pinned versions: 2.6.3 (latest version that works with Python 3.7+)
+#test that import: functorch
+
+#ninja
+#Description: build system.  Note that installing it from
+#here breaks things, so it is commented out
+#Pinned versions: 1.10.0.post1
+#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
+
+numba==0.49.0 ; python_version < "3.9"
+numba==0.54.1 ; python_version == "3.9"
+numba==0.55.2 ; python_version == "3.10"
+#Description: Just-In-Time Compiler for Numerical Functions
+#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
+#test that import: test_numba_integration.py
+#For numba issue see https://github.com/pytorch/pytorch/issues/51511
+
+#numpy
+#Description: Provides N-dimensional arrays and linear algebra
+#Pinned versions: 1.20
+#test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
+#test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
+#test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
+#test_spectral_ops.py, test_sort_and_select.py, test_shape_ops.py,
+#test_segment_reductions.py, test_reductions.py, test_pruning_op.py,
+#test_overrides.py, test_numpy_interop.py, test_numba_integration.py
+#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
+#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
+#test_binary_ufuncs.py
+
+#onnxruntime
+#Description: scoring engine for Open Neural Network Exchange (ONNX) models
+#Pinned versions: 1.9.0
+#test that import:
+
+opt-einsum==3.3
+#Description: Python library to optimize tensor contraction order, used in einsum
+#Pinned versions: 3.3
+#test that import: test_linalg.py
+
+#pillow
+#Description:  Python Imaging Library fork
+#Pinned versions:
+#test that import:
+
+protobuf==3.20.2
+#Description:  Google’s data interchange format
+#Pinned versions: 3.20.2
+#test that import: test_tensorboard.py
+
+psutil
+#Description: information on running processes and system utilization
+#Pinned versions:
+#test that import: test_profiler.py, test_openmp.py, test_dataloader.py
+
+pytest
+#Description: testing framework
+#Pinned versions:
+#test that import: test_typing.py, test_cpp_extensions_aot.py, run_test.py
+
+pytest-xdist
+#Description: plugin for running pytest in parallel
+#Pinned versions:
+#test that import:
+
+pytest-shard
+#Description: plugin splitting up tests in pytest
+#Pinned versions:
+#test that import:
+
+pytest-flakefinder==1.1.0
+#Description: plugin for rerunning tests a fixed number of times in pytest
+#Pinned versions: 1.1.0
+#test that import:
+
+pytest-rerunfailures
+#Description: plugin for rerunning failed tests in pytest
+#Pinned versions:
+#test that import:
+
+#pytest-benchmark
+#Description: fixture for benchmarking code
+#Pinned versions: 3.2.3
+#test that import:
+
+#pytest-sugar
+#Description: shows failures and errors instantly
+#Pinned versions:
+#test that import:
+
+xdoctest==1.1.0
+#Description: runs doctests in pytest
+#Pinned versions: 1.1.0
+#test that import:
+
+pygments==2.12.0
+#Description: support doctest highlighting
+#Pinned versions: 2.12.0
+#test that import: the doctests
+
+#PyYAML
+#Description: data serialization format
+#Pinned versions:
+#test that import:
+
+#requests
+#Description: HTTP library
+#Pinned versions:
+#test that import: test_type_promotion.py
+
+#rich
+#Description: rich text and beautiful formatting in the terminal
+#Pinned versions: 10.9.0
+#test that import:
+
+scikit-image
+#Description: image processing routines
+#Pinned versions:
+#test that import: test_nn.py
+
+#scikit-learn
+#Description: machine learning package
+#Pinned versions: 0.20.3
+#test that import:
+
+scipy==1.6.3 ; python_version < "3.10"
+scipy==1.8.1 ; python_version == "3.10"
+scipy==1.9.3 ; python_version == "3.11"
+# Pin SciPy because of failing distribution tests (see #60347)
+#Description: scientific python
+#Pinned versions: 1.6.3
+#test that import: test_unary_ufuncs.py, test_torch.py,test_tensor_creation_ops.py
+#test_spectral_ops.py, test_sparse_csr.py, test_reductions.py,test_nn.py
+#test_linalg.py, test_binary_ufuncs.py
+
+#tabulate
+#Description: Pretty-print tabular data
+#Pinned versions:
+#test that import:
+
+tb-nightly
+#Description: TensorBoard
+#Pinned versions:
+#test that import:
+
+#typing-extensions
+#Description: type hints for python
+#Pinned versions:
+#test that import:
+
+#virtualenv
+#Description: virtual environment for python
+#Pinned versions:
+#test that import:
+
+unittest-xml-reporting<=3.2.0,>=2.0.0
+#Description: saves unit test results to xml
+#Pinned versions:
+#test that import:
+
+lintrunner==0.9.2
+#Description: all about linters
+#Pinned versions: 0.9.2
+#test that import:
+
+rockset==1.0.3
+#Description: queries Rockset
+#Pinned versions: 1.0.3
+#test that import:
+
+ghstack==0.7.1
+#Description: ghstack tool
+#Pinned versions: 0.7.1
+#test that import:
--- a/.ci/docker/ubuntu-cuda/Dockerfile
+++ b/.ci/docker/ubuntu-cuda/Dockerfile
@ -0,0 +1,132 @@
+# CUDA CI image: starts from IMAGE_NAME and layers the build toolchain, conda
+# environment, optional components (protobuf, DB, vision, UCC, CMake),
+# OpenSSL, the compiler cache, Open MPI and cuDNN on top.
+ARG UBUNTU_VERSION
+ARG CUDA_VERSION
+ARG IMAGE_NAME
+
+FROM ${IMAGE_NAME}
+
+# Declared again because ARGs placed before FROM are not visible to the
+# instructions of the build stage.
+ARG UBUNTU_VERSION
+ARG CUDA_VERSION
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install common dependencies (so that this step can be cached separately)
+COPY ./common/install_base.sh install_base.sh
+RUN bash ./install_base.sh && rm install_base.sh
+
+# Install user
+COPY ./common/install_user.sh install_user.sh
+RUN bash ./install_user.sh && rm install_user.sh
+
+# Install katex
+ARG KATEX
+COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
+RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
+
+# Install conda and other packages (e.g., numpy, pytest)
+ARG ANACONDA_PYTHON_VERSION
+ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
+ENV PATH=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
+ARG CONDA_CMAKE
+COPY requirements-ci.txt /opt/conda/requirements-ci.txt
+COPY ./common/install_conda.sh install_conda.sh
+COPY ./common/common_utils.sh common_utils.sh
+RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
+
+# Install gcc
+ARG GCC_VERSION
+COPY ./common/install_gcc.sh install_gcc.sh
+RUN bash ./install_gcc.sh && rm install_gcc.sh
+
+# Install clang
+ARG CLANG_VERSION
+COPY ./common/install_clang.sh install_clang.sh
+RUN bash ./install_clang.sh && rm install_clang.sh
+
+# (optional) Install protobuf for ONNX
+ARG PROTOBUF
+COPY ./common/install_protobuf.sh install_protobuf.sh
+RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
+RUN rm install_protobuf.sh
+ENV INSTALLED_PROTOBUF=${PROTOBUF}
+
+# (optional) Install database packages like LMDB and LevelDB
+ARG DB
+COPY ./common/install_db.sh install_db.sh
+RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
+RUN rm install_db.sh
+ENV INSTALLED_DB=${DB}
+
+# (optional) Install vision packages like OpenCV and ffmpeg
+ARG VISION
+COPY ./common/install_vision.sh install_vision.sh
+RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
+RUN rm install_vision.sh
+ENV INSTALLED_VISION=${VISION}
+
+# (optional) Install UCC
+ARG UCX_COMMIT
+ARG UCC_COMMIT
+ENV UCX_COMMIT=$UCX_COMMIT
+ENV UCC_COMMIT=$UCC_COMMIT
+ENV UCX_HOME=/usr
+ENV UCC_HOME=/usr
+# COPY rather than ADD: this is a plain local file, so none of ADD's extra
+# behaviour (URL fetch, archive extraction) is wanted.
+COPY ./common/install_ucc.sh install_ucc.sh
+RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
+RUN rm install_ucc.sh
+
+# Install OpenSSL into /opt/openssl
+COPY ./common/install_openssl.sh install_openssl.sh
+ENV OPENSSL_ROOT_DIR=/opt/openssl
+RUN bash ./install_openssl.sh && rm install_openssl.sh
+ENV OPENSSL_DIR=/opt/openssl
+
+# (optional) Install non-default CMake version
+ARG CMAKE_VERSION
+COPY ./common/install_cmake.sh install_cmake.sh
+RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
+RUN rm install_cmake.sh
+
+# Install ccache/sccache (do this last, so we get priority in PATH)
+COPY ./common/install_cache.sh install_cache.sh
+ENV PATH=/opt/cache/bin:$PATH
+# See https://github.com/pytorch/pytorch/issues/82174
+# TODO(sdym@fb.com):
+# check if this is needed after full off Xenial migration
+ENV CARGO_NET_GIT_FETCH_WITH_CLI=true
+RUN bash ./install_cache.sh && rm install_cache.sh
+ENV CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
+
+# Add jni.h for java host build
+COPY ./common/install_jni.sh install_jni.sh
+COPY ./java/jni.h jni.h
+RUN bash ./install_jni.sh && rm install_jni.sh
+
+# Install Open MPI for CUDA
+COPY ./common/install_openmpi.sh install_openmpi.sh
+RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
+RUN rm install_openmpi.sh
+
+# Include BUILD_ENVIRONMENT environment variable in image
+ARG BUILD_ENVIRONMENT
+ENV BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}
+
+# AWS specific CUDA build guidance
+ENV TORCH_CUDA_ARCH_LIST=Maxwell
+ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
+ENV CUDA_PATH=/usr/local/cuda
+
+# Install LLVM dev version (Defined in the pytorch/builder github repository)
+COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
+
+# Install CUDNN
+ARG CUDNN_VERSION
+ARG CUDA_VERSION
+COPY ./common/install_cudnn.sh install_cudnn.sh
+# Defaulting to 0 keeps the numeric test valid when CUDNN_VERSION is unset;
+# an empty operand makes `[ ... -eq 8 ]` print an error.
+RUN if [ "${CUDNN_VERSION:-0}" -eq 8 ]; then bash install_cudnn.sh; fi
+RUN rm install_cudnn.sh
+
+# Delete /usr/local/cuda-11.X/cuda-11.X symlinks
+RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
+RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi
+
+USER jenkins
+CMD ["bash"]
--- a/.ci/docker/ubuntu-rocm/.gitignore
+++ b/.ci/docker/ubuntu-rocm/.gitignore
@ -0,0 +1 @@
+*.sh
--- a/.ci/docker/ubuntu-rocm/Dockerfile
+++ b/.ci/docker/ubuntu-rocm/Dockerfile
@ -0,0 +1,102 @@
+# ROCm CI image: Ubuntu base plus the build toolchain, conda environment,
+# optional components (protobuf, DB, vision, CMake, Ninja), ROCm with
+# hipMAGMA, and the compiler cache.
+ARG UBUNTU_VERSION
+
+FROM ubuntu:${UBUNTU_VERSION}
+
+# Declared again because an ARG placed before FROM is not visible to the
+# instructions of the build stage.
+ARG UBUNTU_VERSION
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Set AMD gpu targets to build for
+ARG PYTORCH_ROCM_ARCH
+ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
+
+# Install common dependencies (so that this step can be cached separately)
+COPY ./common/install_base.sh install_base.sh
+RUN bash ./install_base.sh && rm install_base.sh
+
+# Install clang
+ARG LLVMDEV
+ARG CLANG_VERSION
+COPY ./common/install_clang.sh install_clang.sh
+RUN bash ./install_clang.sh && rm install_clang.sh
+
+# Install user
+COPY ./common/install_user.sh install_user.sh
+RUN bash ./install_user.sh && rm install_user.sh
+
+# Install conda and other packages (e.g., numpy, pytest)
+ARG ANACONDA_PYTHON_VERSION
+ARG CONDA_CMAKE
+ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
+ENV PATH=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
+COPY requirements-ci.txt /opt/conda/requirements-ci.txt
+COPY ./common/install_conda.sh install_conda.sh
+COPY ./common/common_utils.sh common_utils.sh
+RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
+
+# Install gcc
+ARG GCC_VERSION
+COPY ./common/install_gcc.sh install_gcc.sh
+RUN bash ./install_gcc.sh && rm install_gcc.sh
+
+# (optional) Install protobuf for ONNX
+ARG PROTOBUF
+COPY ./common/install_protobuf.sh install_protobuf.sh
+RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
+RUN rm install_protobuf.sh
+ENV INSTALLED_PROTOBUF=${PROTOBUF}
+
+# (optional) Install database packages like LMDB and LevelDB
+ARG DB
+COPY ./common/install_db.sh install_db.sh
+RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
+RUN rm install_db.sh
+ENV INSTALLED_DB=${DB}
+
+# (optional) Install vision packages like OpenCV and ffmpeg
+ARG VISION
+COPY ./common/install_vision.sh install_vision.sh
+RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
+RUN rm install_vision.sh
+ENV INSTALLED_VISION=${VISION}
+
+# Install rocm
+ARG ROCM_VERSION
+COPY ./common/install_rocm.sh install_rocm.sh
+RUN bash ./install_rocm.sh
+RUN rm install_rocm.sh
+COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
+RUN bash ./install_rocm_magma.sh
+RUN rm install_rocm_magma.sh
+# Each line prepends to PATH, so the directory added last is searched first.
+ENV PATH=/opt/rocm/bin:$PATH
+ENV PATH=/opt/rocm/hcc/bin:$PATH
+ENV PATH=/opt/rocm/hip/bin:$PATH
+ENV PATH=/opt/rocm/opencl/bin:$PATH
+ENV PATH=/opt/rocm/llvm/bin:$PATH
+ENV MAGMA_HOME=/opt/rocm/magma
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+
+# (optional) Install non-default CMake version
+ARG CMAKE_VERSION
+COPY ./common/install_cmake.sh install_cmake.sh
+RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
+RUN rm install_cmake.sh
+
+# (optional) Install non-default Ninja version
+ARG NINJA_VERSION
+COPY ./common/install_ninja.sh install_ninja.sh
+RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
+RUN rm install_ninja.sh
+
+# Install ccache/sccache (do this last, so we get priority in PATH)
+COPY ./common/install_cache.sh install_cache.sh
+ENV PATH=/opt/cache/bin:$PATH
+RUN bash ./install_cache.sh && rm install_cache.sh
+
+# Include BUILD_ENVIRONMENT environment variable in image
+ARG BUILD_ENVIRONMENT
+ENV BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}
+
+USER jenkins
+CMD ["bash"]
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@ -0,0 +1,165 @@
+ARG UBUNTU_VERSION
+
+FROM ubuntu:${UBUNTU_VERSION}
+
+ARG UBUNTU_VERSION
+
+ENV DEBIAN_FRONTEND noninteractive
+
+ARG CLANG_VERSION
+
+# Install common dependencies (so that this step can be cached separately)
+COPY ./common/install_base.sh install_base.sh
+RUN bash ./install_base.sh && rm install_base.sh
+
+# Install clang
+ARG LLVMDEV
+COPY ./common/install_clang.sh install_clang.sh
+RUN bash ./install_clang.sh && rm install_clang.sh
+
+# (optional) Install thrift.
+ARG THRIFT
+COPY ./common/install_thrift.sh install_thrift.sh
+RUN if [ -n "${THRIFT}" ]; then bash ./install_thrift.sh; fi
+RUN rm install_thrift.sh
+ENV INSTALLED_THRIFT ${THRIFT}
+
+# Install user
+COPY ./common/install_user.sh install_user.sh
+RUN bash ./install_user.sh && rm install_user.sh
+
+# Install katex
+ARG KATEX
+COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
+RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
+
+# Install conda and other packages (e.g., numpy, pytest)
+ARG ANACONDA_PYTHON_VERSION
+ARG CONDA_CMAKE
+ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
+ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
+COPY requirements-ci.txt /opt/conda/requirements-ci.txt
+COPY ./common/install_conda.sh install_conda.sh
+COPY ./common/common_utils.sh common_utils.sh
+RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
+
+# Install gcc
+ARG GCC_VERSION
+COPY ./common/install_gcc.sh install_gcc.sh
+RUN bash ./install_gcc.sh && rm install_gcc.sh
+
+# Install lcov for C++ code coverage
+COPY ./common/install_lcov.sh install_lcov.sh
+RUN  bash ./install_lcov.sh && rm install_lcov.sh
+
+# Install cuda and cudnn
+ARG CUDA_VERSION
+RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/common/install_cuda.sh -O install_cuda.sh
+RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
+ENV DESIRED_CUDA ${CUDA_VERSION}
+ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
+
+# (optional) Install UCC
+# The installer only runs when both UCX_COMMIT and UCC_COMMIT build args are
+# non-empty; the ENV values are recorded in the image either way.
+ARG UCX_COMMIT
+ARG UCC_COMMIT
+ENV UCX_COMMIT $UCX_COMMIT
+ENV UCC_COMMIT $UCC_COMMIT
+ENV UCX_HOME /usr
+ENV UCC_HOME /usr
+# COPY (not ADD): this is a plain local file, so none of ADD's extra behaviour
+# (URL fetch, archive auto-extraction) is wanted, and it matches every other
+# install step in this Dockerfile.
+COPY ./common/install_ucc.sh install_ucc.sh
+RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
+RUN rm install_ucc.sh
+
+# (optional) Install protobuf for ONNX
+ARG PROTOBUF
+COPY ./common/install_protobuf.sh install_protobuf.sh
+RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
+RUN rm install_protobuf.sh
+ENV INSTALLED_PROTOBUF ${PROTOBUF}
+
+# (optional) Install database packages like LMDB and LevelDB
+ARG DB
+COPY ./common/install_db.sh install_db.sh
+RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
+RUN rm install_db.sh
+ENV INSTALLED_DB ${DB}
+
+# (optional) Install vision packages like OpenCV and ffmpeg
+ARG VISION
+COPY ./common/install_vision.sh install_vision.sh
+RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
+RUN rm install_vision.sh
+ENV INSTALLED_VISION ${VISION}
+
+# (optional) Install Android NDK
+ARG ANDROID
+ARG ANDROID_NDK
+ARG GRADLE_VERSION
+COPY ./common/install_android.sh install_android.sh
+COPY ./android/AndroidManifest.xml AndroidManifest.xml
+COPY ./android/build.gradle build.gradle
+RUN if [ -n "${ANDROID}" ]; then bash ./install_android.sh; fi
+RUN rm install_android.sh
+RUN rm AndroidManifest.xml
+RUN rm build.gradle
+ENV INSTALLED_ANDROID ${ANDROID}
+
+# (optional) Install Vulkan SDK
+ARG VULKAN_SDK_VERSION
+COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh
+RUN if [ -n "${VULKAN_SDK_VERSION}" ]; then bash ./install_vulkan_sdk.sh; fi
+RUN rm install_vulkan_sdk.sh
+
+# (optional) Install swiftshader
+ARG SWIFTSHADER
+COPY ./common/install_swiftshader.sh install_swiftshader.sh
+RUN if [ -n "${SWIFTSHADER}" ]; then bash ./install_swiftshader.sh; fi
+RUN rm install_swiftshader.sh
+
+# (optional) Install non-default CMake version
+ARG CMAKE_VERSION
+COPY ./common/install_cmake.sh install_cmake.sh
+RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
+RUN rm install_cmake.sh
+
+# (optional) Install non-default Ninja version
+ARG NINJA_VERSION
+COPY ./common/install_ninja.sh install_ninja.sh
+RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
+RUN rm install_ninja.sh
+
+COPY ./common/install_openssl.sh install_openssl.sh
+RUN bash ./install_openssl.sh
+ENV OPENSSL_ROOT_DIR /opt/openssl
+ENV OPENSSL_DIR /opt/openssl
+RUN rm install_openssl.sh
+
+# Install ccache/sccache (do this last, so we get priority in PATH)
+COPY ./common/install_cache.sh install_cache.sh
+ENV PATH /opt/cache/bin:$PATH
+RUN bash ./install_cache.sh && rm install_cache.sh
+
+# Add jni.h for java host build
+COPY ./common/install_jni.sh install_jni.sh
+COPY ./java/jni.h jni.h
+RUN bash ./install_jni.sh && rm install_jni.sh
+
+# Install Open MPI for CUDA
+COPY ./common/install_openmpi.sh install_openmpi.sh
+RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
+RUN rm install_openmpi.sh
+
+# Include BUILD_ENVIRONMENT environment variable in image
+ARG BUILD_ENVIRONMENT
+ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
+
+# Install LLVM dev version (Defined in the pytorch/builder github repository)
+COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
+
+# AWS specific CUDA build guidance
+ENV TORCH_CUDA_ARCH_LIST Maxwell
+ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
+ENV CUDA_PATH /usr/local/cuda
+
+USER jenkins
+CMD ["bash"]
--- a/.ci/onnx/README.md
+++ b/.ci/onnx/README.md
@ -0,0 +1,14 @@
+# Jenkins
+
+The scripts in this directory are the entrypoint for testing ONNX exporter.
+
+The environment variable `BUILD_ENVIRONMENT` is expected to be set to
+the build environment you intend to test. It is a hint for the build
+and test scripts to configure Caffe2 a certain way and include/exclude
+tests. For Docker images, it equals the name of the image itself. For
+example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are
+built on Jenkins and are used in triggered builds already have this
+environment variable set in their manifest. Also see
+`./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`.
+
+Our Jenkins installation is located at https://ci.pytorch.org/jenkins/.
--- a/.ci/onnx/common.sh
+++ b/.ci/onnx/common.sh
@ -0,0 +1,19 @@
+# Shared setup for the ONNX CI scripts. Written to be sourced: it locates
+# itself through BASH_SOURCE and leaves its variables in the caller's shell.
+# Reads BUILD_ENVIRONMENT; defines LOCAL_DIR, ROOT_DIR, TEST_DIR,
+# pytest_reports_dir and PYTHON.
+# Abort on the first failing command and echo each command before running it.
+set -ex
+
+# Directory containing this file, and the directory two levels above it.
+LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
+TEST_DIR="$ROOT_DIR/test"
+pytest_reports_dir="${TEST_DIR}/test-reports/python"
+
+# Figure out which Python to use
+# Default to whatever `python` is on PATH; if BUILD_ENVIRONMENT embeds a
+# version tag such as "py3" or "py3.8", switch to the matching "python<version>".
+PYTHON="$(which python)"
+if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
+  PYTHON=$(which "python${BASH_REMATCH[1]}")
+fi
+
+if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
+    # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
+    unset HIP_PLATFORM
+fi
+
+# Create the pytest report directory; a failure here is deliberately ignored.
+mkdir -p "$pytest_reports_dir" || true
--- a/.ci/onnx/test.sh
+++ b/.ci/onnx/test.sh
@ -0,0 +1,74 @@
+#!/bin/bash
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
+  pip install click mock tabulate networkx==2.0
+  pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
+fi
+
+# Skip tests in environments where they are not built/applicable
+if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
+  echo 'Skipping tests'
+  exit 0
+fi
+if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
+  # temporary to locate some kernel issues on the CI nodes
+  export HSAKMT_DEBUG_LEVEL=4
+fi
+# These additional packages are needed for circleci ROCm builds.
+if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
+    # Need networkx 2.0 because bellman_ford was moved in 2.1. scikit-image by
+    # default installs the most recent networkx version, so we install this lower
+    # version explicitly before scikit-image pulls it in as a dependency
+    pip install networkx==2.0
+    # click - onnx
+    pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
+fi
+
+################################################################################
+# Python tests #
+################################################################################
+if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
+  exit 0
+fi
+
+# If pip is installed as root, we must use sudo.
+# CircleCI docker images could install conda as jenkins user, or use the OS's python package.
+PIP=$(which pip)
+PIP_USER=$(stat --format '%U' $PIP)
+CURRENT_USER=$(id -u -n)
+if [[ "$PIP_USER" = root && "$CURRENT_USER" != root ]]; then
+  MAYBE_SUDO=sudo
+fi
+
+# Uninstall pre-installed hypothesis and coverage to use an older version as newer
+# versions remove the timeout parameter from settings which ideep/conv_transpose_test.py uses
+$MAYBE_SUDO pip -q uninstall -y hypothesis
+$MAYBE_SUDO pip -q uninstall -y coverage
+
+# "pip install hypothesis==3.44.6" from official server is unreliable on
+# CircleCI, so we host a copy on S3 instead
+$MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
+$MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
+$MAYBE_SUDO pip -q install hypothesis==4.57.1
+
+##############
+# ONNX tests #
+##############
+if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
+  pip install -q --user --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)"
+  pip install -q --user transformers==4.25.1
+  pip install -q --user ninja flatbuffers==2.0 numpy==1.22.4 onnxruntime==1.14.0 beartype==0.10.4
+  # TODO: change this when onnx 1.13.1 is released.
+  pip install --no-use-pep517 'onnx @ git+https://github.com/onnx/onnx@e192ba01e438d22ca2dedd7956e28e3551626c91'
+  # TODO: change this when onnx-script is on testPypi
+  pip install 'onnx-script @ git+https://github.com/microsoft/onnx-script@a71e35bcd72537bf7572536ee57250a0c0488bf6'
+  # numba requires numpy <= 1.20, onnxruntime requires numpy >= 1.21.
+  # We don't actually need it for our tests, but it's imported if it's present, so uninstall.
+  pip uninstall -q --yes numba
+  # JIT C++ extensions require ninja, so put it into PATH.
+  export PATH="/var/lib/jenkins/.local/bin:$PATH"
+  "$ROOT_DIR/scripts/onnx/test.sh"
+fi
--- a/.ci/pytorch/.shellcheckrc
+++ b/.ci/pytorch/.shellcheckrc
@ -0,0 +1,4 @@
+source-path=SCRIPTDIR
+
+# we'd like to enable --external-sources here but can't
+# https://github.com/koalaman/shellcheck/issues/1818
--- a/.ci/pytorch/README.md
+++ b/.ci/pytorch/README.md
@ -0,0 +1,42 @@
+This directory contains scripts for our continuous integration.
+
+One important thing to keep in mind when reading the scripts here is
+that they are all based off of Docker images, which we build for each of
+the various system configurations we want to run on Jenkins.  This means
+it is very easy to run these tests yourself:
+
+1. Figure out what Docker image you want.  The general template for our
+   images looks like:
+   ``registry.pytorch.org/pytorch/pytorch-$BUILD_ENVIRONMENT:$DOCKER_VERSION``,
+   where ``$BUILD_ENVIRONMENT`` is one of the build environments
+   enumerated in
+   [pytorch-dockerfiles](https://github.com/pytorch/pytorch/blob/master/.ci/docker/build.sh). The dockerfile used by jenkins can be found under the `.ci` [directory](https://github.com/pytorch/pytorch/blob/master/.ci/docker)
+
+2. Run ``docker run -it -u jenkins $DOCKER_IMAGE``, clone PyTorch and
+   run one of the scripts in this directory.
+
+The Docker images are designed so that any "reasonable" build commands
+will work; if you look in [build.sh](build.sh) you will see that it is a
+very simple script.  This is intentional.  Idiomatic build instructions
+should work inside all of our Docker images.  You can tweak the commands
+however you need (e.g., in case you want to rebuild with DEBUG, or rerun
+the build with higher verbosity, etc.).
+
+We have to do some work to make this so.  Here is a summary of the
+mechanisms we use:
+
+- We install binaries to directories like `/usr/local/bin` which
+  are automatically part of your PATH.
+
+- We add entries to the PATH using Docker ENV variables (so
+  they apply when you enter Docker) and `/etc/environment` (so they
+  continue to apply even if you sudo), instead of modifying
+  `PATH` in our build scripts.
+
+- We use `/etc/ld.so.conf.d` to register directories containing
+  shared libraries, instead of modifying `LD_LIBRARY_PATH` in our
+  build scripts.
+
+- We reroute well known paths like `/usr/bin/gcc` to alternate
+  implementations with `update-alternatives`, instead of setting
+  `CC` and `CXX` in our implementations.
--- a/.ci/pytorch/build-asan.sh
+++ b/.ci/pytorch/build-asan.sh
@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Required environment variable: $BUILD_ENVIRONMENT
+# (This is set by default in the Docker images we build, so you don't
+# need to set it yourself.)
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+# shellcheck source=./common-build.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
+
+echo "Clang version:"
+clang --version
+
+python tools/stats/export_test_times.py
+
+# detect_leaks=0: Python is very leaky, so we need to suppress it
+# symbolize=1: Gives us much better errors when things go wrong
+export ASAN_OPTIONS=detect_leaks=0:detect_stack_use_after_return=1:symbolize=1:detect_odr_violation=0
+if [ -n "$(which conda)" ]; then
+  export CMAKE_PREFIX_PATH=/opt/conda
+fi
+
+# TODO: Make the ASAN flags a centralized env var and unify with USE_ASAN option
+CC="clang" CXX="clang++" LDSHARED="clang --shared" \
+  CFLAGS="-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -fsanitize-address-use-after-scope -shared-libasan" \
+  USE_ASAN=1 USE_CUDA=0 USE_MKLDNN=0 \
+  python setup.py bdist_wheel
+  pip_install_whl "$(echo dist/*.whl)"
+
+# Test building via the sdist source tarball
+python setup.py sdist
+# Resolve the dist directory to an absolute path *before* changing directory:
+# BASH_SOURCE may be a relative path, which would no longer point at this
+# script once we are inside /tmp/tmp.
+sdist_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../dist" && pwd)"
+mkdir -p /tmp/tmp
+pushd /tmp/tmp
+tar zxf "$sdist_dir/"*.tar.gz
+cd torch-*
+# Only run the CMake configure step of the unpacked source tree.
+python setup.py build --cmake-only
+popd
+
+print_sccache_stats
+
+assert_git_not_dirty
--- a/.ci/pytorch/build-mobile.sh
+++ b/.ci/pytorch/build-mobile.sh
@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# DO NOT ADD 'set -x', so as not to reveal CircleCI secret context environment variables
+set -eu -o pipefail
+
+# This script uses linux host toolchain + mobile build options in order to
+# build & test mobile libtorch without having to setup Android/iOS
+# toolchain/simulator.
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+# shellcheck source=./common-build.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
+
+# Install torch & torchvision - used to download & trace test model.
+# Ideally we should use the libtorch built on the PR so that backward
+# incompatible changes won't break this script - but it will significantly slow
+# down mobile CI jobs.
+# Here we install nightly instead of stable so that we have an option to
+# temporarily skip mobile CI jobs on BC-breaking PRs until they are in nightly.
+retry pip install --pre torch torchvision \
+  -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html \
+  --progress-bar off
+
+# Run end-to-end process of building mobile library, linking into the predictor
+# binary, and running forward pass with a real model.
+if [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-static* ]]; then
+  TEST_CUSTOM_BUILD_STATIC=1 test/mobile/custom_build/build.sh
+elif [[ "$BUILD_ENVIRONMENT" == *-mobile-lightweight-dispatch* ]]; then
+  test/mobile/lightweight_dispatch/build.sh
+else
+  TEST_DEFAULT_BUILD=1 test/mobile/custom_build/build.sh
+fi
+
+print_sccache_stats
--- a/.ci/pytorch/build-tsan.sh
+++ b/.ci/pytorch/build-tsan.sh
@ -0,0 +1,29 @@
+#!/bin/bash
+
+# Required environment variable: $BUILD_ENVIRONMENT
+# (This is set by default in the Docker images we build, so you don't
+# need to set it yourself.)
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+# shellcheck source=./common-build.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
+
+echo "Clang version:"
+clang --version
+
+python tools/stats/export_test_times.py
+
+if [ -n "$(which conda)" ]; then
+  export CMAKE_PREFIX_PATH=/opt/conda
+fi
+
+CC="clang" CXX="clang++" LDSHARED="clang --shared" \
+  CFLAGS="-fsanitize=thread" \
+  USE_TSAN=1 USE_CUDA=0 USE_MKLDNN=0 \
+  python setup.py bdist_wheel
+  pip_install_whl "$(echo dist/*.whl)"
+
+print_sccache_stats
+
+assert_git_not_dirty
--- a/.ci/pytorch/build.sh
+++ b/.ci/pytorch/build.sh
@ -0,0 +1,318 @@
+#!/bin/bash
+
+set -ex
+
+# Required environment variable: $BUILD_ENVIRONMENT
+# (This is set by default in the Docker images we build, so you don't
+# need to set it yourself.)
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+# shellcheck source=./common-build.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
+
+if [[ "$BUILD_ENVIRONMENT" == *-clang7-asan* ]]; then
+  exec "$(dirname "${BASH_SOURCE[0]}")/build-asan.sh" "$@"
+fi
+
+if [[ "$BUILD_ENVIRONMENT" == *-clang7-tsan* ]]; then
+  exec "$(dirname "${BASH_SOURCE[0]}")/build-tsan.sh" "$@"
+fi
+
+if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
+  exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
+fi
+
+echo "Python version:"
+python --version
+
+echo "GCC version:"
+gcc --version
+
+echo "CMake version:"
+cmake --version
+
+echo "Environment variables:"
+env
+
+if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
+  echo "NVCC version:"
+  nvcc --version
+fi
+
+if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
+  if [[ "$BUILD_ENVIRONMENT" != *cuda11.3* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
+    # TODO: there is a linking issue when building with UCC using clang,
+    # so UCC is left disabled for clang builds for now; to be fixed later.
+    export USE_UCC=1
+    export USE_SYSTEM_UCC=1
+  fi
+fi
+
+if [[ ${BUILD_ENVIRONMENT} == *"caffe2"* ]]; then
+  echo "Caffe2 build is ON"
+  export BUILD_CAFFE2=ON
+fi
+
+if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
+  export ATEN_THREADING=TBB
+  export USE_TBB=1
+elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
+  export ATEN_THREADING=NATIVE
+fi
+
+# Enable LLVM dependency for TensorExpr testing
+if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
+  export USE_LLVM=/opt/rocm/llvm
+  export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm
+else
+  export USE_LLVM=/opt/llvm
+  export LLVM_DIR=/opt/llvm/lib/cmake/llvm
+fi
+
+if ! which conda; then
+  # In ROCm CIs, we are doing cross compilation on build machines with
+  # intel cpu and later run tests on machines with amd cpu.
+  # Also leave out two builds to make sure non-mkldnn builds still work.
+  if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
+    export USE_MKLDNN=1
+  else
+    export USE_MKLDNN=0
+  fi
+else
+  export CMAKE_PREFIX_PATH=/opt/conda
+fi
+
+if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then
+  POSSIBLE_JAVA_HOMES=()
+  POSSIBLE_JAVA_HOMES+=(/usr/local)
+  POSSIBLE_JAVA_HOMES+=(/usr/lib/jvm/java-8-openjdk-amd64)
+  POSSIBLE_JAVA_HOMES+=(/Library/Java/JavaVirtualMachines/*.jdk/Contents/Home)
+  # Add the Windows-specific JNI
+  POSSIBLE_JAVA_HOMES+=("$PWD/.circleci/windows-jni/")
+  for JH in "${POSSIBLE_JAVA_HOMES[@]}" ; do
+    if [[ -e "$JH/include/jni.h" ]] ; then
+      # Skip if we're not on Windows but haven't found a JAVA_HOME
+      if [[ "$JH" == "$PWD/.circleci/windows-jni/" && "$OSTYPE" != "msys" ]] ; then
+        break
+      fi
+      echo "Found jni.h under $JH"
+      export JAVA_HOME="$JH"
+      export BUILD_JNI=ON
+      break
+    fi
+  done
+  if [ -z "$JAVA_HOME" ]; then
+    echo "Did not find jni.h"
+  fi
+fi
+
+# Use special scripts for Android builds
+if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
+  export ANDROID_NDK=/opt/ndk
+  build_args=()
+  if [[ "${BUILD_ENVIRONMENT}" == *-arm-v7a* ]]; then
+    build_args+=("-DANDROID_ABI=armeabi-v7a")
+  elif [[ "${BUILD_ENVIRONMENT}" == *-arm-v8a* ]]; then
+    build_args+=("-DANDROID_ABI=arm64-v8a")
+  elif [[ "${BUILD_ENVIRONMENT}" == *-x86_32* ]]; then
+    build_args+=("-DANDROID_ABI=x86")
+  elif [[ "${BUILD_ENVIRONMENT}" == *-x86_64* ]]; then
+    build_args+=("-DANDROID_ABI=x86_64")
+  fi
+  if [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
+    build_args+=("-DUSE_VULKAN=ON")
+  fi
+  build_args+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
+  exec ./scripts/build_android.sh "${build_args[@]}" "$@"
+fi
+
+if [[ "$BUILD_ENVIRONMENT" != *android* && "$BUILD_ENVIRONMENT" == *vulkan* ]]; then
+  export USE_VULKAN=1
+  # shellcheck disable=SC1091
+  source /var/lib/jenkins/vulkansdk/setup-env.sh
+fi
+
+if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
+  # hcc used to run out of memory, silently exiting without stopping
+  # the build process, leaving undefined symbols in the shared lib,
+  # causing undefined symbol errors when later running tests.
+  # We used to set MAX_JOBS to 4 to avoid, but this is no longer an issue.
+  if [ -z "$MAX_JOBS" ]; then
+    export MAX_JOBS=$(($(nproc) - 1))
+  fi
+
+  if [[ -n "$CI" && -z "$PYTORCH_ROCM_ARCH" ]]; then
+      # Set ROCM_ARCH to gfx906 for CI builds, if user doesn't override.
+      echo "Limiting PYTORCH_ROCM_ARCH to gfx906 for CI builds"
+      export PYTORCH_ROCM_ARCH="gfx906"
+  fi
+
+  # hipify sources
+  python tools/amd_build/build_amd.py
+fi
+
+# sccache will fail for CUDA builds if all cores are used for compiling
+# gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
+# Only applies when the caller has not already chosen MAX_JOBS and sccache is
+# available on PATH.
+if [ -z "$MAX_JOBS" ]; then
+  if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then
+    # Leave one CPU free. `nproc --ignore=1` never reports fewer than 1, so a
+    # single-CPU machine gets MAX_JOBS=1 instead of 0.
+    MAX_JOBS=$(nproc --ignore=1)
+    export MAX_JOBS
+  fi
+fi
+
+# TORCH_CUDA_ARCH_LIST must be passed from an environment variable
+if [[ "$BUILD_ENVIRONMENT" == *cuda* && -z "$TORCH_CUDA_ARCH_LIST" ]]; then
+  echo "TORCH_CUDA_ARCH_LIST must be defined"
+  exit 1
+fi
+
+if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
+  export CC=clang
+  export CXX=clang++
+fi
+
+if [[ "${BUILD_ENVIRONMENT}" == *no-ops* ]]; then
+  export USE_PER_OPERATOR_HEADERS=0
+fi
+
+if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
+    export USE_PRECOMPILED_HEADERS=1
+fi
+
+if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build*  ]]; then
+  export USE_GLOO_WITH_OPENSSL=ON
+fi
+
+if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
+  export BUILD_STATIC_RUNTIME_BENCHMARK=ON
+fi
+
+if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
+  set -e
+
+  get_bazel
+
+  # Leave 1 CPU free and use only up to 80% of memory to reduce the chance of crashing
+  # the runner
+  BAZEL_MEM_LIMIT="--local_ram_resources=HOST_RAM*.8"
+  BAZEL_CPU_LIMIT="--local_cpu_resources=HOST_CPUS-1"
+
+  tools/bazel build --config=no-tty "${BAZEL_MEM_LIMIT}" "${BAZEL_CPU_LIMIT}" //...
+  # Build torch, the Python module, and tests for CPU-only
+  tools/bazel build --config=no-tty "${BAZEL_MEM_LIMIT}" "${BAZEL_CPU_LIMIT}" --config=cpu-only :torch :_C.so :all_tests
+
+else
+  # check that setup.py would fail with bad arguments
+  echo "The next three invocations are expected to fail with invalid command error messages."
+  ( ! get_exit_code python setup.py bad_argument )
+  ( ! get_exit_code python setup.py clean] )
+  ( ! get_exit_code python setup.py clean bad_argument )
+
+  if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then
+
+    # rocm builds fail when WERROR=1
+    # XLA test build fails when WERROR=1
+    # set only when building other architectures
+    # or building non-XLA tests.
+    if [[ "$BUILD_ENVIRONMENT" != *rocm*  &&
+          "$BUILD_ENVIRONMENT" != *xla* ]]; then
+      WERROR=1 python setup.py bdist_wheel
+    else
+      python setup.py bdist_wheel
+    fi
+    pip_install_whl "$(echo dist/*.whl)"
+
+    # TODO: I'm not sure why, but somehow we lose verbose commands
+    set -x
+
+    assert_git_not_dirty
+    # Copy ninja build logs to dist folder
+    mkdir -p dist
+    if [ -f build/.ninja_log ]; then
+      cp build/.ninja_log dist
+    fi
+
+    if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
+      # remove sccache wrappers post-build; runtime compilation of MIOpen kernels does not yet fully support them
+      sudo rm -f /opt/cache/bin/cc
+      sudo rm -f /opt/cache/bin/c++
+      sudo rm -f /opt/cache/bin/gcc
+      sudo rm -f /opt/cache/bin/g++
+      pushd /opt/rocm/llvm/bin
+      if [[ -d original ]]; then
+        sudo mv original/clang .
+        sudo mv original/clang++ .
+      fi
+      sudo rm -rf original
+      popd
+    fi
+
+    CUSTOM_TEST_ARTIFACT_BUILD_DIR=${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}
+    CUSTOM_TEST_USE_ROCM=$([[ "$BUILD_ENVIRONMENT" == *rocm* ]] && echo "ON" || echo "OFF")
+    CUSTOM_TEST_MODULE_PATH="${PWD}/cmake/public"
+    mkdir -pv "${CUSTOM_TEST_ARTIFACT_BUILD_DIR}"
+
+    # Build custom operator tests.
+    CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
+    CUSTOM_OP_TEST="$PWD/test/custom_operator"
+    python --version
+    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
+    mkdir -p "$CUSTOM_OP_BUILD"
+    pushd "$CUSTOM_OP_BUILD"
+    cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
+          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
+    make VERBOSE=1
+    popd
+    assert_git_not_dirty
+
+    # Build jit hook tests
+    JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
+    JIT_HOOK_TEST="$PWD/test/jit_hooks"
+    python --version
+    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
+    mkdir -p "$JIT_HOOK_BUILD"
+    pushd "$JIT_HOOK_BUILD"
+    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
+          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
+    make VERBOSE=1
+    popd
+    assert_git_not_dirty
+
+    # Build custom backend tests.
+    CUSTOM_BACKEND_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-backend-build"
+    CUSTOM_BACKEND_TEST="$PWD/test/custom_backend"
+    python --version
+    mkdir -p "$CUSTOM_BACKEND_BUILD"
+    pushd "$CUSTOM_BACKEND_BUILD"
+    cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
+          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
+    make VERBOSE=1
+    popd
+    assert_git_not_dirty
+  else
+    # Test no-Python build
+    echo "Building libtorch"
+
+    # This is an attempt to mitigate flaky libtorch build OOM error. By default, the build parallelization
+    # is set to be the number of CPU minus 2. So, let's try a more conservative value here. A 4xlarge has
+    # 16 CPUs
+    MAX_JOBS=$(nproc --ignore=4)
+    export MAX_JOBS
+
+    # NB: Install outside of source directory (at the same level as the root
+    # pytorch folder) so that it doesn't get cleaned away prior to docker push.
+    BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
+    mkdir -p ../cpp-build/caffe2
+    pushd ../cpp-build/caffe2
+    WERROR=1 VERBOSE=1 DEBUG=1 python "$BUILD_LIBTORCH_PY"
+    popd
+  fi
+fi
+
+if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]]; then
+  # export test times so that potential sharded tests that'll branch off this build will use consistent data
+  # don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
+  python tools/stats/export_test_times.py
+fi
+
+print_sccache_stats
--- a/.ci/pytorch/codegen-test.sh
+++ b/.ci/pytorch/codegen-test.sh
@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+
+# This script can also be used to test whether your diff changes any codegen output.
+#
+# Run it before and after your change:
+#   .ci/pytorch/codegen-test.sh <baseline_output_dir>
+#   .ci/pytorch/codegen-test.sh <test_output_dir>
+#
+# Then run diff to compare the generated files:
+#   diff -Naur <baseline_output_dir> <test_output_dir>
+
+set -eu -o pipefail
+
+if [ "$#" -eq 0 ]; then
+  # shellcheck source=./common.sh
+  source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+  OUT="$(dirname "${BASH_SOURCE[0]}")/../../codegen_result"
+else
+  OUT=$1
+fi
+
+set -x
+
+rm -rf "$OUT"
+
+# aten codegen
+python -m torchgen.gen \
+  -s aten/src/ATen \
+  -d "$OUT"/torch/share/ATen
+
+# torch codegen
+python -m tools.setup_helpers.generate_code \
+  --install_dir "$OUT"
+
+# pyi codegen
+mkdir -p "$OUT"/pyi/torch/_C
+mkdir -p "$OUT"/pyi/torch/nn
+python -m tools.pyi.gen_pyi \
+  --native-functions-path aten/src/ATen/native/native_functions.yaml \
+  --tags-path aten/src/ATen/native/tags.yaml \
+  --deprecated-functions-path tools/autograd/deprecated.yaml \
+  --out "$OUT"/pyi
+
+# autograd codegen (called by torch codegen but can run independently)
+python -m tools.autograd.gen_autograd \
+  "$OUT"/torch/share/ATen/Declarations.yaml \
+  aten/src/ATen/native/native_functions.yaml \
+  aten/src/ATen/native/tags.yaml \
+  "$OUT"/autograd \
+  tools/autograd
+
+# annotated_fn_args codegen (called by torch codegen but can run independently)
+mkdir -p "$OUT"/annotated_fn_args
+python -m tools.autograd.gen_annotated_fn_args \
+  aten/src/ATen/native/native_functions.yaml \
+  aten/src/ATen/native/tags.yaml \
+  "$OUT"/annotated_fn_args \
+  tools/autograd
--- a/.ci/pytorch/common-build.sh
+++ b/.ci/pytorch/common-build.sh
@ -0,0 +1,58 @@
+#!/bin/bash
+# Required environment variables:
+#   $BUILD_ENVIRONMENT (should be set by your Docker image)
+
+if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
+    # Save the absolute path in case later we chdir (as occurs in the gpu perf test)
+    script_dir="$( cd "$(dirname "${BASH_SOURCE[0]}")" || exit ; pwd -P )"
+
+    if which sccache > /dev/null; then
+        # Save sccache logs to file
+        sccache --stop-server > /dev/null  2>&1 || true
+        rm -f ~/sccache_error.log || true
+
+        function sccache_epilogue() {
+            echo "::group::Sccache Compilation Log"
+            echo '=================== sccache compilation log ==================='
+            python "$script_dir/print_sccache_log.py" ~/sccache_error.log 2>/dev/null || true
+            echo '=========== If your build fails, please take a look at the log above for possible reasons ==========='
+            sccache --show-stats
+            sccache --stop-server || true
+            echo "::endgroup::"
+        }
+
+        # Register the function here so that the error log can be printed even when
+        # sccache fails to start, i.e. timeout error
+        trap_add sccache_epilogue EXIT
+
+        if [[ -n "${SKIP_SCCACHE_INITIALIZATION:-}" ]]; then
+            # sccache --start-server seems to hang forever on self hosted runners for GHA
+            # so let's just go ahead and skip the --start-server altogether since it seems
+            # as though sccache still gets used even when the sccache server isn't started
+            # explicitly
+            echo "Skipping sccache server initialization, setting environment variables"
+            export SCCACHE_IDLE_TIMEOUT=1200
+            export SCCACHE_ERROR_LOG=~/sccache_error.log
+            export RUST_LOG=sccache::server=error
+        elif [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
+            SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
+        else
+            # increasing SCCACHE_IDLE_TIMEOUT so that extension_backend_test.cpp can build after this PR:
+            # https://github.com/pytorch/pytorch/pull/16645
+            SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=1200 RUST_LOG=sccache::server=error sccache --start-server
+        fi
+
+        # Report sccache stats for easier debugging
+        sccache --zero-stats
+    fi
+
+    if which ccache > /dev/null; then
+        # Report ccache stats for easier debugging
+        ccache --zero-stats
+        ccache --show-stats
+        function ccache_epilogue() {
+            ccache --show-stats
+        }
+        trap_add ccache_epilogue EXIT
+    fi
+fi
--- a/.ci/pytorch/common.sh
+++ b/.ci/pytorch/common.sh
@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Common setup for all Jenkins scripts
+# shellcheck source=./common_utils.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
+set -ex
+
+# Required environment variables:
+#   $BUILD_ENVIRONMENT (should be set by your Docker image)
+
+# ROCm-specific environment setup; only applies when BUILD_ENVIRONMENT
+# contains "rocm".
+if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
+  # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
+  unset HIP_PLATFORM
+  export PYTORCH_TEST_WITH_ROCM=1
+  # temporary to locate some kernel issues on the CI nodes
+  export HSAKMT_DEBUG_LEVEL=4
+  # improve rccl performance for distributed tests
+  export HSA_FORCE_FINE_GRAIN_PCIE=1
+fi
+
+# TODO: Re-enable libtorch testing for macOS, see https://github.com/pytorch/pytorch/issues/62598
+# shellcheck disable=SC2034
+BUILD_TEST_LIBTORCH=0
+
+# Run the given command; if it fails, try again after 1 second and, failing
+# that, once more after 2 seconds. The retries run inside a subshell.
+# Returns the status of the last attempt that was made.
+retry () {
+  if "$@"; then
+    return 0
+  fi
+  if (sleep 1 && "$@"); then
+    return 0
+  fi
+  (sleep 2 && "$@")
+}
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@ -0,0 +1,236 @@
+#!/bin/bash
+
+# Common util **functions** that can be sourced in other scripts.
+
+# note: printf is used instead of echo to avoid backslash
+# processing and to properly handle values that begin with a '-'.
+
+# Print all arguments, joined by single spaces, as one line on stdout.
+log() {
+    printf '%s\n' "$*"
+}
+
+# Print the arguments as one "ERROR: "-prefixed line on stderr.
+error() {
+    log "ERROR: $*" >&2
+}
+
+# Report the arguments through error(), then exit the shell with status 1.
+fatal() {
+    error "$@"
+    exit 1
+}
+
+# Run the given command; on failure retry it up to three more times, waiting
+# 10, 20 and then 40 seconds before each retry. The retries run inside a
+# subshell. Returns the status of the last attempt that was made.
+retry () {
+    if "$@"; then
+        return 0
+    fi
+    if (sleep 10 && "$@"); then
+        return 0
+    fi
+    if (sleep 20 && "$@"); then
+        return 0
+    fi
+    (sleep 40 && "$@")
+}
+
+# compositional trap taken from https://stackoverflow.com/a/7287873/23845
+# appends a command to a trap
+#
+# - 1st arg:  code to add
+# - remaining args:  names of traps to modify
+#
+trap_add() {
+    # $1 is the command to append; `shift` fails (and we abort through fatal)
+    # when no argument was supplied. Note that trap_add_cmd and trap_add_name
+    # are not declared local, so they are left set in the calling shell.
+    trap_add_cmd=$1; shift || fatal "${FUNCNAME[0]} usage error"
+    # every remaining argument names a trap (e.g. EXIT) to extend
+    for trap_add_name in "$@"; do
+        # The command substitution builds the new handler text: the existing
+        # handler (possibly empty), a newline, then the command being added.
+        trap -- "$(
+            # helper fn to get existing trap command from output
+            # of trap -p
+            extract_trap_cmd() { printf '%s\n' "$3"; }
+            # print existing trap command with newline
+            eval "extract_trap_cmd $(trap -p "${trap_add_name}")"
+            # print the new trap command
+            printf '%s\n' "${trap_add_cmd}"
+        )" "${trap_add_name}" \
+            || fatal "unable to add to trap ${trap_add_name}"
+    done
+}
+# set the trace attribute for the above function.  this is
+# required to modify DEBUG or RETURN traps because functions don't
+# inherit them unless the trace attribute is set
+declare -f -t trap_add
+
+# Exit with status 1 if the git checkout has uncommitted or untracked changes.
+# The check is skipped for rocm and xla build environments.
+function assert_git_not_dirty() {
+    # TODO: we should add an option to `build_amd.py` that reverts the repo to
+    #       an unmodified state.
+    if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] || [[ "$BUILD_ENVIRONMENT" == *xla* ]]; then
+        return 0
+    fi
+
+    git_status=$(git status --porcelain)
+    if [[ -z "${git_status}" ]]; then
+        return 0
+    fi
+
+    echo "Build left local git repository checkout dirty"
+    echo "git status --porcelain:"
+    echo "${git_status}"
+    exit 1
+}
+
+# Install locally built wheels (PyTorch and other build artifacts) given as
+# arguments. --no-index and --no-deps keep pip off the network.
+function pip_install_whl() {
+  python3 -m pip install \
+    --no-index \
+    --no-deps \
+    "$@"
+}
+
+# pip install with retries: up to three attempts with the progress bar
+# disabled, then up to three attempts without that flag (old versions of pip
+# don't have "--progress-bar"). Returns the status of the last attempt made.
+function pip_install() {
+  for _ in 1 2 3; do
+    if pip install --progress-bar off "$@"; then
+      return 0
+    fi
+  done
+  for _ in 1 2; do
+    if pip install "$@"; then
+      return 0
+    fi
+  done
+  pip install "$@"
+}
+
+# pip uninstall the given packages without prompting; if the first attempt
+# fails, try exactly once more.
+function pip_uninstall() {
+  if pip uninstall -y "$@"; then
+    return 0
+  fi
+  pip uninstall -y "$@"
+}
+
+# Run the given command with errexit temporarily disabled, so a failure does
+# not abort the script, and return the command's exit status.
+#
+# Arguments: the command and its arguments
+# Globals:   retcode (written) - exit status of the command
+# Returns:   the command's exit status
+function get_exit_code() {
+  # Remember whether errexit was on, so it is restored rather than being
+  # switched on for callers that never enabled it.
+  local restore_errexit=false
+  if [[ $- == *e* ]]; then
+    restore_errexit=true
+  fi
+
+  set +e
+  "$@"
+  retcode=$?
+  if [[ "${restore_errexit}" == true ]]; then
+    set -e
+  fi
+  return $retcode
+}
+
+# Download the bazel 4.2.1 binary for the current platform to tools/bazel,
+# check it against a pinned SHA-256 digest, and make it executable.
+function get_bazel() {
+  case "$(uname)" in
+    Darwin)
+      # macOS binary comes from the GitHub release page
+      retry curl https://github.com/bazelbuild/bazel/releases/download/4.2.1/bazel-4.2.1-darwin-x86_64  -Lo tools/bazel
+      # verify content
+      echo '74d93848f0c9d592e341e48341c53c87e3cb304a54a2a1ee9cff3df422f0b23c  tools/bazel' | shasum -a 256 -c >/dev/null
+      ;;
+    *)
+      # every other platform uses the Linux binary from the S3 bucket
+      retry curl https://ossci-linux.s3.amazonaws.com/bazel-4.2.1-linux-x86_64 -o tools/bazel
+      # verify content
+      echo '1a4f3a3ce292307bceeb44f459883859c793436d564b95319aacb8af1f20557c  tools/bazel' | shasum -a 256 -c >/dev/null
+      ;;
+  esac
+
+  chmod +x tools/bazel
+}
+
+# Install the MonkeyType package through pip_install.
+function install_monkeytype() {
+  pip_install MonkeyType
+}
+
+
+# Print the contents of .github/ci_commit_pins/<name>.txt, where <name> is $1.
+# The path is relative to the current working directory.
+function get_pinned_commit() {
+  local pin_name=$1
+  cat ".github/ci_commit_pins/${pin_name}.txt"
+}
+
+# Install torchtext from GitHub at the commit pinned in ci_commit_pins/text.txt.
+function install_torchtext() {
+  local text_pin
+  text_pin=$(get_pinned_commit text)
+  pip_install \
+    --no-use-pep517 \
+    --user \
+    "git+https://github.com/pytorch/text.git@${text_pin}"
+}
+
+# Install torchvision from GitHub at the commit pinned in
+# ci_commit_pins/vision.txt.
+function install_torchvision() {
+  local vision_pin
+  vision_pin=$(get_pinned_commit vision)
+  pip_install \
+    --no-use-pep517 \
+    --user \
+    "git+https://github.com/pytorch/vision.git@${vision_pin}"
+}
+
+# Clone pytorch/xla (branch r2.0) into ./xla and check out the pinned commit,
+# including submodules. Does nothing when ./xla already exists.
+function clone_pytorch_xla() {
+  if [[ -d ./xla ]]; then
+    return 0
+  fi
+
+  git clone --recursive -b r2.0 --quiet https://github.com/pytorch/xla.git
+  pushd xla
+  # pin the xla hash so that we don't get broken by changes to xla
+  git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
+  git submodule sync
+  git submodule update --init --recursive
+  popd
+}
+
+# Install the filelock package through pip_install.
+function install_filelock() {
+  pip_install \
+    filelock
+}
+
+# Install triton at the pinned commit, plus jinja2. Skipped entirely when
+# TEST_CONFIG contains "rocm". For gcc7 and clang build environments g++-9 is
+# installed first and used as the C++ compiler for the triton build.
+function install_triton() {
+  if [[ "${TEST_CONFIG}" == *rocm* ]]; then
+    echo "skipping triton due to rocm"
+    return 0
+  fi
+
+  local commit
+  commit=$(get_pinned_commit triton)
+  local triton_spec="git+https://github.com/openai/triton@${commit}#subdirectory=python"
+
+  if [[ "${BUILD_ENVIRONMENT}" == *gcc7* ]]; then
+    # Triton needs gcc-9 to build
+    sudo apt-get install -y g++-9
+    CXX=g++-9 pip_install --user "${triton_spec}"
+  elif [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
+    # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
+    sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
+    sudo apt-get install -y g++-9
+    CXX=g++-9 pip_install --user "${triton_spec}"
+  else
+    pip_install --user "${triton_spec}"
+  fi
+  pip_install --user jinja2
+}
+
+# Prepare the environment for building torch::deploy (multipy): install the
+# static libpython matching ANACONDA_PYTHON_VERSION, export CC/CXX pointing at
+# the gcc/g++ found on PATH, and upgrade pip.
+#
+# Globals: ANACONDA_PYTHON_VERSION (read), CC and CXX (written, exported)
+function setup_torchdeploy_deps(){
+  conda install -y -n "py_${ANACONDA_PYTHON_VERSION}" "libpython-static=${ANACONDA_PYTHON_VERSION}"
+  # CC and CXX are deliberately NOT declared local: an exported local variable
+  # is discarded when the function returns, so the caller's subsequent build
+  # steps would never see these compiler settings.
+  CC="$(which gcc)"
+  CXX="$(which g++)"
+  export CC
+  export CXX
+  pip install --upgrade pip
+}
+
+# Clone multipy next to the current checkout (in the parent directory), check
+# out the pinned commit, generate its runtime examples and pip-install it in
+# editable mode. The original working directory is restored afterwards.
+function checkout_install_torchdeploy() {
+  local multipy_pin
+  # read the pin before leaving the repository root
+  multipy_pin=$(get_pinned_commit multipy)
+  setup_torchdeploy_deps
+
+  pushd ..
+  git clone --recurse-submodules https://github.com/pytorch/multipy.git
+
+  pushd multipy
+  git checkout "${multipy_pin}"
+  python multipy/runtime/example/generate_examples.py
+  pip install -e .
+  popd
+
+  popd
+}
+
+# Run the test_deploy binary from the multipy checkout that sits in the
+# parent directory, then return to the original working directory.
+function test_torch_deploy() {
+  pushd ..
+  pushd multipy
+  ./multipy/runtime/build/test_deploy
+  popd
+  popd
+}
+
+# Install pandas, scipy and HuggingFace transformers at the pinned commit.
+function install_huggingface() {
+  local hf_pin
+  hf_pin=$(get_pinned_commit huggingface)
+
+  local dep
+  for dep in pandas scipy; do
+    pip_install "${dep}"
+  done
+  pip_install "git+https://github.com/huggingface/transformers.git@${hf_pin}#egg=transformers"
+}
+
+# Install pandas, scipy and pytorch-image-models (timm) at the pinned commit.
+function install_timm() {
+  local timm_pin
+  timm_pin=$(get_pinned_commit timm)
+
+  local dep
+  for dep in pandas scipy; do
+    pip_install "${dep}"
+  done
+  pip_install "git+https://github.com/rwightman/pytorch-image-models@${timm_pin}"
+}
+
+# Clone pytorch/benchmark into ./torchbench, switch to the no_torchaudio
+# branch and run its installer. With a non-empty first argument only the
+# models named by the arguments are installed; otherwise all of them are.
+function checkout_install_torchbench() {
+  git clone https://github.com/pytorch/benchmark torchbench
+  pushd torchbench
+  git checkout no_torchaudio
+
+  if [[ -z "$1" ]]; then
+    # Occasionally the installation may fail on one model but it is ok to continue
+    # to install and test other models
+    python install.py --continue_on_fail
+  else
+    python install.py --continue_on_fail models "$@"
+  fi
+  popd
+}
+
+# Run the functorch tests through test/run_test.py in verbose mode.
+function test_functorch() {
+  python test/run_test.py \
+    --functorch \
+    --verbose
+}
+
+# Print sccache statistics and, when OUR_GITHUB_JOB_ID is set, also write the
+# "stats" object of the JSON report to
+# sccache-stats-<BUILD_ENVIRONMENT>-<OUR_GITHUB_JOB_ID>.json.
+function print_sccache_stats() {
+  echo 'PyTorch Build Statistics'
+  sccache --show-stats
+
+  if [[ -z "${OUR_GITHUB_JOB_ID}" ]]; then
+    echo "env var OUR_GITHUB_JOB_ID not set, will not write sccache stats to json"
+  else
+    sccache --show-stats --stats-format json \
+      | jq .stats \
+      > "sccache-stats-${BUILD_ENVIRONMENT}-${OUR_GITHUB_JOB_ID}.json"
+  fi
+}
--- a/.ci/pytorch/create_test_cert.py
+++ b/.ci/pytorch/create_test_cert.py
@ -0,0 +1,96 @@
+from datetime import datetime, timedelta
+from tempfile import mkdtemp
+from cryptography.hazmat.primitives import serialization
+from cryptography.hazmat.primitives.asymmetric import rsa
+from cryptography import x509
+from cryptography.x509.oid import NameOID
+from cryptography.hazmat.primitives import hashes
+
+# All generated keys and certificates are written into a fresh temporary
+# directory; its path is printed on stdout.
+temp_dir = mkdtemp()
+print(temp_dir)
+
+
+def genrsa(path):
+    """Generate a 2048-bit RSA private key and store it at ``path``.
+
+    The key is written unencrypted, PEM-encoded, in TraditionalOpenSSL
+    format. Returns the private key object.
+    """
+    private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
+    pem_bytes = private_key.private_bytes(
+        encoding=serialization.Encoding.PEM,
+        format=serialization.PrivateFormat.TraditionalOpenSSL,
+        encryption_algorithm=serialization.NoEncryption(),
+    )
+    with open(path, "wb") as key_file:
+        key_file.write(pem_bytes)
+    return private_key
+
+
+def create_cert(path, C, ST, L, O, key):
+    """Create a self-signed CA certificate and write it to ``path`` as PEM.
+
+    ``C``, ``ST``, ``L`` and ``O`` are the country, state/province, locality
+    and organization name used for both subject and issuer. The certificate
+    is signed with ``key`` using SHA-256 and is returned.
+    """
+    # Self-signed: subject and issuer share the same name.
+    name = x509.Name([
+        x509.NameAttribute(NameOID.COUNTRY_NAME, C),
+        x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
+        x509.NameAttribute(NameOID.LOCALITY_NAME, L),
+        x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
+    ])
+
+    builder = x509.CertificateBuilder()
+    builder = builder.subject_name(name)
+    builder = builder.issuer_name(name)
+    builder = builder.public_key(key.public_key())
+    builder = builder.serial_number(x509.random_serial_number())
+    builder = builder.not_valid_before(datetime.utcnow())
+    # Our certificate will be valid for 10 days
+    builder = builder.not_valid_after(datetime.utcnow() + timedelta(days=10))
+    builder = builder.add_extension(
+        x509.BasicConstraints(ca=True, path_length=None), critical=True,
+    )
+    certificate = builder.sign(key, hashes.SHA256())
+
+    # Write our certificate out to disk.
+    with open(path, "wb") as cert_file:
+        cert_file.write(certificate.public_bytes(serialization.Encoding.PEM))
+    return certificate
+
+
+def create_req(path, C, ST, L, O, key):
+    """Create a certificate signing request and write it to ``path`` as PEM.
+
+    ``C``, ``ST``, ``L`` and ``O`` are the country, state/province, locality
+    and organization name of the subject. The request is signed with ``key``
+    using SHA-256 and is returned.
+    """
+    # Provide various details about who we are.
+    subject = x509.Name([
+        x509.NameAttribute(NameOID.COUNTRY_NAME, C),
+        x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
+        x509.NameAttribute(NameOID.LOCALITY_NAME, L),
+        x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
+    ])
+    builder = x509.CertificateSigningRequestBuilder().subject_name(subject)
+    request = builder.sign(key, hashes.SHA256())
+
+    with open(path, "wb") as csr_file:
+        csr_file.write(request.public_bytes(serialization.Encoding.PEM))
+    return request
+
+
+def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
+    """Issue a certificate for ``csr_cert`` and write it to ``path`` as PEM.
+
+    The subject and public key come from the signing request, the issuer is
+    the subject of ``ca_cert``, and the result is signed with
+    ``private_ca_key`` using SHA-256. Returns the new certificate.
+    """
+    builder = x509.CertificateBuilder()
+    builder = builder.subject_name(csr_cert.subject)
+    builder = builder.issuer_name(ca_cert.subject)
+    builder = builder.public_key(csr_cert.public_key())
+    builder = builder.serial_number(x509.random_serial_number())
+    builder = builder.not_valid_before(datetime.utcnow())
+    # Our certificate will be valid for 10 days
+    builder = builder.not_valid_after(datetime.utcnow() + timedelta(days=10))
+    # Sign our certificate with our private key
+    certificate = builder.sign(private_ca_key, hashes.SHA256())
+
+    with open(path, "wb") as cert_file:
+        cert_file.write(certificate.public_bytes(serialization.Encoding.PEM))
+    return certificate
+
+
+# Certificate authority: key pair plus self-signed certificate.
+ca_key = genrsa(temp_dir + "/ca.key")
+ca_cert = create_cert(
+    temp_dir + "/ca.pem",
+    "US",
+    "New York",
+    "New York",
+    "Gloo Certificate Authority",
+    ca_key,
+)
+
+# Test entity: key pair plus signing request.
+pkey = genrsa(temp_dir + "/pkey.key")
+csr = create_req(
+    temp_dir + "/csr.csr",
+    "US",
+    "California",
+    "San Francisco",
+    "Gloo Testing Company",
+    pkey,
+)
+
+# Certificate for the test entity, issued by the CA above.
+cert = sign_certificate_request(temp_dir + "/cert.pem", csr, ca_cert, ca_key)
--- a/.ci/pytorch/docker-build-test.sh
+++ b/.ci/pytorch/docker-build-test.sh
@ -0,0 +1,6 @@
+#!/bin/bash
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+# Build a Docker image from the current directory and tag it "pytorch".
+docker build -t pytorch .
--- a/.ci/pytorch/docs-test.sh
+++ b/.ci/pytorch/docs-test.sh
@ -0,0 +1,10 @@
+#!/bin/bash
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+echo "Testing pytorch docs"
+
+# Install the documentation requirements, then run the doctest make target
+# from inside docs/.
+cd docs
+pip_install -r requirements.txt
+make doctest
--- a/.ci/pytorch/fake_numpy/numpy.py
+++ b/.ci/pytorch/fake_numpy/numpy.py
@ -0,0 +1 @@
+# Stub module named "numpy": importing it always raises ModuleNotFoundError.
+# NOTE(review): presumably placed on the import path by CI to exercise the
+# NumPy-less code path -- confirm against the caller.
+raise ModuleNotFoundError("Sorry PyTorch, but our NumPy is in the other folder")
--- a/.ci/pytorch/macos-build-test.sh
+++ b/.ci/pytorch/macos-build-test.sh
@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Run the build phase when BUILD_ENVIRONMENT is unset/empty or names a build job.
+if [[ -z "${BUILD_ENVIRONMENT}" || "${BUILD_ENVIRONMENT}" == *-build* ]]; then
+  # shellcheck source=./macos-build.sh
+  source "$(dirname "${BASH_SOURCE[0]}")/macos-build.sh"
+fi
+
+# Run the test phase when BUILD_ENVIRONMENT is unset/empty or names a test job.
+if [[ -z "${BUILD_ENVIRONMENT}" || "${BUILD_ENVIRONMENT}" == *-test* ]]; then
+  # shellcheck source=./macos-test.sh
+  source "$(dirname "${BASH_SOURCE[0]}")/macos-test.sh"
+fi
--- a/.ci/pytorch/macos-build.sh
+++ b/.ci/pytorch/macos-build.sh
@ -0,0 +1,80 @@
+#!/bin/bash
+
+# shellcheck disable=SC2034
+# shellcheck source=./macos-common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
+# shellcheck source=./common-build.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
+
+# Build PyTorch
+# Outside CI (CI unset or empty), point DEVELOPER_DIR at the Xcode 9 install.
+if [[ -z "${CI}" ]]; then
+  export DEVELOPER_DIR=/Applications/Xcode9.app/Contents/Developer
+fi
+
+# This helper function wraps calls to binaries with sccache, but only if they're not already wrapped with sccache.
+# For example, `clang` will be `sccache clang`, but `sccache clang` will not become `sccache sccache clang`.
+# The way this is done is by detecting the command of the parent pid of the current process and checking whether
+# that is sccache, and wrapping sccache around the process if its parent were not already sccache.
+function write_sccache_stub() {
+  # $1: path of the stub script to create; its basename names the binary
+  # being wrapped. Note that output and binary are not declared local.
+  output=$1
+  binary=$(basename "${output}")
+
+  # Write a /bin/sh script that execs "sccache <binary> args..." unless the
+  # parent process is already sccache, in which case it execs the binary
+  # directly. Both %s placeholders receive the path `which` resolves for the
+  # binary at the time the stub is written.
+  printf "#!/bin/sh\nif [ \$(ps auxc \$(ps auxc -o ppid \$\$ | grep \$\$ | rev | cut -d' ' -f1 | rev) | tr '\\\\n' ' ' | rev | cut -d' ' -f2 | rev) != sccache ]; then\n  exec sccache %s \"\$@\"\nelse\n  exec %s \"\$@\"\nfi" "$(which "${binary}")" "$(which "${binary}")" > "${output}"
+  # make the stub executable for everyone
+  chmod a+x "${output}"
+}
+
+# When sccache is available, put clang/clang++ wrapper stubs first on PATH so
+# compiler invocations are routed through sccache.
+if which sccache > /dev/null; then
+  # Create temp directory for sccache shims
+  tmp_dir=$(mktemp -d)
+  # Append the cleanup with trap_add (from common_utils.sh) instead of a bare
+  # `trap ... EXIT`, which would replace EXIT handlers registered earlier.
+  trap_add 'rm -rfv "${tmp_dir}"' EXIT
+  write_sccache_stub "${tmp_dir}/clang++"
+  write_sccache_stub "${tmp_dir}/clang"
+
+  export PATH="${tmp_dir}:$PATH"
+fi
+
+# Build the wheel for arm64 via cross compilation.
+cross_compile_arm64() {
+  # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
+  # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
+  USE_DISTRIBUTED=0 \
+  CMAKE_OSX_ARCHITECTURES=arm64 \
+  MACOSX_DEPLOYMENT_TARGET=11.0 \
+  USE_MKLDNN=OFF \
+  USE_QNNPACK=OFF \
+  WERROR=1 \
+  BUILD_TEST=OFF \
+  USE_PYTORCH_METAL=1 \
+    python setup.py bdist_wheel
+}
+
+# Build the wheel with distributed support disabled and WERROR=1.
+compile_x86_64() {
+  USE_DISTRIBUTED=0 \
+  WERROR=1 \
+    python setup.py bdist_wheel
+}
+
+# Build libtorch in ../cpp_build/caffe2 with tools/build_libtorch.py and run
+# the test_lite_interpreter_runtime binary it produces. The build directory
+# is removed when the shell exits.
+#
+# Globals: CPP_BUILD, BUILD_LIBTORCH_PY (written)
+build_lite_interpreter() {
+    echo "Testing libtorch (lite interpreter)."
+
+    CPP_BUILD="$(pwd)/../cpp_build"
+    # Ensure the removal of the tmp directory. trap_add appends to the EXIT
+    # handler; a bare `trap ... EXIT` would replace handlers registered
+    # earlier, such as the sccache shim directory cleanup.
+    trap_add 'rm -rfv "${CPP_BUILD}"' EXIT
+    rm -rf "${CPP_BUILD}"
+    mkdir -p "${CPP_BUILD}/caffe2"
+
+    # It looks like libtorch needs to be built in the "${CPP_BUILD}/caffe2" folder.
+    BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
+    pushd "${CPP_BUILD}/caffe2" || exit
+    VERBOSE=1 DEBUG=1 python "${BUILD_LIBTORCH_PY}"
+    popd || exit
+
+    "${CPP_BUILD}/caffe2/build/bin/test_lite_interpreter_runtime"
+}
+
+# Pick the build flavour from BUILD_ENVIRONMENT; arm64 takes precedence over
+# lite-interpreter, and everything else gets the plain x86_64 build.
+case "${BUILD_ENVIRONMENT}" in
+  *arm64*)
+    cross_compile_arm64
+    ;;
+  *lite-interpreter*)
+    export BUILD_LITE_INTERPRETER=1
+    build_lite_interpreter
+    ;;
+  *)
+    compile_x86_64
+    ;;
+esac
+
+# Report compiler cache statistics when sccache is installed
+if which sccache > /dev/null; then
+  print_sccache_stats
+fi
+
+python tools/stats/export_test_times.py
+
+# The build must not leave the checkout modified
+assert_git_not_dirty
--- a/.ci/pytorch/macos-common.sh
+++ b/.ci/pytorch/macos-common.sh
@ -0,0 +1,14 @@
+#!/bin/bash
+
+# Common prelude for macos-build.sh and macos-test.sh
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+# Log the machdep.cpu sysctl entries of the machine running the job
+sysctl -a | grep machdep.cpu
+
+# These are required for both the build job and the test job.
+# In the latter to test cpp extensions.
+export MACOSX_DEPLOYMENT_TARGET=10.9
+export CXX=clang++
+export CC=clang
--- a/.ci/pytorch/macos-test.sh
+++ b/.ci/pytorch/macos-test.sh
@ -0,0 +1,186 @@
+#!/bin/bash
+
+# shellcheck disable=SC2034
+# shellcheck source=./macos-common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
+
+# Use binaries under conda environment, when one is configured
+if [[ -n "${CONDA_ENV}" ]]; then
+  export PATH="${CONDA_ENV}/bin:${PATH}"
+fi
+
+# Test that OpenMP is enabled for non-arm64 build
+if [[ "${BUILD_ENVIRONMENT}" != *arm64* ]]; then
+  pushd test
+  if [[ "$(python -c "import torch; print(int(torch.backends.openmp.is_available()))")" != "1" ]]; then
+    echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False"
+    exit 1
+  fi
+  popd
+fi
+
+# Environment preparation shared by the Python test entry points.
+setup_test_python() {
+  # The CircleCI worker hostname doesn't resolve to an address, so make
+  # ProcessGroupGloo default to the address of the loopback interface.
+  export GLOO_SOCKET_IFNAME=lo0
+  printf 'Ninja version: %s\n' "$(ninja --version)"
+
+  # Increase default limit on open file handles from 256 to 1024
+  ulimit -n 1024
+}
+
+# Run the whole Python test suite (minus the JIT executor tests) unsharded,
+# then verify that the checkout is still clean.
+test_python_all() {
+  setup_test_python
+
+  time python test/run_test.py \
+    --verbose \
+    --exclude-jit-executor
+
+  assert_git_not_dirty
+}
+
+# Run shard $1 of NUM_TEST_SHARDS of the Python tests (JIT executor and
+# distributed tests excluded), then verify that the checkout is still clean.
+# Exits with status 1 when NUM_TEST_SHARDS is unset or empty.
+test_python_shard() {
+  if [[ -z "${NUM_TEST_SHARDS}" ]]; then
+    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
+    exit 1
+  fi
+
+  setup_test_python
+
+  time python test/run_test.py \
+    --verbose \
+    --exclude-jit-executor \
+    --exclude-distributed-tests \
+    --shard "$1" "${NUM_TEST_SHARDS}"
+
+  assert_git_not_dirty
+}
+
+# Build libtorch and run the C++ API tests. Does nothing unless
+# BUILD_TEST_LIBTORCH is "1".
+test_libtorch() {
+  # C++ API
+
+  if [[ "${BUILD_TEST_LIBTORCH}" != "1" ]]; then
+    return 0
+  fi
+
+  # NB: Install outside of source directory (at the same level as the root
+  # pytorch folder) so that it doesn't get cleaned away prior to docker push.
+  # But still clean it before we perform our own build.
+
+  echo "Testing libtorch"
+
+  CPP_BUILD="${PWD}/../cpp-build"
+  rm -rf "${CPP_BUILD}"
+  mkdir -p "${CPP_BUILD}/caffe2"
+
+  BUILD_LIBTORCH_PY=${PWD}/tools/build_libtorch.py
+  pushd "${CPP_BUILD}/caffe2"
+  VERBOSE=1 DEBUG=1 python "${BUILD_LIBTORCH_PY}"
+  popd
+
+  python tools/download_mnist.py --quiet -d test/cpp/api/mnist
+
+  # Unfortunately it seems like the test can't load from miniconda3
+  # without these paths being set
+  export DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}:${PWD}/miniconda3/lib"
+  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${PWD}/miniconda3/lib"
+  TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" "${CPP_BUILD}/caffe2/bin/test_api"
+
+  assert_git_not_dirty
+}
+
+# Locate cmake, export its path as CMAKE_EXEC, list the libraries next to it
+# for debugging, and patch its rpath and code signature (both best-effort).
+print_cmake_info() {
+  CMAKE_EXEC=$(which cmake)
+  export CMAKE_EXEC
+  echo "${CMAKE_EXEC}"
+
+  CONDA_INSTALLATION_DIR=$(dirname "${CMAKE_EXEC}")
+  # Print all libraries under cmake rpath for debugging
+  ls -la "${CONDA_INSTALLATION_DIR}/../lib"
+
+  # Explicitly add conda env lib folder to cmake rpath to address the flaky issue
+  # where cmake dependencies couldn't be found. This seems to point to how conda
+  # links $CMAKE_EXEC to its package cache when cloning a new environment
+  install_name_tool -add_rpath @executable_path/../lib "${CMAKE_EXEC}" || true
+  # Adding the rpath will invalidate cmake signature, so signing it again here
+  # to trust the executable. EXC_BAD_ACCESS (SIGKILL (Code Signature Invalid))
+  # with an exit code 137 otherwise
+  codesign -f -s - "${CMAKE_EXEC}" || true
+}
+
+# Build the C++ custom backend test in test/custom_backend against the
+# installed torch package, run the Python tests, export a lowered module and
+# run the C++ test binary on it. Finishes by checking the checkout is clean.
+#
+# Globals: CMAKE_EXEC (read; set by print_cmake_info), SITE_PACKAGES (written)
+test_custom_backend() {
+  print_cmake_info
+
+  echo "Testing custom backends"
+  pushd test/custom_backend
+  rm -rf build && mkdir build
+  pushd build
+  # Use sysconfig rather than distutils.sysconfig: distutils was removed from
+  # the standard library in Python 3.12.
+  SITE_PACKAGES="$(python -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"
+  CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
+  make VERBOSE=1
+  popd
+
+  # Run Python tests and export a lowered module.
+  python test_custom_backend.py -v
+  python backend.py --export-module-to=model.pt
+  # Run C++ tests using the exported module.
+  build/test_custom_backend ./model.pt
+  rm -f ./model.pt
+  popd
+  assert_git_not_dirty
+}
+
+# Build the custom operator library in test/custom_operator against the
+# installed torch package, run the Python tests, export a script module and
+# run the C++ test binary on it. Finishes by checking the checkout is clean.
+#
+# Globals: CMAKE_EXEC (read; set by print_cmake_info), SITE_PACKAGES (written)
+test_custom_script_ops() {
+  print_cmake_info
+
+  echo "Testing custom script operators"
+  pushd test/custom_operator
+  # Build the custom operator library.
+  rm -rf build && mkdir build
+  pushd build
+  # Use sysconfig rather than distutils.sysconfig: distutils was removed from
+  # the standard library in Python 3.12.
+  SITE_PACKAGES="$(python -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"
+  CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
+  make VERBOSE=1
+  popd
+
+  # Run tests Python-side and export a script module.
+  python test_custom_ops.py -v
+  python model.py --export-script-module=model.pt
+  # Run tests C++-side and load the exported script module.
+  build/test_custom_ops ./model.pt
+  popd
+  assert_git_not_dirty
+}
+
+# Build the C++ jit hooks test in test/jit_hooks against the installed torch
+# package, export a script module from Python and run the C++ test binary on
+# it. Finishes by checking the checkout is clean.
+#
+# Globals: CMAKE_EXEC (read; set by print_cmake_info), SITE_PACKAGES (written)
+test_jit_hooks() {
+  print_cmake_info
+
+  echo "Testing jit hooks in cpp"
+  pushd test/jit_hooks
+  # Build the jit hooks test binary.
+  rm -rf build && mkdir build
+  pushd build
+  # Use sysconfig rather than distutils.sysconfig: distutils was removed from
+  # the standard library in Python 3.12.
+  SITE_PACKAGES="$(python -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"
+  CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
+  make VERBOSE=1
+  popd
+
+  # Run tests Python-side and export a script module.
+  python model.py --export-script-module=model
+  # Run tests C++-side and load the exported script module.
+  build/test_jit_hooks ./model
+  popd
+  assert_git_not_dirty
+}
+
+# Choose what to run: functorch only, one shard of a sharded run (with the
+# C++ tests split between shards 1 and 2), or everything.
+if [[ "${TEST_CONFIG}" == *functorch* ]]; then
+  test_functorch
+elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then
+  test_python_shard "${SHARD_NUMBER}"
+  case "${SHARD_NUMBER}" in
+    1)
+      test_libtorch
+      test_custom_script_ops
+      ;;
+    2)
+      test_jit_hooks
+      test_custom_backend
+      ;;
+  esac
+else
+  test_python_all
+  test_libtorch
+  test_custom_script_ops
+  test_jit_hooks
+  test_custom_backend
+fi
--- a/.ci/pytorch/multigpu-test.sh
+++ b/.ci/pytorch/multigpu-test.sh
@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Required environment variable: $BUILD_ENVIRONMENT
+# (This is set by default in the Docker images we build, so you don't
+# need to set it yourself.)
+
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+echo "Testing pytorch"
+
+# Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
+# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
+# OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api
+
+# Each module is run (and timed) on its own through test/run_test.py.
+for test_module in \
+    distributed/test_c10d_common \
+    distributed/test_c10d_gloo \
+    distributed/test_c10d_nccl \
+    distributed/test_c10d_spawn_gloo \
+    distributed/test_c10d_spawn_nccl \
+    distributed/test_store \
+    distributed/test_pg_wrapper \
+    distributed/rpc/cuda/test_tensorpipe_agent
+do
+  time python test/run_test.py --verbose -i "${test_module}"
+done
+
+# FSDP tests
+for f in test/distributed/fsdp/*.py ; do
+  # "${f#*/}" drops the leading "test/" path component
+  time python test/run_test.py --verbose -i "${f#*/}"
+done
+
+# ShardedTensor tests
+for test_module in \
+    distributed/checkpoint/test_checkpoint \
+    distributed/checkpoint/test_file_system_checkpoint \
+    distributed/_shard/sharding_spec/test_sharding_spec \
+    distributed/_shard/sharding_plan/test_sharding_plan \
+    distributed/_shard/sharded_tensor/test_megatron_prototype \
+    distributed/_shard/sharded_tensor/test_sharded_tensor \
+    distributed/_shard/sharded_tensor/test_sharded_tensor_reshard \
+    distributed/_shard/sharded_tensor/ops/test_chunk \
+    distributed/_shard/sharded_tensor/ops/test_elementwise_ops \
+    distributed/_shard/sharded_tensor/ops/test_embedding \
+    distributed/_shard/sharded_tensor/ops/test_embedding_bag \
+    distributed/_shard/sharded_tensor/ops/test_binary_cmp \
+    distributed/_shard/sharded_tensor/ops/test_init \
+    distributed/_shard/sharded_tensor/ops/test_linear \
+    distributed/_shard/sharded_tensor/ops/test_math_ops \
+    distributed/_shard/sharded_tensor/ops/test_matrix_ops \
+    distributed/_shard/sharded_tensor/ops/test_softmax \
+    distributed/_shard/sharded_optim/test_sharded_optim \
+    distributed/_shard/test_partial_tensor \
+    distributed/_shard/test_replicated_tensor
+do
+  time python test/run_test.py --verbose -i "${test_module}"
+done
+
+# Other tests
+time python test/run_test.py --verbose -i test_cuda_primary_ctx
+time python test/run_test.py --verbose -i test_optim -- -k optimizers_with_varying_tensors
+assert_git_not_dirty
--- a/.ci/pytorch/perf_test/common.sh
+++ b/.ci/pytorch/perf_test/common.sh
@ -0,0 +1,22 @@
+#!/bin/bash
+set -e
+
+# Run the given command inside a freshly created ./test_tmp/ scratch
+# directory and remove that directory afterwards.
+#
+# Arguments: the command to run and its arguments
+# Returns:   1 if the scratch directory cannot be prepared or entered
+run_test () {
+  # Each step is checked on its own. Inside an `&&` chain errexit is
+  # suppressed, so a failed mkdir/cd would let the command run in the wrong
+  # directory and the final `cd ..` would leave the original one.
+  rm -rf test_tmp/ || return 1
+  mkdir test_tmp/ || return 1
+  cd test_tmp/ || return 1
+  "$@"
+  cd .. || return 1
+  rm -rf test_tmp/
+}
+
+get_runtime_of_command () {
+  # Run the given command under the `time` keyword and echo the measured
+  # runtime. TIMEFORMAT=%R limits `time` output to the elapsed real time in
+  # seconds. Note that TIMEFORMAT and runtime are not declared local.
+  TIMEFORMAT=%R
+
+  # runtime=$( { time ($@ &> /dev/null); } 2>&1 1>/dev/null)
+  # Capture what the group writes to stderr (which includes the `time`
+  # report) and discard the command's stdout.
+  runtime=$( { time "$@"; } 2>&1 1>/dev/null)
+  # Exit with status 1 if the captured text contains "Error"
+  if [[ $runtime == *"Error"* ]]; then
+    exit 1
+  fi
+  # Strip a leading "+++ <command>" prefix from the captured text.
+  # NOTE(review): presumably the xtrace line emitted under `set -x` -- confirm.
+  runtime=${runtime#+++ $@}
+  # Evaluate the remaining text with Python's print() to normalise the number
+  runtime=$(python -c "print($runtime)")
+
+  echo "$runtime"
+}
--- a/.ci/pytorch/perf_test/compare_with_baseline.py
+++ b/.ci/pytorch/perf_test/compare_with_baseline.py
@ -0,0 +1,79 @@
+import sys
+import json
+import math
+import argparse
+
+# Compare the runtime statistics of one perf test sample against the stored
+# baseline, fail when the sample is at least 3 sigma slower, and optionally
+# record the sample as the new baseline.
+parser = argparse.ArgumentParser()
+parser.add_argument('--test-name', dest='test_name', action='store',
+                    required=True, help='test name')
+parser.add_argument('--sample-stats', dest='sample_stats', action='store',
+                    required=True, help='stats from sample')
+parser.add_argument('--update', action='store_true',
+                    help='whether to update baseline using stats from sample')
+args = parser.parse_args()
+
+test_name = args.test_name
+
+# The backend, and with it the baseline file, is derived from the test name.
+if 'cpu' in test_name:
+    backend = 'cpu'
+elif 'gpu' in test_name:
+    backend = 'gpu'
+else:
+    # Without this branch `backend` would be undefined and the script would
+    # die with a NameError a few lines further down.
+    parser.error(
+        "--test-name must contain 'cpu' or 'gpu' to select a baseline file, "
+        "got: {!r}".format(test_name)
+    )
+
+data_file_path = '../{}_runtime.json'.format(backend)
+
+with open(data_file_path) as data_file:
+    data = json.load(data_file)
+
+if test_name in data:
+    mean = float(data[test_name]['mean'])
+    sigma = float(data[test_name]['sigma'])
+else:
+    # Let the test pass if baseline number doesn't exist
+    mean = sys.maxsize
+    sigma = 0.001
+
+print("population mean: ", mean)
+print("population sigma: ", sigma)
+
+# Let the test pass if baseline number is NaN (which happened in
+# the past when we didn't have logic for catching NaN numbers)
+if math.isnan(mean) or math.isnan(sigma):
+    mean = sys.maxsize
+    sigma = 0.001
+
+# --sample-stats is a JSON object with 'mean' and 'sigma' entries
+sample_stats_data = json.loads(args.sample_stats)
+
+sample_mean = float(sample_stats_data['mean'])
+sample_sigma = float(sample_stats_data['sigma'])
+
+print("sample mean: ", sample_mean)
+print("sample sigma: ", sample_sigma)
+
+if math.isnan(sample_mean):
+    raise Exception('''Error: sample mean is NaN''')
+elif math.isnan(sample_sigma):
+    raise Exception('''Error: sample sigma is NaN''')
+
+# Number of baseline standard deviations the sample mean lies above the
+# baseline mean
+z_value = (sample_mean - mean) / sigma
+
+print("z-value: ", z_value)
+
+if z_value >= 3:
+    raise Exception('''\n
+z-value >= 3, there is high chance of perf regression.\n
+To reproduce this regression, run
+`cd .ci/pytorch/perf_test/ && bash {}.sh` on your local machine
+and compare the runtime before/after your code change.
+'''.format(test_name))
+else:
+    print("z-value < 3, no perf regression detected.")
+    if args.update:
+        print("We will use these numbers as new baseline.")
+        new_data_file_path = '../new_{}_runtime.json'.format(backend)
+        with open(new_data_file_path) as new_data_file:
+            new_data = json.load(new_data_file)
+        new_data[test_name] = {}
+        new_data[test_name]['mean'] = sample_mean
+        # The stored sigma is never smaller than 10% of the sample mean
+        new_data[test_name]['sigma'] = max(sample_sigma, sample_mean * 0.1)
+        with open(new_data_file_path, 'w') as new_data_file:
+            json.dump(new_data, new_data_file, indent=4)
--- a/.ci/pytorch/perf_test/get_stats.py
+++ b/.ci/pytorch/perf_test/get_stats.py
@ -0,0 +1,16 @@
+import sys
+import json
+import numpy
+
+# Every command-line argument is one runtime sample; print their mean and
+# standard deviation as a JSON object with 'mean' and 'sigma' keys.
+samples = [float(arg.strip()) for arg in sys.argv[1:]]
+
+stats = {
+    'mean': numpy.mean(samples),
+    'sigma': numpy.std(samples),
+}
+
+print(json.dumps(stats))
--- a/.ci/pytorch/perf_test/test_cpu_speed_mini_sequence_labeler.sh
+++ b/.ci/pytorch/perf_test/test_cpu_speed_mini_sequence_labeler.sh
@ -0,0 +1,43 @@
+#!/bin/bash
+set -e
+
+. ./common.sh
+
+# Time the mini sequence labeler benchmark on CPU.
+#   $1 - number of timed runs
+#   $2 - optional: "compare_with_baseline" or "compare_and_update"
+test_cpu_speed_mini_sequence_labeler () {
+  echo "Testing: mini sequence labeler, CPU"
+
+  export OMP_NUM_THREADS=4
+  export MKL_NUM_THREADS=4
+
+  # Fetch the benchmark repository at a fixed commit
+  git clone https://github.com/pytorch/benchmark.git
+  cd benchmark/
+  git checkout 726567a455edbfda6199445922a8cfee82535664
+  cd scripts/mini_sequence_labeler
+
+  SAMPLE_ARRAY=()
+  NUM_RUNS=$1
+
+  # Collect one runtime sample per run
+  for (( i=1; i<=NUM_RUNS; i++ )); do
+    runtime=$(get_runtime_of_command python main.py)
+    SAMPLE_ARRAY+=("${runtime}")
+  done
+
+  cd ../../..
+
+  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
+  echo "Runtime stats in seconds:"
+  echo "$stats"
+
+  case "$2" in
+    compare_with_baseline)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+      ;;
+    compare_and_update)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
+      ;;
+  esac
+}
+
+# Only run when executed directly, not when sourced
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  run_test test_cpu_speed_mini_sequence_labeler "$@"
+fi
--- a/.ci/pytorch/perf_test/test_cpu_speed_mnist.sh
+++ b/.ci/pytorch/perf_test/test_cpu_speed_mnist.sh
@ -0,0 +1,45 @@
+#!/bin/bash
+set -e
+
+. ./common.sh
+
+# Time one-epoch MNIST training on CPU.
+#   $1 - number of timed runs
+#   $2 - optional: "compare_with_baseline" or "compare_and_update"
+test_cpu_speed_mnist () {
+  echo "Testing: MNIST, CPU"
+
+  export OMP_NUM_THREADS=4
+  export MKL_NUM_THREADS=4
+
+  # Fetch the examples repository (perftests branch)
+  git clone https://github.com/pytorch/examples.git -b perftests
+  cd examples/mnist
+
+  conda install -c pytorch torchvision-cpu
+
+  # Download data
+  python main.py --epochs 0
+
+  SAMPLE_ARRAY=()
+  NUM_RUNS=$1
+
+  # Collect (and print) one runtime sample per run
+  for (( i=1; i<=NUM_RUNS; i++ )); do
+    runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log)
+    echo "$runtime"
+    SAMPLE_ARRAY+=("${runtime}")
+  done
+
+  cd ../..
+
+  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
+  echo "Runtime stats in seconds:"
+  echo "$stats"
+
+  case "$2" in
+    compare_with_baseline)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+      ;;
+    compare_and_update)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
+      ;;
+  esac
+}
+
+# Only run when executed directly, not when sourced
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  run_test test_cpu_speed_mnist "$@"
+fi
--- a/.ci/pytorch/perf_test/test_cpu_speed_torch.sh
+++ b/.ci/pytorch/perf_test/test_cpu_speed_torch.sh
@ -0,0 +1,29 @@
+#!/bin/bash
+
+. ./common.sh
+
+test_cpu_speed_torch () {
+  echo "Testing: torch.*, CPU"
+
+  export OMP_NUM_THREADS=4
+  export MKL_NUM_THREADS=4
+
+  git clone https://github.com/yf225/perf-tests.git
+
+  if [ "$1" == "compare_with_baseline" ]; then
+    export ARGS=(--compare ../cpu_runtime.json)
+  elif [ "$1" == "compare_and_update" ]; then
+    export ARGS=(--compare ../cpu_runtime.json --update ../new_cpu_runtime.json)
+  elif [ "$1" == "update_only" ]; then
+    export ARGS=(--update ../new_cpu_runtime.json)
+  fi
+
+  if ! python perf-tests/modules/test_cpu_torch.py "${ARGS[@]}"; then
+    echo "To reproduce this regression, run \`cd .ci/pytorch/perf_test/ && bash ${FUNCNAME[0]}.sh\` on your local machine and compare the runtime before/after your code change."
+    exit 1
+  fi
+}
+
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  run_test test_cpu_speed_torch "$@"
+fi
--- a/.ci/pytorch/perf_test/test_cpu_speed_torch_tensor.sh
+++ b/.ci/pytorch/perf_test/test_cpu_speed_torch_tensor.sh
@ -0,0 +1,29 @@
+#!/bin/bash
+
+. ./common.sh
+
+test_cpu_speed_torch_tensor () {
+  echo "Testing: torch.Tensor.*, CPU"
+
+  export OMP_NUM_THREADS=4
+  export MKL_NUM_THREADS=4
+
+  git clone https://github.com/yf225/perf-tests.git
+
+  if [ "$1" == "compare_with_baseline" ]; then
+    export ARGS=(--compare ../cpu_runtime.json)
+  elif [ "$1" == "compare_and_update" ]; then
+    export ARGS=(--compare ../cpu_runtime.json --update ../new_cpu_runtime.json)
+  elif [ "$1" == "update_only" ]; then
+    export ARGS=(--update ../new_cpu_runtime.json)
+  fi
+
+  if ! python perf-tests/modules/test_cpu_torch_tensor.py "${ARGS[@]}"; then
+    echo "To reproduce this regression, run \`cd .ci/pytorch/perf_test/ && bash ${FUNCNAME[0]}.sh\` on your local machine and compare the runtime before/after your code change."
+    exit 1
+  fi
+}
+
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  run_test test_cpu_speed_torch_tensor "$@"
+fi
--- a/.ci/pytorch/perf_test/test_gpu_speed_cudnn_lstm.sh
+++ b/.ci/pytorch/perf_test/test_gpu_speed_cudnn_lstm.sh
@ -0,0 +1,44 @@
+#!/bin/bash
+set -e
+
+. ./common.sh
+
+# CuDNN LSTM GPU perf test: times scripts/cudnn_lstm.py from a pinned
+# pytorch/benchmark checkout.
+#
+# Arguments:
+#   $1 - number of timed runs to collect
+#   $2 - optional mode: "compare_with_baseline" or "compare_and_update";
+#        any other value only prints the stats
+# Leaves NUM_RUNS, SAMPLE_ARRAY, runtime, stats and i set as globals.
+test_gpu_speed_cudnn_lstm () {
+  echo "Testing: CuDNN LSTM, GPU"
+
+  export OMP_NUM_THREADS=4 MKL_NUM_THREADS=4
+
+  # Fetch the benchmark sources and pin them to a fixed commit.
+  git clone https://github.com/pytorch/benchmark.git
+  cd benchmark/
+  git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0
+  cd scripts/
+
+  NUM_RUNS=$1
+  SAMPLE_ARRAY=()
+
+  # One sample per run; each sample is echoed as it is collected.
+  for (( i=1; i<=NUM_RUNS; i++ )); do
+    runtime=$(get_runtime_of_command python cudnn_lstm.py --skip-cpu-governor-check)
+    echo "$runtime"
+    SAMPLE_ARRAY+=("${runtime}")
+  done
+
+  # Back out of benchmark/scripts so the ../*.py helpers resolve as before.
+  cd ../..
+
+  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
+  echo "Runtime stats in seconds:"
+  echo "$stats"
+
+  case "$2" in
+    compare_with_baseline)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+      ;;
+    compare_and_update)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
+      ;;
+  esac
+}
+
+# Run the test only when executed directly, not when sourced.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  run_test test_gpu_speed_cudnn_lstm "$@"
+fi
--- a/.ci/pytorch/perf_test/test_gpu_speed_lstm.sh
+++ b/.ci/pytorch/perf_test/test_gpu_speed_lstm.sh
@ -0,0 +1,44 @@
+#!/bin/bash
+set -e
+
+. ./common.sh
+
+# LSTM GPU perf test: times scripts/lstm.py from a pinned pytorch/benchmark
+# checkout.
+#
+# Arguments:
+#   $1 - number of timed runs to collect
+#   $2 - optional mode: "compare_with_baseline" or "compare_and_update";
+#        any other value only prints the stats
+# Leaves NUM_RUNS, SAMPLE_ARRAY, runtime, stats and i set as globals.
+test_gpu_speed_lstm () {
+  echo "Testing: LSTM, GPU"
+
+  export OMP_NUM_THREADS=4 MKL_NUM_THREADS=4
+
+  # Fetch the benchmark sources and pin them to a fixed commit.
+  git clone https://github.com/pytorch/benchmark.git
+  cd benchmark/
+  git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0
+  cd scripts/
+
+  NUM_RUNS=$1
+  SAMPLE_ARRAY=()
+
+  # One sample per run; each sample is echoed as it is collected.
+  for (( i=1; i<=NUM_RUNS; i++ )); do
+    runtime=$(get_runtime_of_command python lstm.py --skip-cpu-governor-check)
+    echo "$runtime"
+    SAMPLE_ARRAY+=("${runtime}")
+  done
+
+  # Back out of benchmark/scripts so the ../*.py helpers resolve as before.
+  cd ../..
+
+  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
+  echo "Runtime stats in seconds:"
+  echo "$stats"
+
+  case "$2" in
+    compare_with_baseline)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+      ;;
+    compare_and_update)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
+      ;;
+  esac
+}
+
+# Run the test only when executed directly, not when sourced.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  run_test test_gpu_speed_lstm "$@"
+fi
--- a/.ci/pytorch/perf_test/test_gpu_speed_mlstm.sh
+++ b/.ci/pytorch/perf_test/test_gpu_speed_mlstm.sh
@ -0,0 +1,44 @@
+#!/bin/bash
+set -e
+
+. ./common.sh
+
+# MLSTM GPU perf test: times scripts/mlstm.py from a pinned pytorch/benchmark
+# checkout.
+#
+# Arguments:
+#   $1 - number of timed runs to collect
+#   $2 - optional mode: "compare_with_baseline" or "compare_and_update";
+#        any other value only prints the stats
+# Leaves NUM_RUNS, SAMPLE_ARRAY, runtime, stats and i set as globals.
+test_gpu_speed_mlstm () {
+  echo "Testing: MLSTM, GPU"
+
+  export OMP_NUM_THREADS=4 MKL_NUM_THREADS=4
+
+  # Fetch the benchmark sources and pin them to a fixed commit.
+  git clone https://github.com/pytorch/benchmark.git
+  cd benchmark/
+  git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0
+  cd scripts/
+
+  NUM_RUNS=$1
+  SAMPLE_ARRAY=()
+
+  # One sample per run; each sample is echoed as it is collected.
+  for (( i=1; i<=NUM_RUNS; i++ )); do
+    runtime=$(get_runtime_of_command python mlstm.py --skip-cpu-governor-check)
+    echo "$runtime"
+    SAMPLE_ARRAY+=("${runtime}")
+  done
+
+  # Back out of benchmark/scripts so the ../*.py helpers resolve as before.
+  cd ../..
+
+  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
+  echo "Runtime stats in seconds:"
+  echo "$stats"
+
+  case "$2" in
+    compare_with_baseline)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+      ;;
+    compare_and_update)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
+      ;;
+  esac
+}
+
+# Run the test only when executed directly, not when sourced.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  run_test test_gpu_speed_mlstm "$@"
+fi
--- a/.ci/pytorch/perf_test/test_gpu_speed_mnist.sh
+++ b/.ci/pytorch/perf_test/test_gpu_speed_mnist.sh
@ -0,0 +1,48 @@
+#!/bin/bash
+set -e
+
+. ./common.sh
+
+# MNIST GPU perf test: times `python main.py --epochs 1 --no-log` from the
+# pytorch/examples "perftests" branch, after one untimed warm-up run.
+#
+# Arguments:
+#   $1 - number of timed runs to collect
+#   $2 - optional mode: "compare_with_baseline" or "compare_and_update";
+#        any other value only prints the stats
+# Leaves NUM_RUNS, SAMPLE_ARRAY, runtime, stats and i set as globals.
+test_gpu_speed_mnist () {
+  echo "Testing: MNIST, GPU"
+
+  export OMP_NUM_THREADS=4 MKL_NUM_THREADS=4
+
+  git clone https://github.com/pytorch/examples.git -b perftests
+  cd examples/mnist
+
+  conda install -c pytorch torchvision
+
+  # Download data
+  python main.py --epochs 0
+
+  NUM_RUNS=$1
+  SAMPLE_ARRAY=()
+
+  # Needs warm up to get accurate number
+  python main.py --epochs 1 --no-log
+
+  # One sample per run; each sample is echoed as it is collected.
+  for (( i=1; i<=NUM_RUNS; i++ )); do
+    runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log)
+    echo "$runtime"
+    SAMPLE_ARRAY+=("${runtime}")
+  done
+
+  # Back out of examples/mnist so the ../*.py helpers resolve as before.
+  cd ../..
+
+  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
+  echo "Runtime stats in seconds:"
+  echo "$stats"
+
+  case "$2" in
+    compare_with_baseline)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+      ;;
+    compare_and_update)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
+      ;;
+  esac
+}
+
+# Run the test only when executed directly, not when sourced.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  run_test test_gpu_speed_mnist "$@"
+fi
--- a/.ci/pytorch/perf_test/test_gpu_speed_word_language_model.sh
+++ b/.ci/pytorch/perf_test/test_gpu_speed_word_language_model.sh
@ -0,0 +1,53 @@
+#!/bin/bash
+set -e
+
+. ./common.sh
+
+# Word language model (Wikitext-2) GPU perf test: times
+# `python main.py --cuda --epochs 1` from the pytorch/examples "perftests"
+# branch on a truncated copy of the dataset.
+#
+# Arguments:
+#   $1 - number of timed runs to collect
+#   $2 - optional mode: "compare_with_baseline" or "compare_and_update";
+#        any other value only prints the stats
+# Leaves NUM_RUNS, SAMPLE_ARRAY, runtime, stats and i set as globals.
+test_gpu_speed_word_language_model () {
+  echo "Testing: word language model on Wikitext-2, GPU"
+
+  export OMP_NUM_THREADS=4 MKL_NUM_THREADS=4
+
+  git clone https://github.com/pytorch/examples.git -b perftests
+  cd examples/word_language_model
+
+  cd data/wikitext-2
+
+  # Reduce dataset size, so that we can have more runs per test.
+  # Each entry is "<split>:<number of leading lines to keep>".
+  local entry split_name keep_lines
+  for entry in test:200 train:1000 valid:200; do
+    split_name=${entry%%:*}
+    keep_lines=${entry##*:}
+    sed -n "1,${keep_lines}p" "${split_name}.txt" > "${split_name}_tmp.txt"
+  done
+  for split_name in test train valid; do
+    mv "${split_name}_tmp.txt" "${split_name}.txt"
+  done
+
+  cd ../..
+
+  NUM_RUNS=$1
+  SAMPLE_ARRAY=()
+
+  # One sample per run; each sample is echoed as it is collected.
+  for (( i=1; i<=NUM_RUNS; i++ )); do
+    runtime=$(get_runtime_of_command python main.py --cuda --epochs 1)
+    echo "$runtime"
+    SAMPLE_ARRAY+=("${runtime}")
+  done
+
+  # Back out of examples/word_language_model so the ../*.py helpers resolve.
+  cd ../..
+
+  stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}")
+  echo "Runtime stats in seconds:"
+  echo "$stats"
+
+  case "$2" in
+    compare_with_baseline)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+      ;;
+    compare_and_update)
+      python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
+      ;;
+  esac
+}
+
+# Run the test only when executed directly, not when sourced.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  run_test test_gpu_speed_word_language_model "$@"
+fi
--- a/.ci/pytorch/perf_test/update_commit_hash.py
+++ b/.ci/pytorch/perf_test/update_commit_hash.py
@ -0,0 +1,13 @@
+import sys
+import json
+
+data_file_path = sys.argv[1]
+commit_hash = sys.argv[2]
+
+with open(data_file_path) as data_file:
+    data = json.load(data_file)
+
+data['commit'] = commit_hash
+
+with open(data_file_path, 'w') as data_file:
+    json.dump(data, data_file)
--- a/.ci/pytorch/print_sccache_log.py
+++ b/.ci/pytorch/print_sccache_log.py
@ -0,0 +1,17 @@
+import sys
+
+log_file_path = sys.argv[1]
+
+with open(log_file_path) as f:
+    lines = f.readlines()
+
+for line in lines:
+    # Ignore errors from CPU instruction set, symbol existing testing,
+    # or compilation error formatting
+    ignored_keywords = [
+        'src.c',
+        'CheckSymbolExists.c',
+        'test_compilation_error_formatting',
+    ]
+    if all([keyword not in line for keyword in ignored_keywords]):
+        print(line)
--- a/.ci/pytorch/run_glootls_test.sh
+++ b/.ci/pytorch/run_glootls_test.sh
@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Runs ProcessGroupGlooTest from distributed/test_c10d_gloo with the
+# TCP_TLS Gloo transport, using freshly generated test certificates.
+
+# create_test_cert.py lives next to this script; its stdout is used as the
+# directory that holds ca.pem, cert.pem and pkey.key.
+CREATE_TEST_CERT="$(dirname "${BASH_SOURCE[0]}")/create_test_cert.py"
+TMP_CERT_DIR=$(python "$CREATE_TEST_CERT")
+
+openssl verify -CAfile "${TMP_CERT_DIR}/ca.pem" "${TMP_CERT_DIR}/cert.pem"
+
+export GLOO_DEVICE_TRANSPORT=TCP_TLS
+export GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY=${TMP_CERT_DIR}/pkey.key
+export GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT=${TMP_CERT_DIR}/cert.pem
+export GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE=${TMP_CERT_DIR}/ca.pem
+
+time python test/run_test.py --include distributed/test_c10d_gloo --verbose -- ProcessGroupGlooTest
+# Remember the test result: the cleanup below always succeeds and would
+# otherwise become the script's exit status, hiding a failed test run.
+GLOO_TLS_TEST_STATUS=$?
+
+unset GLOO_DEVICE_TRANSPORT
+unset GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY
+unset GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT
+unset GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE
+
+# Report the test's status to the caller. `return` works when this file is
+# sourced; when it is executed directly `return` fails and `exit` is used.
+# shellcheck disable=SC2317
+return "$GLOO_TLS_TEST_STATUS" 2>/dev/null || exit "$GLOO_TLS_TEST_STATUS"
--- a/.ci/pytorch/short-perf-test-cpu.sh
+++ b/.ci/pytorch/short-perf-test-cpu.sh
@ -0,0 +1,71 @@
+#!/bin/bash
+
+# CPU perf test driver, part 1: install tooling, work out which upstream
+# commit has a stored baseline, and download that baseline.
+#
+# Reads COMMIT_SOURCE from the environment. When it equals the upstream
+# default branch name, a new baseline file is prepared for later upload.
+SCRIPT_PARENT_DIR=$(dirname "${BASH_SOURCE[0]}")
+
+# shellcheck source=.ci/pytorch/common.sh
+source "$SCRIPT_PARENT_DIR/common.sh"
+
+# Relative path: the script expects to be started from the repository root.
+cd .ci/pytorch/perf_test
+
+echo "Running CPU perf test for PyTorch..."
+
+pip install -q awscli
+
+# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
+# More info at https://github.com/aws/aws-cli/issues/2321
+aws configure set default.s3.multipart_threshold 5GB
+# Name of the upstream default branch, taken from the "HEAD branch" line of
+# `git remote show`.
+UPSTREAM_DEFAULT_BRANCH="$(git remote show https://github.com/pytorch/pytorch.git | awk '/HEAD branch/ {print $NF}')"
+
+if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
+    # Get current default branch commit hash
+    DEFAULT_BRANCH_COMMIT_ID=$(git log --format="%H" -n 1)
+    export DEFAULT_BRANCH_COMMIT_ID
+fi
+
+# Find the default branch commit to test against
+git remote add upstream https://github.com/pytorch/pytorch.git
+git fetch upstream
+# NOTE(review): this IFS change stays in effect for the rest of the script;
+# the read below overrides it locally with IFS=''.
+IFS=$'\n'
+# Walk upstream history in `git rev-list` order and stop at the first commit
+# for which a baseline JSON exists in the S3 bucket.
+while IFS='' read -r commit_id; do
+    if aws s3 ls s3://ossci-perf-test/pytorch/cpu_runtime/"${commit_id}".json; then
+        LATEST_TESTED_COMMIT=${commit_id}
+        break
+    fi
+done < <(git rev-list upstream/"$UPSTREAM_DEFAULT_BRANCH")
+# NOTE(review): if no commit matched, LATEST_TESTED_COMMIT is unset here and
+# the download targets ".json" -- confirm whether that case needs handling.
+aws s3 cp s3://ossci-perf-test/pytorch/cpu_runtime/"${LATEST_TESTED_COMMIT}".json cpu_runtime.json
+
+if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
+    # Prepare new baseline file
+    cp cpu_runtime.json new_cpu_runtime.json
+    python update_commit_hash.py new_cpu_runtime.json "${DEFAULT_BRANCH_COMMIT_ID}"
+fi
+
+# Include tests
+# shellcheck source=./perf_test/test_cpu_speed_mini_sequence_labeler.sh
+. ./test_cpu_speed_mini_sequence_labeler.sh
+# shellcheck source=./perf_test/test_cpu_speed_mnist.sh
+. ./test_cpu_speed_mnist.sh
+# shellcheck source=./perf_test/test_cpu_speed_torch.sh
+. ./test_cpu_speed_torch.sh
+# shellcheck source=./perf_test/test_cpu_speed_torch_tensor.sh
+. ./test_cpu_speed_torch_tensor.sh
+
+# Run tests
+export TEST_MODE="compare_with_baseline"
+if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
+    export TEST_MODE="compare_and_update"
+fi
+
+# Operator tests
+run_test test_cpu_speed_torch ${TEST_MODE}
+run_test test_cpu_speed_torch_tensor ${TEST_MODE}
+
+# Sample model tests
+run_test test_cpu_speed_mini_sequence_labeler 20 ${TEST_MODE}
+run_test test_cpu_speed_mnist 20 ${TEST_MODE}
+
+if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
+    # This could cause race condition if we are testing the same default branch commit twice,
+    # but the chance of them executing this line at the same time is low.
+    aws s3 cp new_cpu_runtime.json s3://ossci-perf-test/pytorch/cpu_runtime/"${DEFAULT_BRANCH_COMMIT_ID}".json --acl public-read
+fi
--- a/.ci/pytorch/short-perf-test-gpu.sh
+++ b/.ci/pytorch/short-perf-test-gpu.sh
@ -0,0 +1,76 @@
+#!/bin/bash
+
+# GPU perf test driver, part 1: install tooling, work out which upstream
+# commit has a stored baseline, and download that baseline.
+#
+# Reads COMMIT_SOURCE from the environment. When it equals the upstream
+# default branch name, a new baseline file is prepared for later upload.
+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+# Relative path: the script expects to be started from the repository root.
+# The matching popd is at the end of the script.
+pushd .ci/pytorch/perf_test
+
+echo "Running GPU perf test for PyTorch..."
+
+# Trying to uninstall PyYAML can cause problem. Workaround according to:
+# https://github.com/pypa/pip/issues/5247#issuecomment-415571153
+pip install -q awscli --ignore-installed PyYAML
+
+# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
+# More info at https://github.com/aws/aws-cli/issues/2321
+aws configure set default.s3.multipart_threshold 5GB
+# Name of the upstream default branch, taken from the "HEAD branch" line of
+# `git remote show`.
+UPSTREAM_DEFAULT_BRANCH="$(git remote show https://github.com/pytorch/pytorch.git | awk '/HEAD branch/ {print $NF}')"
+
+if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
+    # Get current default branch commit hash
+    DEFAULT_BRANCH_COMMIT_ID=$(git log --format="%H" -n 1)
+    export DEFAULT_BRANCH_COMMIT_ID
+fi
+
+# Find the default branch commit to test against
+git remote add upstream https://github.com/pytorch/pytorch.git
+git fetch upstream
+# NOTE(review): this IFS change stays in effect for the rest of the script;
+# the read below overrides it locally with IFS=''.
+IFS=$'\n'
+# Walk upstream history in `git rev-list` order and stop at the first commit
+# for which a baseline JSON exists in the S3 bucket.
+while IFS='' read -r commit_id; do
+    if aws s3 ls s3://ossci-perf-test/pytorch/gpu_runtime/"${commit_id}".json; then
+        LATEST_TESTED_COMMIT=${commit_id}
+        break
+    fi
+done < <(git rev-list upstream/"$UPSTREAM_DEFAULT_BRANCH")
+# NOTE(review): if no commit matched, LATEST_TESTED_COMMIT is unset here and
+# the download targets ".json" -- confirm whether that case needs handling.
+aws s3 cp s3://ossci-perf-test/pytorch/gpu_runtime/"${LATEST_TESTED_COMMIT}".json gpu_runtime.json
+
+if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
+    # Prepare new baseline file
+    cp gpu_runtime.json new_gpu_runtime.json
+    python update_commit_hash.py new_gpu_runtime.json "${DEFAULT_BRANCH_COMMIT_ID}"
+fi
+
+# Include tests
+# shellcheck source=./perf_test/test_gpu_speed_mnist.sh
+. ./test_gpu_speed_mnist.sh
+# shellcheck source=./perf_test/test_gpu_speed_word_language_model.sh
+. ./test_gpu_speed_word_language_model.sh
+# shellcheck source=./perf_test/test_gpu_speed_cudnn_lstm.sh
+. ./test_gpu_speed_cudnn_lstm.sh
+# shellcheck source=./perf_test/test_gpu_speed_lstm.sh
+. ./test_gpu_speed_lstm.sh
+# shellcheck source=./perf_test/test_gpu_speed_mlstm.sh
+. ./test_gpu_speed_mlstm.sh
+
+# Run tests
+if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
+    run_test test_gpu_speed_mnist 20 compare_and_update
+    run_test test_gpu_speed_word_language_model 20 compare_and_update
+    run_test test_gpu_speed_cudnn_lstm 20 compare_and_update
+    run_test test_gpu_speed_lstm 20 compare_and_update
+    run_test test_gpu_speed_mlstm 20 compare_and_update
+else
+    run_test test_gpu_speed_mnist 20 compare_with_baseline
+    run_test test_gpu_speed_word_language_model 20 compare_with_baseline
+    run_test test_gpu_speed_cudnn_lstm 20 compare_with_baseline
+    run_test test_gpu_speed_lstm 20 compare_with_baseline
+    run_test test_gpu_speed_mlstm 20 compare_with_baseline
+fi
+
+if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then
+    # This could cause race condition if we are testing the same default branch commit twice,
+    # but the chance of them executing this line at the same time is low.
+    aws s3 cp new_gpu_runtime.json s3://ossci-perf-test/pytorch/gpu_runtime/"${DEFAULT_BRANCH_COMMIT_ID}".json --acl public-read
+fi
+
+popd
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
--- a/.ci/pytorch/win-build.sh
+++ b/.ci/pytorch/win-build.sh
@ -0,0 +1,65 @@
+#!/bin/bash
+
+# If you want to rebuild, run this with REBUILD=1
+# If you want to build with CUDA, run this with USE_CUDA=1
+# If you want to build without CUDA, run this with USE_CUDA=0
+
+if [ ! -f setup.py ]; then
+  echo "ERROR: Please run this build script from PyTorch root directory."
+  exit 1
+fi
+
+SCRIPT_PARENT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+# shellcheck source=./common.sh
+source "$SCRIPT_PARENT_DIR/common.sh"
+# shellcheck source=./common-build.sh
+source "$SCRIPT_PARENT_DIR/common-build.sh"
+
+IMAGE_COMMIT_ID=$(git rev-parse HEAD)
+export IMAGE_COMMIT_ID
+export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID}
+if [[ ${JOB_NAME} == *"develop"* ]]; then
+  export IMAGE_COMMIT_TAG=develop-${IMAGE_COMMIT_TAG}
+fi
+
+export TMP_DIR="${PWD}/build/win_tmp"
+TMP_DIR_WIN=$(cygpath -w "${TMP_DIR}")
+export TMP_DIR_WIN
+export PYTORCH_FINAL_PACKAGE_DIR=${PYTORCH_FINAL_PACKAGE_DIR:-/c/w/build-results}
+if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
+    mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
+fi
+
+# This directory is used only to hold "pytorch_env_restore.bat", called via "setup_pytorch_env.bat"
+CI_SCRIPTS_DIR=$TMP_DIR/ci_scripts
+mkdir -p "$CI_SCRIPTS_DIR"
+
+if [ -n "$(ls "$CI_SCRIPTS_DIR"/*)" ]; then
+    rm "$CI_SCRIPTS_DIR"/*
+fi
+
+export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
+
+set +ex
+grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
+PYLONG_API_CHECK=$?
+if [[ $PYLONG_API_CHECK == 0 ]]; then
+  echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"
+  echo "because \`sizeof(long) == 4\` and \`sizeof(unsigned long) == 4\`."
+  echo "Please include \"torch/csrc/utils/python_numbers.h\" and use the correspoding APIs instead."
+  echo "PyLong_FromLong -> THPUtils_packInt32 / THPUtils_packInt64"
+  echo "PyLong_AsLong -> THPUtils_unpackInt (32-bit) / THPUtils_unpackLong (64-bit)"
+  echo "PyLong_FromUnsignedLong -> THPUtils_packUInt32 / THPUtils_packUInt64"
+  echo "PyLong_AsUnsignedLong -> THPUtils_unpackUInt32 / THPUtils_unpackUInt64"
+  exit 1
+fi
+set -ex
+
+"$SCRIPT_HELPERS_DIR"/build_pytorch.bat
+
+assert_git_not_dirty
+
+if [ ! -f "${TMP_DIR}"/"${IMAGE_COMMIT_TAG}".7z ] && [ ! "${BUILD_ENVIRONMENT}" == "" ]; then
+    exit 1
+fi
+echo "BUILD PASSED"
--- a/.ci/pytorch/win-test-helpers/build_pytorch.bat
+++ b/.ci/pytorch/win-test-helpers/build_pytorch.bat
@ -0,0 +1,160 @@
+:: Build helper, part 1: choose the build type, put the required tools on
+:: PATH, run the installation helpers and load the Visual Studio environment.
+:: Reads DEBUG, SCRIPT_HELPERS_DIR, VC_VERSION, VC_YEAR and VC_PRODUCT.
+if "%DEBUG%" == "1" (
+  set BUILD_TYPE=debug
+) ELSE (
+  set BUILD_TYPE=release
+)
+
+set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocolatey\bin;C:\Program Files\Git\cmd;C:\Program Files\Amazon\AWSCLI;C:\Program Files\Amazon\AWSCLI\bin;%PATH%
+
+:: This inflates our log size slightly, but it is REALLY useful to be
+:: able to see what our cl.exe commands are (since you can actually
+:: just copy-paste them into a local Windows setup to just rebuild a
+:: single file.)
+:: log sizes are too long, but leaving this here incase someone wants to use it locally
+:: set CMAKE_VERBOSE_MAKEFILE=1
+
+
+set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers
+
+
+:: Each helper call is followed by the same two checks: the first exits on
+:: an errorlevel of 1 or more, the second on a negative errorlevel.
+call %INSTALLER_DIR%\install_mkl.bat
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
+
+call %INSTALLER_DIR%\install_magma.bat
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
+
+call %INSTALLER_DIR%\install_sccache.bat
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
+
+:: Miniconda has been installed as part of the Windows AMI with all the dependencies.
+:: We just need to activate it here
+call %INSTALLER_DIR%\activate_miniconda3.bat
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
+
+:: Override VS env here
+:: The pushd/popd pair restores the current directory after vcvarsall.bat;
+:: -vcvars_ver is passed only when VC_VERSION is set.
+pushd .
+if "%VC_VERSION%" == "" (
+    call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64
+) else (
+    call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=%VC_VERSION%
+)
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
+@echo on
+popd
+
+:: CUDA environment setup. Skipped entirely unless USE_CUDA is exactly 1.
+if not "%USE_CUDA%"=="1" goto cuda_build_end
+
+set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION%
+
+:: CUDA_VERSION must contain a '.': if removing the dots leaves the value
+:: unchanged, there was none and the build is aborted.
+if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
+    echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
+    exit /b 1
+)
+rem version transformer, for example 10.1 to 10_1.
+set VERSION_SUFFIX=%CUDA_VERSION:.=_%
+set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
+
+:: These variables are set once; repeating the block would only prepend the
+:: same CUDA directories to PATH a second time.
+set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
+set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
+set CUDNN_ROOT_DIR=%CUDA_PATH%
+set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
+set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
+
+:cuda_build_end
+
+:: Build helper, part 3: compiler cache and generator configuration.
+:: Puts %TMP_DIR_WIN%\bin first on PATH, restarts the sccache server with
+:: zeroed stats, and selects sccache-cl as C/C++ compiler and Ninja as the
+:: CMake generator.
+set DISTUTILS_USE_SDK=1
+set PATH=%TMP_DIR_WIN%\bin;%PATH%
+
+:: The latest Windows CUDA test is running on AWS G5 runner with A10G GPU
+if "%TORCH_CUDA_ARCH_LIST%" == "" set TORCH_CUDA_ARCH_LIST=8.6
+
+:: The default sccache idle timeout is 600, which is too short and leads to intermittent build errors.
+set SCCACHE_IDLE_TIMEOUT=0
+set SCCACHE_IGNORE_SERVER_IO_ERROR=1
+sccache --stop-server
+sccache --start-server
+sccache --zero-stats
+set CC=sccache-cl
+set CXX=sccache-cl
+
+set CMAKE_GENERATOR=Ninja
+
+:: For CUDA builds: download randomtemp.exe, write an nvcc.bat wrapper that
+:: chains randomtemp -> sccache -> nvcc, and point the CUDA compiler
+:: variables at the wrapper and at nvcc.exe (path converted by cygpath -m).
+if "%USE_CUDA%"=="1" (
+  :: randomtemp is used to resolve the intermittent build error related to CUDA.
+  :: code: https://github.com/peterjc123/randomtemp-rust
+  :: issue: https://github.com/pytorch/pytorch/issues/25393
+  ::
+  :: CMake requires a single command as CUDA_NVCC_EXECUTABLE, so we push the wrappers
+  :: randomtemp.exe and sccache.exe into a batch file which CMake invokes.
+  curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
+  if errorlevel 1 exit /b
+  if not errorlevel 0 exit /b
+  echo @"%TMP_DIR_WIN%\bin\randomtemp.exe" "%TMP_DIR_WIN%\bin\sccache.exe" "%CUDA_PATH%\bin\nvcc.exe" %%* > "%TMP_DIR%/bin/nvcc.bat"
+  cat %TMP_DIR%/bin/nvcc.bat
+  set CUDA_NVCC_EXECUTABLE=%TMP_DIR%/bin/nvcc.bat
+  for /F "usebackq delims=" %%n in (`cygpath -m "%CUDA_PATH%\bin\nvcc.exe"`) do set CMAKE_CUDA_COMPILER=%%n
+  set CMAKE_CUDA_COMPILER_LAUNCHER=%TMP_DIR%/bin/randomtemp.exe;%TMP_DIR%\bin\sccache.exe
+)
+
+:: Build helper, part 4: snapshot the environment.
+:: Appends one `set "NAME=value"` line per current environment variable to
+:: ci_scripts\pytorch_env_restore.bat so the environment can be replayed
+:: later. Echo is turned off while the file is written.
+@echo off
+echo @echo off >> %TMP_DIR_WIN%\ci_scripts\pytorch_env_restore.bat
+for /f "usebackq tokens=*" %%i in (`set`) do echo set "%%i" >> %TMP_DIR_WIN%\ci_scripts\pytorch_env_restore.bat
+@echo on
+
+:: Only for a first build (REBUILD empty) in CI (BUILD_ENVIRONMENT set):
+:: write a helper that calls the restore script and changes to the current
+:: directory, then fetch the desktop shortcut from S3.
+if "%REBUILD%" == "" (
+  if NOT "%BUILD_ENVIRONMENT%" == "" (
+    :: Create a shortcut to restore pytorch environment
+    echo @echo off >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
+    echo call "%TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
+    echo cd /D "%CD%" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
+
+    aws s3 cp "s3://ossci-windows/Restore PyTorch Environment.lnk" "C:\Users\circleci\Desktop\Restore PyTorch Environment.lnk"
+    if errorlevel 1 exit /b
+    if not errorlevel 0 exit /b
+  )
+)
+
+:: Build helper, part 5: build the wheel, install it, and package artifacts.
+:: The wheel is installed from dist/ without index or dependency resolution
+:: (the first *.whl found by glob is used).
+python setup.py bdist_wheel
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
+sccache --show-stats
+python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
+:: Local builds (BUILD_ENVIRONMENT empty) only print a usage note. CI builds
+:: archive the installed torch, torchgen and functorch packages (plus nvfuser
+:: for CUDA builds) into %IMAGE_COMMIT_TAG%.7z and copy the archive, the
+:: test-times JSON and build\.ninja_log to PYTORCH_FINAL_PACKAGE_DIR.
+(
+  if "%BUILD_ENVIRONMENT%"=="" (
+    echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3` in Command Prompt before running Git Bash.
+  ) else (
+    if "%USE_CUDA%"=="1" (
+        7z a %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torchgen %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\functorch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\nvfuser && copy /Y "%TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z" "%PYTORCH_FINAL_PACKAGE_DIR%\"
+    ) else (
+        7z a %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torchgen %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\functorch && copy /Y "%TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z" "%PYTORCH_FINAL_PACKAGE_DIR%\"
+    )
+
+    if errorlevel 1 exit /b
+    if not errorlevel 0 exit /b
+
+    :: export test times so that potential sharded tests that'll branch off this build will use consistent data
+    python tools/stats/export_test_times.py
+    copy /Y ".pytorch-test-times.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
+
+    :: Also save build/.ninja_log as an artifact
+    copy /Y "build\.ninja_log" "%PYTORCH_FINAL_PACKAGE_DIR%\"
+  )
+)
+
+:: Save the "stats" object of sccache's JSON report to a per-job file, then
+:: shut the sccache server down.
+sccache --show-stats --stats-format json | jq .stats > sccache-stats-%BUILD_ENVIRONMENT%-%OUR_GITHUB_JOB_ID%.json
+sccache --stop-server
--- a/.ci/pytorch/win-test-helpers/choose_runtime_cuda_version.bat
+++ b/.ci/pytorch/win-test-helpers/choose_runtime_cuda_version.bat
@ -0,0 +1,4 @@
+REM The first argument should be the CUDA version.
+REM Prints the current PATH and CUDA_PATH, then prepends the bin directory
+REM of that CUDA toolkit version to PATH.
+echo %PATH%
+echo %CUDA_PATH%
+set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%1\bin;%PATH%
--- a/.ci/pytorch/win-test-helpers/install_test_functorch.bat
+++ b/.ci/pytorch/win-test-helpers/install_test_functorch.bat
@ -0,0 +1,19 @@
+:: Sets up the PyTorch environment and runs the functorch test shard
+:: selected by SHARD_NUMBER / NUM_TEST_SHARDS.
+:: Exit code: 0 on success, 1 when the tests fail, and the setup script's
+:: own errorlevel when the environment setup fails.
+call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
+:: exit the batch once there's an error
+:: `if not errorlevel 0` only matches negative codes, so positive failure
+:: codes are checked explicitly as well.
+if errorlevel 1 goto setup_fail
+if not errorlevel 0 goto setup_fail
+
+echo "Test functorch"
+pushd test
+python run_test.py --functorch --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose
+popd
+if ERRORLEVEL 1 goto fail
+
+:eof
+exit /b 0
+
+:fail
+exit /b 1
+
+:setup_fail
+echo "setup pytorch env failed"
+echo %errorlevel%
+exit /b
--- a/.ci/pytorch/win-test-helpers/installation-helpers/activate_miniconda3.bat
+++ b/.ci/pytorch/win-test-helpers/installation-helpers/activate_miniconda3.bat
@ -0,0 +1,26 @@
+:: Activates Miniconda3, installing it first when it is missing (or when
+:: INSTALL_FRESH_CONDA is already set to 1 by the caller).
+:: Local runs (BUILD_ENVIRONMENT empty) look for Miniconda3 under the
+:: current directory; CI runs use C:\Jenkins.
+if "%BUILD_ENVIRONMENT%"=="" (
+  set CONDA_PARENT_DIR=%CD%
+) else (
+  set CONDA_PARENT_DIR=C:\Jenkins
+)
+
+
+:: Be conservative here when rolling out the new AMI with conda. This will try
+:: to install conda as before if it couldn't find the conda installation. This
+:: can be removed eventually after we gain enough confidence in the AMI
+if not exist %CONDA_PARENT_DIR%\Miniconda3 (
+  set INSTALL_FRESH_CONDA=1
+)
+
+:: Download the installer into TMP_DIR_WIN and run it silently (/S) into
+:: %CONDA_PARENT_DIR%\Miniconda3; either step failing aborts this script.
+if "%INSTALL_FRESH_CONDA%"=="1" (
+  curl --retry 3 --retry-all-errors -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe --output %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe
+  if errorlevel 1 exit /b
+  if not errorlevel 0 exit /b
+
+  %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
+  if errorlevel 1 exit /b
+  if not errorlevel 0 exit /b
+)
+
+:: Activate conda so that we can use its commands, i.e. conda, python, pip
+call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3
--- a/.ci/pytorch/win-test-helpers/installation-helpers/install_magma.bat
+++ b/.ci/pytorch/win-test-helpers/installation-helpers/install_magma.bat
@ -0,0 +1,37 @@
+:: Downloads and unpacks the prebuilt MAGMA 2.5.4 archive matching
+:: CUDA_VERSION and BUILD_TYPE into %TMP_DIR_WIN%\magma and sets MAGMA_HOME.
+:: Does nothing for cpu builds or when USE_CUDA is not 1.
+if "%CUDA_VERSION%" == "cpu" (
+  echo skip magma installation for cpu builds
+  exit /b 0
+)
+
+rem remove dot in cuda_version, for example 11.1 to 111
+
+if not "%USE_CUDA%"=="1" (
+    exit /b 0
+)
+
+:: CUDA_VERSION must contain a '.': if removing the dots leaves the value
+:: unchanged, there was none.
+if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
+    echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
+    exit /b 1
+)
+
+set VERSION_SUFFIX=%CUDA_VERSION:.=%
+set CUDA_SUFFIX=cuda%VERSION_SUFFIX%
+
+:: NOTE(review): CUDA_SUFFIX always starts with "cuda" at this point, so this
+:: check looks like it can never trigger -- confirm whether it is still needed.
+if "%CUDA_SUFFIX%" == "" (
+  echo unknown CUDA version, please set `CUDA_VERSION` higher than 10.2
+  exit /b 1
+)
+
+:: The download is skipped on rebuilds (REBUILD set). Local runs use curl
+:: against the public URL; CI runs use `aws s3 cp`.
+if "%REBUILD%"=="" (
+  if "%BUILD_ENVIRONMENT%"=="" (
+    curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z --output %TMP_DIR_WIN%\magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z
+  ) else (
+    aws s3 cp s3://ossci-windows/magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z %TMP_DIR_WIN%\magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z --quiet
+  )
+  if errorlevel 1 exit /b
+  if not errorlevel 0 exit /b
+  7z x -aoa %TMP_DIR_WIN%\magma_2.5.4_%CUDA_SUFFIX%_%BUILD_TYPE%.7z -o%TMP_DIR_WIN%\magma
+  if errorlevel 1 exit /b
+  if not errorlevel 0 exit /b
+)
+set MAGMA_HOME=%TMP_DIR_WIN%\magma
--- a/.ci/pytorch/win-test-helpers/installation-helpers/install_mkl.bat
+++ b/.ci/pytorch/win-test-helpers/installation-helpers/install_mkl.bat
@ -0,0 +1,14 @@
+:: Downloads mkl_2020.2.254.7z and unpacks it into %TMP_DIR_WIN%\mkl, then
+:: adds its include directory to CMAKE_INCLUDE_PATH and its lib directory
+:: to LIB. The download is skipped on rebuilds (REBUILD set). Local runs
+:: (BUILD_ENVIRONMENT empty) use curl; CI runs use `aws s3 cp`.
+if "%REBUILD%"=="" (
+  if "%BUILD_ENVIRONMENT%"=="" (
+    curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z --output %TMP_DIR_WIN%\mkl.7z
+  ) else (
+    aws s3 cp s3://ossci-windows/mkl_2020.2.254.7z %TMP_DIR_WIN%\mkl.7z --quiet
+  )
+  if errorlevel 1 exit /b
+  if not errorlevel 0 exit /b
+  7z x -aoa %TMP_DIR_WIN%\mkl.7z -o%TMP_DIR_WIN%\mkl
+  if errorlevel 1 exit /b
+  if not errorlevel 0 exit /b
+)
+set CMAKE_INCLUDE_PATH=%TMP_DIR_WIN%\mkl\include
+set LIB=%TMP_DIR_WIN%\mkl\lib;%LIB%
--- a/.ci/pytorch/win-test-helpers/installation-helpers/install_sccache.bat
+++ b/.ci/pytorch/win-test-helpers/installation-helpers/install_sccache.bat
@ -0,0 +1,18 @@
+:: Ensures a working sccache.exe / sccache-cl.exe pair in %TMP_DIR_WIN%\bin.
+:: On a first build (REBUILD empty) it probes `sccache.exe --show-stats`;
+:: when that fails it kills any running sccache, deletes both binaries,
+:: downloads fresh copies (curl locally, `aws s3 cp` in CI) and jumps back
+:: to the probe. The `|| ver > nul` suffixes discard cleanup failures.
+:: NOTE(review): the probe/download loop has no retry limit, so it repeats
+:: for as long as the downloaded binary keeps failing the probe.
+mkdir %TMP_DIR_WIN%\bin
+
+if "%REBUILD%"=="" (
+  :check_sccache
+  %TMP_DIR_WIN%\bin\sccache.exe --show-stats || (
+    taskkill /im sccache.exe /f /t || ver > nul
+    del %TMP_DIR_WIN%\bin\sccache.exe || ver > nul
+    del %TMP_DIR_WIN%\bin\sccache-cl.exe || ver > nul
+    if "%BUILD_ENVIRONMENT%"=="" (
+      curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %TMP_DIR_WIN%\bin\sccache.exe
+      curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output %TMP_DIR_WIN%\bin\sccache-cl.exe
+    ) else (
+      aws s3 cp s3://ossci-windows/sccache.exe %TMP_DIR_WIN%\bin\sccache.exe
+      aws s3 cp s3://ossci-windows/sccache-cl.exe %TMP_DIR_WIN%\bin\sccache-cl.exe
+    )
+    goto :check_sccache
+  )
+)
--- a/.ci/pytorch/win-test-helpers/run_python_nn_smoketests.py
+++ b/.ci/pytorch/win-test-helpers/run_python_nn_smoketests.py
@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+import subprocess
+import os
+
+COMMON_TESTS = [
+    (
+        "Checking that torch is available",
+        "import torch",
+    ),
+    (
+        "Checking that MKL is available",
+        "import torch; exit(0 if torch.backends.mkl.is_available() else 1)",
+    ),
+]
+
+GPU_TESTS = [
+    (
+        "Checking that CUDA archs are setup correctly",
+        "import torch; torch.randn([3,5]).cuda()",
+    ),
+    (
+        "Checking that magma is available",
+        "import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)",
+    ),
+    (
+        "Checking that CuDNN is available",
+        "import torch; exit(0 if torch.backends.cudnn.is_available() else 1)",
+    ),
+]
+
+
+if __name__ == "__main__":
+
+    if 'USE_CUDA' in os.environ and os.environ['USE_CUDA'] == '1':
+        TESTS = COMMON_TESTS + GPU_TESTS
+    else:
+        TESTS = COMMON_TESTS
+    for description, python_commands in TESTS:
+        print(description)
+        command_args = ["python", "-c", python_commands]
+        command_string = " ".join(command_args)
+        print("Command:", command_string)
+        try:
+            subprocess.check_call(command_args)
+        except subprocess.CalledProcessError as e:
+            sdk_root = os.environ.get('WindowsSdkDir', 'C:\\Program Files (x86)\\Windows Kits\\10')
+            debugger = os.path.join(sdk_root, 'Debuggers', 'x64', 'cdb.exe')
+            if os.path.exists(debugger):
+                command_args = [debugger, "-o", "-c", "~*g; q"] + command_args
+                command_string = " ".join(command_args)
+                print("Reruning with traceback enabled")
+                print("Command:", command_string)
+                subprocess.run(command_args, check=False)
+            exit(e.returncode)
--- a/.ci/pytorch/win-test-helpers/setup_pytorch_env.bat
+++ b/.ci/pytorch/win-test-helpers/setup_pytorch_env.bat
@ -0,0 +1,73 @@
+:: Prepares the shell environment for the Windows PyTorch CI test scripts:
+:: tool PATH entries, Miniconda, the MSVC toolchain, optional CUDA variables,
+:: and a copy of torch under TMP_DIR_WIN\build. The resulting environment is
+:: saved to ci_scripts/pytorch_env_restore.bat so that later calls can simply
+:: replay it.
+::
+:: Environment variables read here: TMP_DIR, TMP_DIR_WIN, SCRIPT_HELPERS_DIR,
+:: VC_YEAR, VC_PRODUCT, VC_VERSION, USE_CUDA, CUDA_VERSION, BUILD_ENVIRONMENT,
+:: PYTORCH_FINAL_PACKAGE_DIR_WIN, IMAGE_COMMIT_TAG, CONDA_PARENT_DIR.
+::
+:: Fast path: if a restore script already exists, replay it and return 0.
+:: NOTE(review): the check uses TMP_DIR while the script is written further
+:: down via TMP_DIR_WIN; presumably both name the same directory - confirm.
+if exist "%TMP_DIR%/ci_scripts/pytorch_env_restore.bat" (
+    call %TMP_DIR%/ci_scripts/pytorch_env_restore.bat
+    exit /b 0
+)
+
+:: Put CMake, 7-Zip, Chocolatey, Git and the AWS CLI ahead of the existing PATH.
+set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocolatey\bin;C:\Program Files\Git\cmd;C:\Program Files\Amazon\AWSCLI;C:\Program Files\Amazon\AWSCLI\bin;%PATH%
+
+:: Install Miniconda3
+set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers
+
+:: Miniconda has been installed as part of the Windows AMI with all the dependencies.
+:: We just need to activate it here
+call %INSTALLER_DIR%\activate_miniconda3.bat
+:: "if errorlevel 1" catches positive codes and "if not errorlevel 0" catches
+:: negative ones; a bare "exit /b" returns the current errorlevel to the caller.
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
+
+:: Load the 64-bit MSVC build environment, pinning the toolset version when
+:: VC_VERSION is set. The pushd/popd pair restores the working directory
+:: afterwards; presumably vcvarsall.bat may change it - confirm.
+pushd .
+if "%VC_VERSION%" == "" (
+    call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64
+) else (
+    call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=%VC_VERSION%
+)
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
+:: Turn command echoing back on; presumably vcvarsall.bat disables it - confirm.
+@echo on
+popd
+
+set DISTUTILS_USE_SDK=1
+
+:: Everything up to the cuda_build_end label only applies when USE_CUDA is 1.
+if not "%USE_CUDA%"=="1" goto cuda_build_end
+
+set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION%
+
+rem version transformer, for example 10.1 to 10_1.
+set VERSION_SUFFIX=%CUDA_VERSION:.=_%
+:: Defines a version-specific variable such as CUDA_PATH_V10_1.
+set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
+
+:: cuDNN is looked up inside the CUDA toolkit directory itself.
+set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
+set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
+set CUDNN_ROOT_DIR=%CUDA_PATH%
+set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
+set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
+set NUMBAPRO_CUDALIB=%CUDA_PATH%\bin
+set NUMBAPRO_LIBDEVICE=%CUDA_PATH%\nvvm\libdevice
+set NUMBAPRO_NVVM=%CUDA_PATH%\nvvm\bin\nvvm64_32_0.dll
+
+:cuda_build_end
+
+:: Make the torch copy under TMP_DIR_WIN\build importable ahead of anything
+:: already on PYTHONPATH.
+set PYTHONPATH=%TMP_DIR_WIN%\build;%PYTHONPATH%
+
+:: Populate TMP_DIR_WIN\build: when BUILD_ENVIRONMENT is set, copy the packaged
+:: 7z archive named after IMAGE_COMMIT_TAG and extract it there; otherwise copy
+:: torch from the Miniconda site-packages directory.
+if NOT "%BUILD_ENVIRONMENT%"=="" (
+    pushd %TMP_DIR_WIN%\build
+    copy /Y %PYTORCH_FINAL_PACKAGE_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %TMP_DIR_WIN%\
+    :: 7z: -aos skips if exists because this .bat can be called multiple times
+    7z x %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z -aos
+    popd
+) else (
+    xcopy /s %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
+)
+
+:: Snapshot the whole environment: every line printed by "set" is appended to
+:: the restore script as a quoted set command. Echo is switched off while
+:: writing so the dump does not flood the log.
+@echo off
+echo @echo off >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat
+for /f "usebackq tokens=*" %%i in (`set`) do echo set "%%i" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat
+@echo on
+
+:: When BUILD_ENVIRONMENT is set, also write a helper script that restores the
+:: environment and changes to the current directory, then download a desktop
+:: shortcut from S3.
+:: NOTE(review): there is no explicit exit after this block, so the errorlevel
+:: seen by callers is whatever the last command here leaves behind; callers in
+:: this directory do not check it.
+if NOT "%BUILD_ENVIRONMENT%" == "" (
+  :: Create a shortcut to restore pytorch environment
+  echo @echo off >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
+  echo call "%TMP_DIR_WIN%/ci_scripts/pytorch_env_restore.bat" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
+  echo cd /D "%CD%" >> %TMP_DIR_WIN%/ci_scripts/pytorch_env_restore_helper.bat
+
+  aws s3 cp "s3://ossci-windows/Restore PyTorch Environment.lnk" "C:\Users\circleci\Desktop\Restore PyTorch Environment.lnk"
+)
--- a/.ci/pytorch/win-test-helpers/test_custom_backend.bat
+++ b/.ci/pytorch/win-test-helpers/test_custom_backend.bat
@ -0,0 +1,36 @@
+:: Builds the custom backend test library with CMake and Ninja, runs the
+:: Python-side tests, exports a script module, and finally loads that module
+:: from the C++ test executable. Each checked step aborts the script with
+:: exit code 1 when it leaves an errorlevel of 1 or greater.
+::
+:: Set up the build and test environment first. The result of this call is
+:: not checked.
+call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
+
+:: Fetch the pybind11 submodule, then work from the test directory.
+git submodule update --init --recursive third_party/pybind11
+cd test\custom_backend
+
+:: Build the custom backend library.
+mkdir build
+pushd build
+
+echo "Executing CMake for custom_backend test..."
+
+:: Note: Caffe2 does not support MSVC + CUDA + Debug mode (has to be Release mode)
+:: CMAKE_PREFIX_PATH points CMake at the torch copy placed under TMP_DIR_WIN\build.
+cmake -DCMAKE_PREFIX_PATH=%TMP_DIR_WIN%\build\torch -DCMAKE_BUILD_TYPE=Release -GNinja ..
+if ERRORLEVEL 1 exit /b 1
+
+echo "Executing Ninja for custom_backend test..."
+
+ninja -v
+if ERRORLEVEL 1 exit /b 1
+
+echo "Ninja succeeded for custom_backend test."
+
+popd
+
+:: Run tests Python-side and export a script module.
+python test_custom_backend.py -v
+if ERRORLEVEL 1 exit /b 1
+
+python backend.py --export-module-to="build/model.pt"
+if ERRORLEVEL 1 exit /b 1
+
+:: Run tests C++-side and load the exported script module.
+cd build
+:: Prepend the NvToolsExt and torch library directories to PATH before
+:: launching the executable; presumably it needs DLLs from there - confirm.
+set PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64;%TMP_DIR_WIN%\build\torch\lib;%PATH%
+test_custom_backend.exe model.pt
+if ERRORLEVEL 1 exit /b 1
--- a/.ci/pytorch/win-test-helpers/test_custom_script_ops.bat
+++ b/.ci/pytorch/win-test-helpers/test_custom_script_ops.bat
@ -0,0 +1,41 @@
+:: Builds the custom operator test library with CMake and Ninja, runs the
+:: Python-side tests, exports a script module, and finally loads that module
+:: from the C++ test executable. Each checked step aborts the script with
+:: exit code 1 when it leaves an errorlevel of 1 or greater.
+::
+:: Set up the build and test environment first. The result of this call is
+:: not checked.
+call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
+
+:: Fetch the pybind11 submodule, then work from the test directory.
+git submodule update --init --recursive third_party/pybind11
+cd test\custom_operator
+
+:: Build the custom operator library.
+mkdir build
+pushd build
+
+echo "Executing CMake for custom_operator test..."
+
+:: Note: Caffe2 does not support MSVC + CUDA + Debug mode (has to be Release mode)
+:: CMAKE_PREFIX_PATH points CMake at the torch copy placed under TMP_DIR_WIN\build.
+cmake -DCMAKE_PREFIX_PATH=%TMP_DIR_WIN%\build\torch -DCMAKE_BUILD_TYPE=Release -GNinja ..
+if ERRORLEVEL 1 exit /b 1
+
+echo "Executing Ninja for custom_operator test..."
+
+ninja -v
+if ERRORLEVEL 1 exit /b 1
+
+echo "Ninja succeeded for custom_operator test."
+
+popd
+
+:: Run tests Python-side and export a script module.
+python test_custom_ops.py -v
+if ERRORLEVEL 1 exit /b 1
+
+:: TODO: fix and re-enable this test
+:: See https://github.com/pytorch/pytorch/issues/25155
+:: python test_custom_classes.py -v
+:: if ERRORLEVEL 1 exit /b 1
+
+python model.py --export-script-module="build/model.pt"
+if ERRORLEVEL 1 exit /b 1
+
+:: Run tests C++-side and load the exported script module.
+cd build
+:: Prepend the NvToolsExt and torch library directories to PATH before
+:: launching the executable; presumably it needs DLLs from there - confirm.
+set PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64;%TMP_DIR_WIN%\build\torch\lib;%PATH%
+test_custom_ops.exe model.pt
+if ERRORLEVEL 1 exit /b 1
--- a/.ci/pytorch/win-test-helpers/test_distributed.bat
+++ b/.ci/pytorch/win-test-helpers/test_distributed.bat
@ -0,0 +1,24 @@
+REM Runs the distributed test suites one after another through
+REM test/run_test.py and stops at the first failure, returning that
+REM suite's exit code to the caller.
+REM
+REM The first argument should lead to the python interpreter, that is, the
+REM directory that contains python.exe. Surrounding quotes on the argument
+REM are stripped and the full interpreter path is re-quoted, so a directory
+REM with spaces in its name works. Every suite uses the same explicit
+REM python.exe path.
+"%~1\python.exe" test/run_test.py --verbose -i distributed/test_c10d_common
+if %errorlevel% neq 0 ( exit /b %errorlevel% )
+
+"%~1\python.exe" test/run_test.py --verbose -i distributed/test_c10d_gloo
+if %errorlevel% neq 0 ( exit /b %errorlevel% )
+
+"%~1\python.exe" test/run_test.py --verbose -i distributed/test_c10d_nccl
+if %errorlevel% neq 0 ( exit /b %errorlevel% )
+
+"%~1\python.exe" test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
+if %errorlevel% neq 0 ( exit /b %errorlevel% )
+
+"%~1\python.exe" test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
+if %errorlevel% neq 0 ( exit /b %errorlevel% )
+
+"%~1\python.exe" test/run_test.py --verbose -i distributed/test_data_parallel
+if %errorlevel% neq 0 ( exit /b %errorlevel% )
+
+"%~1\python.exe" test/run_test.py --verbose -i distributed/test_store
+if %errorlevel% neq 0 ( exit /b %errorlevel% )
+
+"%~1\python.exe" test/run_test.py --verbose -i distributed/test_pg_wrapper
+if %errorlevel% neq 0 ( exit /b %errorlevel% )
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`raise ModuleNotFoundError("Sorry PyTorch, but our NumPy is in the other folder")`