%
% Pytorch.tex
%
% AI Documentation
%
% Copyright (C) 2022, 2023, Jeff Moe
%
% This document is licensed under the Creative Commons Attribution 4.0
% International Public License (CC BY-SA 4.0) by Jeff Moe.
%

\section{Installation}
PyTorch gets installed via pip.

Often it is better to install PyTorch first ``by hand'', and not just use
it as installed via requirements.txt, as that may not have the exact version
that is best for the hardware at hand.

Take a look first at what version of Python and PyTorch the project is most
``comfortable'' with by looking at files like setup.py, requirements.txt
and its other iterations, .yaml files, Docker junk maybe sitting around,
etc. Pain can be avoided by using the version of Python the project is
written for, unless you want to port to a newer version or something.
If it is Python 2.x, it needs porting or finding of a replacement.

Also, same for PyTorch. See what version the project works best with.
Sometimes a later version will work fine as well. It isn't the worst idea to
just use the latest PyTorch stable release and see how it goes...

The PyTorch website has a nice little interface for helping you pick the
correct pip command, depending on what hardware is available and which
PyTorch version is needed. Site:

* \url{https://pytorch.org/get-started/locally/}

The default stable release, currently 2.0.1, with CUDA 11.7 is installed
thusly:

\begin{minted}{sh}
pip3 install torch torchvision torchaudio
\end{minted}

With Nvidia A16, A40, and A100, these worked with PyTorch stable and
CUDA 11.8:

\begin{minted}{sh}
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
\end{minted}

If a recent AMD GPU is available that has ROCm support, PyTorch
is installed thusly (untested; available AMD GPU was too old):

\begin{minted}{sh}
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2
\end{minted}

It may be possible to use a CPU, but it will generally suck. To install:

\begin{minted}{sh}
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
\end{minted}

If CUDA 12.1 is needed, PyTorch nightly is required:

\begin{minted}{sh}
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
\end{minted}

Other combinations are available at the site.
Commands for older versions are available here:

* \url{https://pytorch.org/get-started/previous-versions/}

XXX
Below will likely mess up your machine.

Lots of noise below, perhaps rebuilding pytorch with nvidia and older
GCC toolchain (?). Generally not needed....

\begin{minted}{sh}
for i in `apt-cache search cuda|sort -V| grep cuda | grep 11-8 | grep -v -e cuda-demo-suite-11-8 -e cuda-documentation-11-8 -e cuda-gdb-11-8 -e cuda-gdb-src-11-8 | cut -f 1 -d " " ` ; do echo $i ; apt install $i ; echo ;done

###################################################
# build pytorch
# need non-free to install intel-mkl
apt install libzstd-dev libnuma-dev clang gcc-11 libfftw3-dev libopenblas64-dev libopenblas-dev libblis-dev libblis64-dev libclblast-dev libxnnpack-dev libopenblas-openmp-dev libopenblas64-openmp-dev libopenmpi-dev libcudnn8-dev intel-mkl-full nvidia-cuda-toolkit nvidia-cuda-toolkit-gcc
sudo apt install `apt-cache search opencv|grep -i opencv |sort -V|grep dev |cut -f 1 -d " "`
libmagma-dev libarmnn-dev libonnx-dev
sudo apt install `apt-cache search lapack|grep -- -dev | cut -f 1 -d " " | grep -v ghc`
libmimalloc-dev
sudo apt install `apt-cache search vulkan | grep -- -dev | grep -i vulkan | cut -f 1 -d " "`
glslc libshaderc-dev
libideep-dev

# using virtualenv from sd web
git clone pytorch
cd pytorch
git checkout XXX
git submodule sync
git submodule update --init --recursive

export CMAKE_CUDA_ARCHITECTURES=native
export CPUINFO_BUILD_TOOLS=1
export BUILD_BINARY=1
export TORCH_USE_CUDA_DSA=1
export USE_CUDA=1
export USE_OPENCV=1
export USE_FFMPEG=1
export USE_ZSTD=1
-DCMAKE_CUDA_COMPILER:PATH=/usr/local/cuda/bin/nvcc
#python setup.py develop
mkdir build
cd build/
cmake ..
ccmake ..
###################################################
sudo update-alternatives --remove-all gcc
sudo update-alternatives --remove-all g++
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 20
sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30
sudo update-alternatives --set cc /usr/bin/gcc
sudo update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30
sudo update-alternatives --set c++ /usr/bin/g++
sudo update-alternatives --config gcc
sudo update-alternatives --config g++

pip install mkl-devel

sudo apt remove --purge --autoremove libcu++-dev
sudo apt install nvidia-cuda-dev
libleveldb-dev

# ok
# rm -rf build ; CUDACXX=/usr/local/cuda/bin/nvcc GCC="/usr/bin/gcc-11" CMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc python setup.py build
# Test
#rm -rf build ; CUDACXX=/usr/local/cuda/bin/nvcc CMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc python setup.py build
cd .. ; rm -rf build ; mkdir build ; cd build/ ; \
cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DENABLE_CUDA=ON -DUSE_CUDA=ON -DUSE_CUDNN=ON -DUSE_NVRTC=ON -DUSE_OPENCV=ON -DUSE_ZSTD=ON -DUSE_MKL=ON -DBUILD_CAFFE2=ON .. ; \
make -j12
ccmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DENABLE_CUDA=ON -DUSE_CUDA=ON -DUSE_CUDNN=ON -DUSE_NVRTC=ON -DUSE_OPENCV=ON -DUSE_ZSTD=ON -DUSE_MKL=ON -DBUILD_CAFFE2=ON ..

# ? -Wno-dev
# /home/jebba/devel/pytorch/pytorch/aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp:412:73: error: ISO C++ forbids comparison between pointer and integer [-fpermissive]

# nope
sudo apt purge libideep-dev
-DBUILD_CAFFE2=ON
-DUSE_VULKAN=ON
-DBUILD_BINARY=ON
-DUSE_OPENCL=ON
-DUSE_FFMPEG=ON

# XXX where?
TORCH_USE_CUDA_DSA
\end{minted}


\section{PyTorch Test}
Quick and dirty verify.py script to run after installing PyTorch to make
sure everything is ok:

\begin{minted}{python}
#!/usr/bin/env python3

import torch
x = torch.rand(5, 3)
print(x)

torch.cuda.set_device(0)
print("CUDA:", torch.cuda.is_available())

print("Pytorch Version:", torch.__version__)
\end{minted}


\section{Torch POWER}
Running PyTorch on ppc64le.
In particular, the Raptor Computing Talos II motherboard.


\begin{minted}{sh}
# Set the compilers, gcc-12 is nogo.
# Using ccache...
export CMAKE_C_COMPILER=/usr/lib/ccache/gcc-11
export CMAKE_CXX_COMPILER=/usr/lib/ccache/g++-11

# First time clone:
git clone --recursive https://github.com/pytorch/pytorch
# And subsequently:
cd pytorch/
# Clean behind your ears:
git checkout main
git reset --hard HEAD
git clean -ff
git pull
git submodule update --init
git submodule update
# Get latest release
git tag -l | sort -V | grep -v -e rc | tail -1
git checkout `git tag -l | sort -V | grep -v -e rc | tail -1`

# Virtual environment for Python, such as:
source deactivate
rm -rf venv
pyenv local 3.12
virtualenv -p 3.12 venv
source venv/bin/activate
pip install -U setuptools wheel pip
pip install -r requirements.txt

# Set options, install wanted dependencies:
ccmake build -DCUDAToolkit_INCLUDE_DIR=/usr/include

# Install into virtual environment
python setup.py install
\end{minted}