%
% Pytorch.tex
%
% AI Documentation
%
% Copyright (C) 2022, 2023, Jeff Moe
%
% This document is licensed under the Creative Commons Attribution 4.0
% International Public License (CC BY-SA 4.0) by Jeff Moe.
%

\section{Installation}
PyTorch gets installed via pip.

Often it is better to install PyTorch first ``by hand'', and not just use
it as installed via requirements.txt, as that may not have the exact version
that is best for the hardware at hand.

Take a look first at what version of Python and PyTorch the project is most
``comfortable'' with by looking at files like setup.py, requirements.txt
and its other iterations, .yaml files, Docker junk maybe sitting around,
etc. Pain can be avoided by using the version of Python the project is
written for, unless you want to port to a newer version or something.
If it is Python 2.x, it needs porting or finding of a replacement.

Also, same for PyTorch. See what version the project works best with.
Sometimes a later version will work fine as well. It isn't the worst idea to
just use the latest PyTorch stable release and see how it goes...

The PyTorch website has a nice little interface for helping you pick the
correct pip command, depending on what hardware is available and which
PyTorch version is needed. Site:

* \url{https://pytorch.org/get-started/locally/}

The default stable release, currently 2.0.1, with CUDA 11.7 is installed
thusly:

\begin{minted}{sh}
pip3 install torch torchvision torchaudio
\end{minted}

With Nvidia A16, A40, and A100, these worked with PyTorch stable and
CUDA 11.8:

\begin{minted}{sh}
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
\end{minted}

If a recent AMD GPU is available that has ROCm support, PyTorch
is installed thusly (untested; available AMD GPU was too old):

\begin{minted}{sh}
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2
\end{minted}

It may be possible to use a CPU, but it will generally suck. To install:

\begin{minted}{sh}
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
\end{minted}

If CUDA 12.1 is needed, PyTorch nightly is required:

\begin{minted}{sh}
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
\end{minted}

Other combinations are available at the site.
Commands for older versions are available here:

* \url{https://pytorch.org/get-started/previous-versions/}

XXX
Below will likely mess up your machine.

Lots of noise below, perhaps rebuilding pytorch with nvidia and older
GCC toolchain (?). Generally not needed....

\begin{minted}{sh}
for i in `apt-cache search cuda|sort -V| grep cuda | grep 11-8 | grep -v -e cuda-demo-suite-11-8 -e cuda-documentation-11-8 -e cuda-gdb-11-8 -e cuda-gdb-src-11-8 | cut -f 1 -d " " ` ; do echo $i ; apt install $i ; echo ;done

###################################################
# build pytorch
# need non-free to install intel-mkl
apt install libzstd-dev libnuma-dev clang gcc-11 libfftw3-dev libopenblas64-dev libopenblas-dev libblis-dev libblis64-dev libclblast-dev libxnnpack-dev libopenblas-openmp-dev libopenblas64-openmp-dev libopenmpi-dev libcudnn8-dev intel-mkl-full nvidia-cuda-toolkit nvidia-cuda-toolkit-gcc
sudo apt install `apt-cache search opencv|grep -i opencv |sort -V|grep dev |cut -f 1 -d " "`
libmagma-dev libarmnn-dev libonnx-dev
sudo apt install `apt-cache search lapack|grep -- -dev | cut -f 1 -d " " | grep -v ghc`
libmimalloc-dev
sudo apt install `apt-cache search vulkan | grep -- -dev | grep -i vulkan | cut -f 1 -d " "`
glslc libshaderc-dev
libideep-dev

# using virtualenv from sd web
git clone pytorch
cd pytorch
git checkout XXX
git submodule sync
git submodule update --init --recursive

export CMAKE_CUDA_ARCHITECTURES=native
export CPUINFO_BUILD_TOOLS=1
export BUILD_BINARY=1
export TORCH_USE_CUDA_DSA=1
export USE_CUDA=1
export USE_OPENCV=1
export USE_FFMPEG=1
export USE_ZSTD=1
-DCMAKE_CUDA_COMPILER:PATH=/usr/local/cuda/bin/nvcc
#python setup.py develop
mkdir build
cd build/
cmake ..
ccmake ..
###################################################
sudo update-alternatives --remove-all gcc
sudo update-alternatives --remove-all g++
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 20
sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30
sudo update-alternatives --set cc /usr/bin/gcc
sudo update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30
sudo update-alternatives --set c++ /usr/bin/g++
sudo update-alternatives --config gcc
sudo update-alternatives --config g++

pip install mkl-devel

sudo apt remove --purge --autoremove libcu++-dev
sudo apt install nvidia-cuda-dev
libleveldb-dev

# ok
# rm -rf build ; CUDACXX=/usr/local/cuda/bin/nvcc GCC="/usr/bin/gcc-11" CMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc python setup.py build
# Test
#rm -rf build ; CUDACXX=/usr/local/cuda/bin/nvcc CMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc python setup.py build
cd .. ; rm -rf build ; mkdir build ; cd build/ ; \
cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DENABLE_CUDA=ON -DUSE_CUDA=ON -DUSE_CUDNN=ON -DUSE_NVRTC=ON -DUSE_OPENCV=ON -DUSE_ZSTD=ON -DUSE_MKL=ON -DBUILD_CAFFE2=ON .. ; \
make -j12
ccmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DENABLE_CUDA=ON -DUSE_CUDA=ON -DUSE_CUDNN=ON -DUSE_NVRTC=ON -DUSE_OPENCV=ON -DUSE_ZSTD=ON -DUSE_MKL=ON -DBUILD_CAFFE2=ON ..

# ? -Wno-dev
# /home/jebba/devel/pytorch/pytorch/aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp:412:73: error: ISO C++ forbids comparison between pointer and integer [-fpermissive]

# nope
sudo apt purge libideep-dev
-DBUILD_CAFFE2=ON
-DUSE_VULKAN=ON
-DBUILD_BINARY=ON
-DUSE_OPENCL=ON
-DUSE_FFMPEG=ON

# XXX where?
TORCH_USE_CUDA_DSA
\end{minted}


\section{PyTorch Test}
Quick and dirty verify.py script to run after installing PyTorch to make
sure everything is ok:

\begin{minted}{python}
#!/usr/bin/env python3

import torch
x = torch.rand(5, 3)
print(x)

torch.cuda.set_device(0)
print("CUDA:", torch.cuda.is_available())

print("Pytorch Version:", torch.__version__)
\end{minted}


\section{Torch POWER}
Running PyTorch on ppc64le.
In particular, the Raptor Computing Talos II motherboard.


\begin{minted}{sh}
# Set the compilers, gcc-12 is nogo.
# Using ccache...
export CMAKE_C_COMPILER=/usr/lib/ccache/gcc-11
export CMAKE_CXX_COMPILER=/usr/lib/ccache/g++-11

# First time clone:
git clone --recursive https://github.com/pytorch/pytorch
# And subsequently:
cd pytorch/
# Clean behind your ears:
git checkout main
git reset --hard HEAD
git clean -ff
git pull
git submodule update --init
git submodule update
# Get latest release
git tag -l | sort -V | grep -v -e rc | tail -1
git checkout `git tag -l | sort -V | grep -v -e rc | tail -1`

# Virtual environment for Python, such as:
source deactivate
rm -rf venv
pyenv local 3.12
virtualenv -p 3.12 venv
source venv/bin/activate
pip install -U setuptools wheel pip
pip install -r requirements.txt

# Set options, install wanted dependencies:
ccmake build -DCUDAToolkit_INCLUDE_DIR=/usr/include

# Install into virtual environment
python setup.py install
\end{minted}