
Update torch, new builds, benchmark notes

main
Jeff Moe 2024-02-25 13:54:50 -07:00
parent e47e7290eb
commit ef489032a3
6 changed files with 125 additions and 15 deletions


@@ -1,7 +1,7 @@
# ubuntu
git clone --recursive https://github.com/ggerganov/llama.cpp
cd llama.cpp/
rm -rf build
# ubuntu
cmake -B build -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
@@ -20,13 +20,12 @@ cmake -B build -G Ninja \
-DLLAMA_AVX2=ON \
-DLLAMA_AVX=ON \
-DLLAMA_HIPBLAS=ON \
-DLLAMA_FMA=ON \
-DLLAMA_LTO=ON \
-DLLAMA_HIP_UMA=OFF \
-DLLAMA_QKK_64=OFF \
-DLLAMA_VULKAN=OFF \
-DLLAMA_F16C=ON \
-DAMDGPU_TARGETS=gfx1100
ninja -C build
exit
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
-DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
-DCMAKE_C_COMPILER=hipcc \
-DLLAMA_HIP_UMA=ON \ # Real slow, I think
-DLLAMA_MPI
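# Post-build smoke test (a sketch, not part of the original script): run a
# short generation with layers offloaded and check the startup log for the
# ROCm/HIP device detection lines. The model path reuses the GGUF from the
# server script below; any local GGUF file works.
# ./build/bin/main \
#   -m /srv/ml/huggingface/TheBloke/Phind-CodeLlama-34B-v2-GGUF/phind-codellama-34b-v2.Q8_0.gguf \
#   --n-gpu-layers 1024 \
#   -p "Hello" -n 32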


@@ -3,13 +3,20 @@ cd tinygrad/
python3 -m venv venv
source venv/bin/activate
pip install -U setuptools pip wheel
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.0
pip install --pre torchaudio torchvision --index-url https://download.pytorch.org/whl/nightly/rocm5.7
# Tinygrad from git repo
pip install -e .
# or from pypi
# pip install tinygrad
# To run the various examples and benchmarks:
# Upstream PyTorch's nightly ROCm build of Torch:
# pip install --pre torch torchaudio torchvision --index-url https://download.pytorch.org/whl/nightly/rocm6.0
# Or AMD ROCm version of Torch:
pip install torch==2.1.1 torchvision==0.16.1 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-6.0
pip install librosa nltk phonemizer protobuf pyyaml \
sentencepiece tiktoken unidecode gymnasium pytest hypothesis \
pillow opencv-python tensorflow ultralytics onnx pygame ctypeslib2 \
tf2onnx lm_eval onnxruntime pydot tensorflow_addons
# If portaudio.h is available
pip install pyaudio
# pip install -e .
pip install tinygrad
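# Optional sanity check (a sketch): confirm the ROCm build of torch sees the
# GPUs and that tinygrad can run a trivial op on its default device.
python3 -c "import torch; print(torch.version.hip, torch.cuda.is_available(), torch.cuda.device_count())"
python3 -c "from tinygrad import Tensor; print((Tensor([1.0, 2.0]) + 1).numpy())"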


@@ -3,6 +3,6 @@ cd llama.cpp/build
--n-gpu-layers 1024 \
-c 4096 \
--host 0.0.0.0 \
-t 128 \
-t 16 \
--split-mode layer \
-m /srv/ml/huggingface/TheBloke/Phind-CodeLlama-34B-v2-GGUF/phind-codellama-34b-v2.Q8_0.gguf
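# Once the server is listening, a minimal request from another machine
# (a sketch; assumes the default port 8080 since no --port is set above,
# and <server-ip> is a placeholder):
# curl http://<server-ip>:8080/completion \
#   -H 'Content-Type: application/json' \
#   -d '{"prompt": "def fibonacci(n):", "n_predict": 64}'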


@@ -127,3 +127,59 @@ The upstream tinycorp is working on implementing some of their benchmarks using
`<https://github.com/mlcommons>`_

Phoronix Test Suite
===================

Phoronix test suite:

`<https://github.com/phoronix-test-suite/phoronix-test-suite/>`_

`<https://www.phoronix-test-suite.com/>`_

.. code-block:: sh

  git clone https://github.com/phoronix-test-suite/phoronix-test-suite/
  cd phoronix-test-suite/
  apt install php-cli php-xml
  ./phoronix-test-suite list-missing-dependencies
  ./phoronix-test-suite list-tests
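
A single test profile can then be installed and run by name (a sketch; ``pts/compress-7zip`` is just an arbitrary example profile):

.. code-block:: sh

  ./phoronix-test-suite benchmark pts/compress-7zip
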
Meh, this automatically installs dependencies and builds, but doesn't use ROCm.

ROCm
====

Benchmarks optimized for ROCm.

HPL
---

HPL for ROCm from AMD.

.. code-block:: sh

  git clone https://github.com/ROCm/rocHPL
  cd rocHPL/
  # git checkout v6.0.0 # build fails in Ubuntu
  ./install.sh
  # ./build/bin/rochpl --input ./build/rocHPL/HPL.dat
  # 1 GPU (works, then fails on subsequent runs)
  ./mpirun_rochpl -P 1 -Q 1 -N 45056 --NB 384
  Node Binding: Process 0 [(p,q)=(0,0)] CPU Cores: 64 - {0-63}
  GPU Binding: Process 0 [(p,q)=(0,0)] GPU: 0, pciBusID c3
  Local matrix size = 15.1361 GBs
  ./mpirun_rochpl -P 1 -Q 2 -N 64000 --NB 384
  ./mpirun_rochpl -P 2 -Q 2 -N 90112 --NB 384
  ./mpirun_rochpl -P 2 -Q 4 -N 128000 --NB 384
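
Rough memory arithmetic for the ``-N`` values above (a sketch): HPL factors an N x N double-precision matrix, so the resident problem size is about N*N*8 bytes, split across the P x Q process grid.

.. code-block:: sh

  # N=45056 on a 1x1 grid: 45056^2 * 8 bytes is about 15.1 GiB, close to the
  # "Local matrix size = 15.1361 GBs" line reported above.
  python3 -c "N=45056; print(N*N*8 / 2**30, 'GiB')"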

HPCG
----

HPCG for ROCm.

.. code-block:: sh

  git clone https://github.com/ROCm/rocHPCG
  cd rocHPCG/
  ./install.sh


@@ -9,7 +9,7 @@ msgid ""
msgstr ""
"Project-Id-Version: tinyrocs: Direct to Chip Liquid Cooled GPU AI Cluster 0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2024-02-11 09:39-0700\n"
"POT-Creation-Date: 2024-02-25 13:54-0700\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: en\n"
@@ -269,3 +269,49 @@ msgstr ""
#: ../../../_source/benchmarks.rst:128
msgid "`<https://github.com/mlcommons>`_"
msgstr ""
#: ../../../_source/benchmarks.rst:132
msgid "Phoronix Test Suite"
msgstr ""
#: ../../../_source/benchmarks.rst:133
msgid "Phoronix test suite:"
msgstr ""
#: ../../../_source/benchmarks.rst:135
msgid "`<https://github.com/phoronix-test-suite/phoronix-test-suite/>`_"
msgstr ""
#: ../../../_source/benchmarks.rst:137
msgid "`<https://www.phoronix-test-suite.com/>`_"
msgstr ""
#: ../../../_source/benchmarks.rst:147
msgid ""
"Meh, this automatically installs dependencies and builds, but doesn't use "
"ROCm."
msgstr ""
#: ../../../_source/benchmarks.rst:151
msgid "ROCm"
msgstr ""
#: ../../../_source/benchmarks.rst:152
msgid "Benchmarks optimized for ROCm."
msgstr ""
#: ../../../_source/benchmarks.rst:155
msgid "HPL"
msgstr ""
#: ../../../_source/benchmarks.rst:156
msgid "HPL for ROCm from AMD."
msgstr ""
#: ../../../_source/benchmarks.rst:176
msgid "HPCG"
msgstr ""
#: ../../../_source/benchmarks.rst:177
msgid "HPCG for ROCm."
msgstr ""


@@ -152,7 +152,9 @@ ROCm for Ubuntu.
llvm-amdgpu llvm-amdgpu-runtime rocm-dkms rocm-dev rocm-libs \
rocm-khronos-cts rocm-ocltst rocm-validation-suite \
smi-lib-amdgpu smi-lib-amdgpu-dev \
libstdc++-12-dev
libstdc++-12-dev python-is-python3 \
vulkan-amdgpu libvulkan-dev libvulkan-volk-dev vulkan-tools \
vulkan-validationlayers-dev glslang-dev glslang-tools
# sudo apt purge --autoremove libc6-dev-i386 libc6-dev-x32
sudo apt install gcc-multilib
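# After install, a quick sanity check (a sketch) that both the ROCm and Vulkan
# stacks see the GPUs:
rocminfo | grep -i gfx
vulkaninfo --summary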