
Update torch, new builds, benchmark notes

main
Jeff Moe 2024-02-25 13:54:50 -07:00
parent e47e7290eb
commit ef489032a3
6 changed files with 125 additions and 15 deletions


@@ -1,7 +1,7 @@
# ubuntu
git clone --recursive https://github.com/ggerganov/llama.cpp
cd llama.cpp/
rm -rf build
# ubuntu
cmake -B build -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
@@ -20,13 +20,12 @@ cmake -B build -G Ninja \
-DLLAMA_AVX2=ON \
-DLLAMA_AVX=ON \
-DLLAMA_HIPBLAS=ON \
-DLLAMA_FMA=ON \
-DLLAMA_LTO=ON \
-DLLAMA_HIP_UMA=OFF \
-DLLAMA_QKK_64=OFF \
-DLLAMA_VULKAN=OFF \
-DLLAMA_F16C=ON \
-DAMDGPU_TARGETS=gfx1100
ninja -C build
exit
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
-DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
-DCMAKE_C_COMPILER=hipcc \
-DLLAMA_HIP_UMA=ON \ # Real slow, I think
-DLLAMA_MPI
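# Post-build smoke test (a sketch, not part of the original script): run a
# short generation with layers offloaded and check the startup log for the
# ROCm/HIP device detection lines. The model path reuses the GGUF from the
# server script below; any local GGUF file works.
# ./build/bin/main \
#   -m /srv/ml/huggingface/TheBloke/Phind-CodeLlama-34B-v2-GGUF/phind-codellama-34b-v2.Q8_0.gguf \
#   --n-gpu-layers 1024 \
#   -p "Hello" -n 32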


@@ -3,13 +3,20 @@ cd tinygrad/
python3 -m venv venv
source venv/bin/activate
pip install -U setuptools pip wheel
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.0
pip install --pre torchaudio torchvision --index-url https://download.pytorch.org/whl/nightly/rocm5.7
# Tinygrad from git repo
pip install -e .
# or from pypi
# pip install tinygrad
# To run the various examples and benchmarks:
# Upstream PyTorch's nightly ROCm build of Torch:
# pip install --pre torch torchaudio torchvision --index-url https://download.pytorch.org/whl/nightly/rocm6.0
# Or AMD ROCm version of Torch:
pip install torch==2.1.1 torchvision==0.16.1 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-6.0
pip install librosa nltk phonemizer protobuf pyyaml \
sentencepiece tiktoken unidecode gymnasium pytest hypothesis \
pillow opencv-python tensorflow ultralytics onnx pygame ctypeslib2 \
tf2onnx lm_eval onnxruntime pydot tensorflow_addons
# If portaudio.h is available
pip install pyaudio
# pip install -e .
pip install tinygrad
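# Optional sanity check (a sketch): confirm the ROCm build of torch sees the
# GPUs and that tinygrad can run a trivial op on its default device.
python3 -c "import torch; print(torch.version.hip, torch.cuda.is_available(), torch.cuda.device_count())"
python3 -c "from tinygrad import Tensor; print((Tensor([1.0, 2.0]) + 1).numpy())"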


@@ -3,6 +3,6 @@ cd llama.cpp/build
--n-gpu-layers 1024 \
-c 4096 \
--host 0.0.0.0 \
-t 128 \
-t 16 \
--split-mode layer \
-m /srv/ml/huggingface/TheBloke/Phind-CodeLlama-34B-v2-GGUF/phind-codellama-34b-v2.Q8_0.gguf
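# Once the server is listening, a minimal request from another machine
# (a sketch; assumes the default port 8080 since no --port is set above,
# and <server-ip> is a placeholder):
# curl http://<server-ip>:8080/completion \
#   -H 'Content-Type: application/json' \
#   -d '{"prompt": "def fibonacci(n):", "n_predict": 64}'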


@@ -127,3 +127,59 @@ The upstream tinycorp is working on implementing some of their benchmarks using
`<https://github.com/mlcommons>`_

Phoronix Test Suite
===================

Phoronix test suite:

`<https://github.com/phoronix-test-suite/phoronix-test-suite/>`_

`<https://www.phoronix-test-suite.com/>`_

.. code-block:: sh

  git clone https://github.com/phoronix-test-suite/phoronix-test-suite/
  cd phoronix-test-suite/
  apt install php-cli php-xml
  ./phoronix-test-suite list-missing-dependencies
  ./phoronix-test-suite list-tests
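
A single test profile can then be installed and run by name (a sketch; ``pts/compress-7zip`` is just an arbitrary example profile):

.. code-block:: sh

  ./phoronix-test-suite benchmark pts/compress-7zip
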
Meh, this automatically installs dependencies and builds, but doesn't use ROCm.

ROCm
====

Benchmarks optimized for ROCm.

HPL
---

HPL for ROCm from AMD.

.. code-block:: sh

  git clone https://github.com/ROCm/rocHPL
  cd rocHPL/
  # git checkout v6.0.0 # build fails in Ubuntu
  ./install.sh
  # ./build/bin/rochpl --input ./build/rocHPL/HPL.dat
  # 1 GPU (works, then fails on subsequent runs)
  ./mpirun_rochpl -P 1 -Q 1 -N 45056 --NB 384
  Node Binding: Process 0 [(p,q)=(0,0)] CPU Cores: 64 - {0-63}
  GPU Binding: Process 0 [(p,q)=(0,0)] GPU: 0, pciBusID c3
  Local matrix size = 15.1361 GBs
  ./mpirun_rochpl -P 1 -Q 2 -N 64000 --NB 384
  ./mpirun_rochpl -P 2 -Q 2 -N 90112 --NB 384
  ./mpirun_rochpl -P 2 -Q 4 -N 128000 --NB 384
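
Rough memory arithmetic for the ``-N`` values above (a sketch): HPL factors an N x N double-precision matrix, so the resident problem size is about N*N*8 bytes, split across the P x Q process grid.

.. code-block:: sh

  # N=45056 on a 1x1 grid: 45056^2 * 8 bytes is about 15.1 GiB, close to the
  # "Local matrix size = 15.1361 GBs" line reported above.
  python3 -c "N=45056; print(N*N*8 / 2**30, 'GiB')"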

HPCG
----

HPCG for ROCm.

.. code-block:: sh

  git clone https://github.com/ROCm/rocHPCG
  cd rocHPCG/
  ./install.sh


@@ -9,7 +9,7 @@ msgid ""
msgstr ""
"Project-Id-Version: tinyrocs: Direct to Chip Liquid Cooled GPU AI Cluster 0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2024-02-11 09:39-0700\n"
"POT-Creation-Date: 2024-02-25 13:54-0700\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: en\n"
@@ -269,3 +269,49 @@ msgstr ""
#: ../../../_source/benchmarks.rst:128
msgid "`<https://github.com/mlcommons>`_"
msgstr ""
#: ../../../_source/benchmarks.rst:132
msgid "Phoronix Test Suite"
msgstr ""
#: ../../../_source/benchmarks.rst:133
msgid "Phoronix test suite:"
msgstr ""
#: ../../../_source/benchmarks.rst:135
msgid "`<https://github.com/phoronix-test-suite/phoronix-test-suite/>`_"
msgstr ""
#: ../../../_source/benchmarks.rst:137
msgid "`<https://www.phoronix-test-suite.com/>`_"
msgstr ""
#: ../../../_source/benchmarks.rst:147
msgid ""
"Meh, this automatically installs dependencies and builds, but doesn't use "
"ROCm."
msgstr ""
#: ../../../_source/benchmarks.rst:151
msgid "ROCm"
msgstr ""
#: ../../../_source/benchmarks.rst:152
msgid "Benchmarks optimized for ROCm."
msgstr ""
#: ../../../_source/benchmarks.rst:155
msgid "HPL"
msgstr ""
#: ../../../_source/benchmarks.rst:156
msgid "HPL for ROCm from AMD."
msgstr ""
#: ../../../_source/benchmarks.rst:176
msgid "HPCG"
msgstr ""
#: ../../../_source/benchmarks.rst:177
msgid "HPCG for ROCm."
msgstr ""


@@ -152,7 +152,9 @@ ROCm for Ubuntu.
llvm-amdgpu llvm-amdgpu-runtime rocm-dkms rocm-dev rocm-libs \
rocm-khronos-cts rocm-ocltst rocm-validation-suite \
smi-lib-amdgpu smi-lib-amdgpu-dev \
libstdc++-12-dev
libstdc++-12-dev python-is-python3 \
vulkan-amdgpu libvulkan-dev libvulkan-volk-dev vulkan-tools \
vulkan-validationlayers-dev glslang-dev glslang-tools
# sudo apt purge --autoremove libc6-dev-i386 libc6-dev-x32
sudo apt install gcc-multilib
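# After install, a quick sanity check (a sketch) that both the ROCm and Vulkan
# stacks see the GPUs:
rocminfo | grep -i gfx
vulkaninfo --summary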