tinygrab/.github/workflows/test.yml

name: 'Unit Tests'

env:
  # Part of the key of the "Cache downloads" steps in this workflow. Bump it
  # when the downloaded files change substantially, so the jobs start a fresh
  # download cache instead of reusing the stale one and fetching the rest
  # from the internet.
  DOWNLOAD_CACHE_VERSION: "3"

# Triggers: pushes to master, every pull request, and manual dispatch.
on:
  push:
    branches: [master]
  pull_request:
  workflow_dispatch:

jobs:
  linter:
    name: Linters
    runs-on: ubuntu-latest
    timeout-minutes: 20

    # Lints the repo (pylint, ruff, mypy), enforces the tinygrad line budget,
    # runs the docs examples and the fuzzers, and finally installs the checkout
    # into a fresh venv to check that it works as an external package.
    # TODO: run the pre-commit hook to replace a lot of this
    steps:
    - name: Checkout Code
      uses: actions/checkout@v3
    - name: Set up Python 3.8
      uses: actions/setup-python@v4
      with:
        # quoted so the version is always read as a string, never a YAML float
        python-version: '3.8'
    - name: Cache python packages
      uses: actions/cache@v3
      with:
        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.8/site-packages
        key: linting-packages-${{ hashFiles('**/setup.py') }}-3.8
    - name: Install dependencies
      run: pip install -e '.[linting,testing]' --extra-index-url https://download.pytorch.org/whl/cpu
    - name: Repo line count
      run: python sz.py
    # Everything is disabled except W0311 (bad-indentation) and C0303
    # (trailing-whitespace), checked against a two-space indent.
    # NOTE(review): globstar is not enabled in this step, so bash expands
    # `**/*.py` like `*/*.py` (files exactly one directory deep) -- confirm
    # that this is the intended scope.
    - name: Lint with pylint
      run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string='  ' **/*.py
    # The newest ruff is installed on every run, so it has to be invoked
    # through the explicit `check` subcommand; the bare `ruff <path>` form is
    # not accepted by current releases.
    - name: Lint with ruff
      run: |
        pip3 install --upgrade --force-reinstall ruff
        python3 -m ruff check . --preview
    - name: Lint tinygrad with pylint
      run: python -m pylint tinygrad/
    - name: Run mypy
      run: python -m mypy
    - name: Install SLOCCount
      run: sudo apt install -y sloccount
    # Prints the full report, then fails when tinygrad/ exceeds 5000 SLOC.
    # The total is validated first: an empty or non-numeric value would
    # otherwise make the `-gt` test error out and the step pass silently.
    - name: Check <5000 lines
      run: |
        sloccount tinygrad test examples extra
        sloc=$(sloccount tinygrad | sed -n 's/.*Total Physical Source Lines of Code (SLOC)[ ]*= \([^ ]*\).*/\1/p' | tr -d ',')
        case "$sloc" in
          ''|*[!0-9]*) echo "could not parse the SLOC total from sloccount output" >&2; exit 1 ;;
        esac
        echo "tinygrad SLOC: $sloc (limit 5000)"
        if [ "$sloc" -gt 5000 ]; then exit 1; fi
    - name: Test Docs
      run: |
        python docs/abstractions.py
        python docs/beautiful.py
    # Extracts every ```python fenced block from the quickstart and runs the
    # concatenation as one script.
    - name: Test Quickstart
      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py &&  PYTHONPATH=. python quickstart.py
    - name: Fuzz Test symbolic
      run: python test/external/fuzz_symbolic.py
    - name: Fuzz Test shapetracker
      run: PYTHONPATH="." python test/external/fuzz_shapetracker.py
    - name: Test shapetracker to_movement_ops
      run: PYTHONPATH="." python extra/to_movement_ops.py
    # Installs the checkout (non-editable) into a clean venv outside the
    # workspace and imports it from there.
    - name: Use as an external package
      run: |
        mkdir $HOME/test_external_dir
        cd $HOME/test_external_dir
        python -m venv venv
        source venv/bin/activate
        pip install $GITHUB_WORKSPACE
        python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"

  testcpuimagenet:
    name: CPU and ImageNet to C Tests
    runs-on: ubuntu-latest
    timeout-minutes: 20

    # Runs pytest with no backend variable set, then compiles EfficientNet to
    # C, builds it with clang and greps its output on a hen photo for "hen".
    steps:
      - name: Checkout Code
        uses: actions/checkout@v3

      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"

      - name: Cache python packages
        uses: actions/cache@v3
        with:
          path: ${{ env.Python3_ROOT_DIR }}/lib/python3.8/site-packages
          key: testing-packages-${{ hashFiles('**/setup.py') }}

      - name: Cache downloads
        uses: actions/cache@v3
        with:
          path: ~/.cache/tinygrad/downloads/
          key: downloads-cache-cpu-${{ env.DOWNLOAD_CACHE_VERSION }}

      - name: Install Dependencies
        run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu

      # The -k filter deselects only tests matching both terms.
      - name: Run Pytest
        run: >-
          python -m pytest -n=auto test/
          -k "not (test_efficientnet and models/test_train.py)"
          --durations=20

      - name: Compile EfficientNet to C
        run: PYTHONPATH="." CLANG=1 python examples/compile_efficientnet.py > recognize.c

      - name: Compile C to native
        run: clang -O2 recognize.c -lm -o recognize

      # grep is the last command of the pipeline, so its exit status decides
      # whether the step passes.
      - name: Test EfficientNet
        run: curl https://media.istockphoto.com/photos/hen-picture-id831791190 | ./recognize | grep hen

  testtorch:
    name: Torch Tests
    runs-on: ubuntu-latest
    timeout-minutes: 20

    # Runs the whole test suite and the ONNX backend tests with TORCH=1.
    steps:
      - name: Checkout Code
        uses: actions/checkout@v3

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Cache python packages
        uses: actions/cache@v3
        with:
          path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
          key: testing-packages-${{ hashFiles('**/setup.py') }}

      - name: Cache downloads
        uses: actions/cache@v3
        with:
          path: ~/.cache/tinygrad/downloads/
          key: downloads-cache-torch-${{ env.DOWNLOAD_CACHE_VERSION }}

      - name: Install Dependencies
        run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu

      - name: Run Pytest
        run: |
          TORCH=1 python -m pytest -n=auto test/ --durations=20

      - name: Run ONNX
        run: |
          TORCH=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py

  testopencl:
    # One job per task; each step below the shared setup is gated on
    # matrix.task, so a given job only runs the steps of its own task.
    strategy:
      fail-fast: false
      matrix:
        task: [optimage, openpilot, onnx]
    name: ${{ matrix.task=='optimage'&&'GPU OPT and IMAGE Tests' || matrix.task=='openpilot'&&'openpilot (OpenCL) Tests' || matrix.task=='onnx'&&'ONNX+Optimization Tests' }}
    # NOTE(review): pinned to ubuntu-20.04 rather than ubuntu-latest,
    # presumably for the Intel OpenCL packages installed below -- confirm the
    # image is still offered.
    runs-on: ubuntu-20.04
    timeout-minutes: 20

    steps:
      # ---- shared setup ----
      - name: Checkout Code
        uses: actions/checkout@v3

      # Installs pinned versions of the Intel oneAPI OpenCL CPU runtime.
      - name: Install OpenCL
        run: |
          echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
          echo "deb [ allow-insecure=yes ] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
          sudo apt update
          sudo apt install --allow-unauthenticated -y --no-install-recommends \
            intel-oneapi-runtime-openmp=2023.2.1-16 intel-oneapi-runtime-compilers-common=2023.2.1-16 intel-oneapi-runtime-compilers=2023.2.1-16 \
            intel-oneapi-runtime-dpcpp-sycl-opencl-cpu=2023.2.1-16 intel-oneapi-runtime-tbb-common=2021.10.0-49541 \
            intel-oneapi-runtime-tbb=2021.10.0-49541 intel-oneapi-runtime-opencl=2023.2.1-16

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Cache python packages
        uses: actions/cache@v3
        with:
          path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
          key: testing-packages-${{ hashFiles('**/setup.py') }}

      - name: Cache downloads
        uses: actions/cache@v3
        with:
          path: ~/.cache/tinygrad/downloads/
          key: downloads-cache-${{ matrix.task }}-${{ env.DOWNLOAD_CACHE_VERSION }}

      - name: Install Dependencies
        run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu

      # ---- task: optimage ----
      - name: Run Optimizer Test (OPT 2 and 3)
        if: matrix.task == 'optimage'
        run: |
          PYTHONPATH="." OPT=2 GPU=1 python -m pytest -n=auto test/external/external_test_opt.py
          PYTHONPATH="." OPT=3 GPU=1 python -m pytest -n=auto test/external/external_test_opt.py

      - name: Test WINO=1
        if: matrix.task == 'optimage'
        run: GPU=1 DEBUG=2 WINO=1 python3 test/test_ops.py TestOps.test_simple_conv2d

      - name: Test GPU IMAGE=1 ops
        if: matrix.task == 'optimage'
        run: GPU=1 IMAGE=1 python -m pytest -n=auto test/test_ops.py

      - name: Test GPU IMAGE=2 ops
        if: matrix.task == 'optimage'
        run: GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py

      # ---- task: openpilot ----
      - name: Test openpilot model compile and size
        if: matrix.task == 'openpilot'
        run: |
          DEBUG=2 ALLOWED_KERNEL_COUNT=207 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py
          #python -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000'

      - name: Test openpilot model correctness (float32)
        if: matrix.task == 'openpilot'
        run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py

      - name: Test openpilot alt model correctness (float32)
        if: matrix.task == 'openpilot'
        run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx

      - name: Test openpilot fastvits model correctness (float32)
        if: matrix.task == 'openpilot'
        run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx

      #- if: ${{ matrix.task == 'openpilot' }}
      #  name: Test multigpu
      #  run: |
      #    PYTHONPATH="." python test/external/dist/test_world.py
      #    PYTHONPATH="." python test/external/dist/test_collectives.py

      # ---- task: onnx ----
      - name: Test ONNX (CPU)
        if: matrix.task == 'onnx'
        run: CPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20

      - name: Test ONNX (GPU)
        if: matrix.task == 'onnx'
        run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20

      - name: Test ONNX (CLANG)
        if: matrix.task == 'onnx'
        run: CLANG=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20

      - name: Test Action Space
        if: matrix.task == 'onnx'
        run: PYTHONPATH="." GPU=1 python3 extra/optimization/get_action_space.py

      - name: Test Beam Search
        if: matrix.task == 'onnx'
        run: PYTHONPATH="." GPU=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py

  testwebgpu:
    name: WebGPU Tests
    runs-on: macos-13
    timeout-minutes: 20

    # Checks that WEBGPU=1 selects the WEBGPU device, runs a subset of the
    # tests on it, builds EfficientNet for WebGPU and drives it from node via
    # Puppeteer. The last step runs the LLaMA speed test with METAL=1.
    steps:
      - name: Checkout Code
        uses: actions/checkout@v3

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      # Packages are installed with `pip install --user` below, so the cached
      # directory is the runner's per-user site-packages.
      - name: Cache python packages
        uses: actions/cache@v3
        with:
          path: /Users/runner/Library/Python/3.11/lib/python/site-packages
          key: webgpu-testing-user3-packages-${{ hashFiles('**/setup.py') }}

      - name: Install Dependencies
        run: pip install --user -e '.[webgpu,testing]' --extra-index-url https://download.pytorch.org/whl/cpu

      - name: Cache downloads
        uses: actions/cache@v3
        with:
          path: ~/Library/Caches/tinygrad/downloads/
          key: downloads-cache-webgpu-${{ env.DOWNLOAD_CACHE_VERSION }}

      - name: Check Device.DEFAULT (WEBGPU) and print some source
        run: |
          WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
          WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add

      #- name: Run webgpu pytest
      #  run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto
      - name: Run selected webgpu tests
        run: >-
          WEBGPU=1 WGPU_BACKEND_TYPE=Metal
          python -m pytest -n=auto test/test_dtype.py test/test_jit.py

      - name: Build WEBGPU Efficientnet
        run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m examples.compile_efficientnet

      - name: Install Puppeteer
        run: npm install puppeteer

      - name: Run WEBGPU Efficientnet
        run: node test/web/test_webgpu.js

      - name: Test LLaMA compile speed
        run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py

  testmetalwebgpu:
    name: Metal Tests
    runs-on: macos-13
    timeout-minutes: 20

    # Checks that METAL=1 selects the METAL device, then runs the test suite
    # (minus test/external and test/models), the ONNX backend tests and one
    # tensor-core gemm test on it.
    steps:
      - name: Checkout Code
        uses: actions/checkout@v3

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      # Packages are installed with `pip install --user` below, so the cached
      # directory is the runner's per-user site-packages.
      - name: Cache python packages
        uses: actions/cache@v3
        with:
          path: /Users/runner/Library/Python/3.11/lib/python/site-packages
          key: metal-testing-user3-packages-${{ hashFiles('**/setup.py') }}

      - name: Install Dependencies
        run: pip install --user -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu

      - name: Cache downloads
        uses: actions/cache@v3
        with:
          path: ~/Library/Caches/tinygrad/downloads/
          key: downloads-cache-metal-only-${{ env.DOWNLOAD_CACHE_VERSION }}

      - name: Check Device.DEFAULT (METAL) and print some source
        run: |
          METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT"
          METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add

      - name: Run metal test
        run: >-
          METAL=1 python -m pytest -n=auto test/
          --ignore=test/external --ignore=test/models
          --durations=20

      - name: Run ONNX
        run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py

      - name: Test tensor core ops
        run: METAL=1 TC=2 python test/test_ops.py TestOps.test_big_gemm

  testhipcompilation:
    name: HIP Compilation Tests
    runs-on: ubuntu-latest
    timeout-minutes: 20

    # Installs the ROCm HIP packages from repo.radeon.com and runs the HIP
    # RDNA3 and symbolic-ops tests with MOCKHIP=1 HIP=1.
    steps:
      - name: Checkout Code
        uses: actions/checkout@v3

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Cache python packages
        uses: actions/cache@v3
        with:
          path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
          key: testing-packages-${{ hashFiles('**/setup.py') }}

      - name: Cache downloads
        uses: actions/cache@v3
        with:
          path: ~/.cache/tinygrad/downloads/
          key: downloads-cache-hipcompilation-${{ env.DOWNLOAD_CACHE_VERSION }}

      # Registers the ROCm apt repository with its signing key, pins it above
      # the distribution's packages, then installs the HIP libraries and
      # headers.
      - name: Install HIP tools
        run: |
          echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
          wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
          # ROCm repository for jammy
          sudo tee /etc/apt/sources.list.d/rocm.list <<'EOF'
          deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/debian jammy main
          EOF
          # Prefer packages from the rocm repository over system packages
          echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
          sudo apt update
          sudo apt install --no-install-recommends --allow-unauthenticated -y rocm-hip-libraries hip-dev

      - name: Install Python Dependencies
        run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu

      # /opt/rocm/hip/lib is appended to the loader path before the tests run.
      - name: Test HIP compilation on RDNA3 [gfx1100]
        run: |
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/hip/lib
          MOCKHIP=1 HIP=1 python -m pytest -s test/test_hip_rdna3.py test/test_symbolic_ops.py


  # Runs the test suite once per backend in the matrix. Steps that only make
  # sense for one family of backends are gated with `if:`; the `ptx` and
  # `triton` branches are kept in the conditions although those entries are
  # currently commented out of the matrix.
  tests:
    strategy:
      fail-fast: false
      matrix:
        backend: [llvm, clang, gpu, cuda] #, triton] #, ptx]

    name: Tests on (${{ matrix.backend }})
    # only the gpu backend runs on ubuntu-20.04; every other backend uses ubuntu-latest
    runs-on: ${{ matrix.backend == 'gpu'  && 'ubuntu-20.04' || 'ubuntu-latest' }}
    timeout-minutes: 20

    steps:
      - name: Checkout Code
        uses: actions/checkout@v3
      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: 3.11
      - name: Cache python packages
        uses: actions/cache@v3
        with:
          path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
          key: ${{ matrix.backend }}-packages-${{ hashFiles('**/setup.py') }}
      - name: Cache downloads
        uses: actions/cache@v3
        with:
          path: ~/.cache/tinygrad/downloads/
          key: downloads-cache-${{ matrix.backend }}-${{ env.DOWNLOAD_CACHE_VERSION }}
      # Appends the variables that select the backend to $GITHUB_ENV so every
      # later step sees them. The expression picks one printf format string
      # per backend; the `\n` sequences are expanded by printf into one
      # VAR=value line each. String comparison in Actions expressions ignores
      # case, so the 'PTX' branch matches a `ptx` matrix entry.
      - name: Set env
        run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas'}}" >> $GITHUB_ENV
      # gpu only: pinned versions of the Intel oneAPI OpenCL CPU runtime
      - name: Install OpenCL
        if: matrix.backend == 'gpu'
        run: |
          echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
          echo "deb [ allow-insecure=yes ] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
          sudo apt update
          sudo apt install --allow-unauthenticated -y --no-install-recommends \
            intel-oneapi-runtime-openmp=2023.2.1-16 intel-oneapi-runtime-compilers-common=2023.2.1-16 intel-oneapi-runtime-compilers=2023.2.1-16 \
            intel-oneapi-runtime-dpcpp-sycl-opencl-cpu=2023.2.1-16 intel-oneapi-runtime-tbb-common=2021.10.0-49541 \
            intel-oneapi-runtime-tbb=2021.10.0-49541 intel-oneapi-runtime-opencl=2023.2.1-16
      # cuda family only: build prerequisites for gpuocelot plus the CUDA toolkit
      - name: Install packages (cuda)
        if: matrix.backend == 'cuda' || matrix.backend == 'ptx' || matrix.backend == 'triton'
        run: |
          echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
          sudo apt update -y
          sudo apt install -y --no-install-recommends git g++ cmake ninja-build llvm-15-dev zlib1g-dev libglew-dev \
            flex bison libfl-dev libboost-thread-dev libboost-filesystem-dev nvidia-cuda-toolkit-gcc
      # The cache key embeds the gpuocelot commit that the clone step checks
      # out, so changing that commit there requires changing it here as well.
      - name: Cache gpuocelot
        if: matrix.backend == 'cuda' || matrix.backend == 'ptx' || matrix.backend == 'triton'
        id: cache-build
        uses: actions/cache@v3
        env:
          cache-name: cache-gpuocelot-build
        with:
          path: ${{ github.workspace }}/gpuocelot/ocelot
          key: ubuntu22.04-gpuocelot-18401f4245b27ca4b3af433196583cc81ef84480-rebuild
      # Runs only when the cache step above did not restore a build.
      - name: Clone/compile gpuocelot
        if: (matrix.backend == 'cuda' || matrix.backend == 'ptx' || matrix.backend == 'triton') && steps.cache-build.outputs.cache-hit != 'true'
        run: |
          git clone --recurse-submodules https://github.com/gpuocelot/gpuocelot.git ${{ github.workspace }}/gpuocelot
          cd ${{ github.workspace }}/gpuocelot/ocelot
          git checkout 18401f4245b27ca4b3af433196583cc81ef84480
          mkdir build
          cd build
          cmake .. -Wno-dev -G Ninja -DOCELOT_BUILD_TOOLS=OFF -DCMAKE_BUILD_ALWAYS=0 -DBUILD_TESTS_CUDA=OFF
          ninja
      # Installs from the build directory, whether it was just built or restored from the cache.
      - name: Install gpuocelot
        if: matrix.backend == 'cuda' || matrix.backend == 'ptx' || matrix.backend == 'triton'
        run: |
          cd ${{ github.workspace }}/gpuocelot/ocelot/build
          sudo ninja install -d explain
      # The extras list is always `testing` plus a backend-specific extra:
      # llvm -> llvm, cuda and ptx -> cuda, triton -> triton, otherwise none.
      - name: Install dependencies
        run: pip install -e '.[testing${{matrix.backend=='llvm'&&',llvm'||matrix.backend=='cuda'&&',cuda'||matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}]' --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/
      # The assertion only checks that the default device is one of the four
      # listed names; it does not check that it is the one for this matrix entry.
      - name: Check Device.DEFAULT and print some source
        run: |
          python -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU'], Device.DEFAULT"
          DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
      # Non-cuda backends run all of test/ minus tests marked exclude_<backend>.
      - name: Run pytest (not cuda)
        if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton'
        run: python -m pytest -n=auto test/ -m 'not exclude_${{matrix.backend}}' --durations=20
      - name: Run ONNX (only LLVM)
        if: matrix.backend == 'llvm'
        run: python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
      # The cuda family skips test/external and test/models, tests marked
      # exclude_cuda, and tests matched by the -k expression.
      - name: Run pytest (cuda)
        if: matrix.backend=='cuda'||matrix.backend=='ptx'||matrix.backend=='triton'
        run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' -m 'not exclude_cuda' --ignore=test/external --ignore=test/models --durations=20

  #testunicorn:
  #  name: ARM64 unicorn Test
  #  runs-on: ubuntu-latest
  #  timeout-minutes: 20
  #  steps:
  #    - name: Checkout Code
  #      uses: actions/checkout@v3
  #    - name: Set up Python 3.11
  #      uses: actions/setup-python@v4
  #      with:
  #        python-version: 3.11
  #    - name: Cache python packages
  #      uses: actions/cache@v3
  #      with:
  #        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
  #        key: testing-arm-packages-${{ hashFiles('**/setup.py') }}
  #    - name: Install cross-assembler
  #      run: |
  #        sudo apt update -y
  #        sudo apt install -y --no-install-recommends gcc-aarch64-linux-gnu
  #    - name: Install dependencies
  #      run: pip install -e '.[testing,arm]' --extra-index-url https://download.pytorch.org/whl/cpu
  #    - name: Test arm
  #      run: CI=1 ARM64=1 CLANG=1 python -m pytest -n=auto test/ -k 'not (test_nn.py and (test_conv_transpose2d or test_conv2d))' --ignore=test/models --ignore=test/test_speed_v_torch.py --ignore=test/test_net_speed.py --ignore=test/test_specific_conv.py  --ignore=test/unit/test_disk_tensor.py