1
0
Fork 0

Reenable tensor cores for self-hosted Mac CI (#1717)

* debug 5 matmul

* allow tensor cores in CI

* tensor cores on arm64

* put debug back
pull/1718/head
George Hotz 2023-08-30 07:53:04 -07:00 committed by GitHub
parent ac183568be
commit fdd7f282cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 6 additions and 3 deletions

View File

@@ -22,7 +22,9 @@ jobs:
run: BIG=2 MPS=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
shell: bash
- name: Run Tensor Core GEMM
-run: DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
+run: |
+ln -s ~/tinygrad/disassemblers/applegpu disassemblers/applegpu
+DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
shell: bash
- name: Run Stable Diffusion
run: |

1
.gitignore vendored
View File

@@ -6,6 +6,7 @@ notebooks
.*.swo
*.pyc
*.so
+*.txt
build
/dist
*.egg-info

View File

@@ -1,5 +1,5 @@
from typing import Tuple, List, cast
-import itertools, math
+import itertools, math, os
from tinygrad.helpers import DEBUG, prod, getenv, ImageDType, dtypes
from tinygrad.ops import ReduceOps, BinaryOps, UnaryOps, LazyOp
from tinygrad.codegen.kernel import Kernel, LocalBuffer
@@ -228,7 +228,7 @@ class OptimizedKernel(Kernel):
# should use METAL tensor cores?
# first, confirm it's a straightforward mulacc on a device with real locals
-tensor_cores_allowed = getenv("TC", 1) != 0 and (getenv("TC", 1) == 2 or (self.bufs[0].device == "METAL" and getenv("CI", "") != "true"))
+tensor_cores_allowed = getenv("TC", 1) != 0 and (getenv("TC", 1) == 2 or (self.bufs[0].device == "METAL" and os.uname().machine == "arm64"))
if tensor_cores_allowed and self.reduceop and self.reduceop.op == ReduceOps.SUM and \
isinstance(self.reduceop.src[0], LazyOp) and self.reduceop.src[0].op == BinaryOps.MUL and \
isinstance(self.reduceop.src[0].src[0], LazyBuffer) and isinstance(self.reduceop.src[0].src[1], LazyBuffer) and self.opts.has_local: