Reenable tensor cores for self-hosted Mac CI (#1717)
* debug 5 matmul * allow tensor cores in CI * tensor cores on arm64 * put debug back (branch: pull/1718/head)
parent
ac183568be
commit
fdd7f282cb
|
@ -22,7 +22,9 @@ jobs:
|
|||
run: BIG=2 MPS=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
|
||||
shell: bash
|
||||
- name: Run Tensor Core GEMM
|
||||
run: DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
|
||||
run: |
|
||||
ln -s ~/tinygrad/disassemblers/applegpu disassemblers/applegpu
|
||||
DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
|
||||
shell: bash
|
||||
- name: Run Stable Diffusion
|
||||
run: |
|
||||
|
|
|
@ -6,6 +6,7 @@ notebooks
|
|||
.*.swo
|
||||
*.pyc
|
||||
*.so
|
||||
*.txt
|
||||
build
|
||||
/dist
|
||||
*.egg-info
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from typing import Tuple, List, cast
|
||||
import itertools, math
|
||||
import itertools, math, os
|
||||
from tinygrad.helpers import DEBUG, prod, getenv, ImageDType, dtypes
|
||||
from tinygrad.ops import ReduceOps, BinaryOps, UnaryOps, LazyOp
|
||||
from tinygrad.codegen.kernel import Kernel, LocalBuffer
|
||||
|
@ -228,7 +228,7 @@ class OptimizedKernel(Kernel):
|
|||
|
||||
# should use METAL tensor cores?
|
||||
# first, confirm it's a straightforward mulacc on a device with real locals
|
||||
tensor_cores_allowed = getenv("TC", 1) != 0 and (getenv("TC", 1) == 2 or (self.bufs[0].device == "METAL" and getenv("CI", "") != "true"))
|
||||
tensor_cores_allowed = getenv("TC", 1) != 0 and (getenv("TC", 1) == 2 or (self.bufs[0].device == "METAL" and os.uname().machine == "arm64"))
|
||||
if tensor_cores_allowed and self.reduceop and self.reduceop.op == ReduceOps.SUM and \
|
||||
isinstance(self.reduceop.src[0], LazyOp) and self.reduceop.src[0].op == BinaryOps.MUL and \
|
||||
isinstance(self.reduceop.src[0].src[0], LazyBuffer) and isinstance(self.reduceop.src[0].src[1], LazyBuffer) and self.opts.has_local:
|
||||
|
|
Loading…
Reference in New Issue