
stop wasting time with the compiler. tinygrad needs to just jit

pull/702/head
George Hotz 2023-03-12 12:08:46 -07:00
parent 46b49d50bd
commit dcac618515
4 changed files with 9 additions and 6 deletions

View File

@@ -1,6 +1,8 @@
 #!/bin/bash
+# note: if we compile tinygrad/nn/__init__.py __dict__ no longer works, and optimizers will silently fail
 mypyc --check-untyped-defs --explicit-package-bases --warn-unreachable tinygrad/shape/shapetracker.py tinygrad/shape/symbolic.py \
-  tinygrad/nn/__init__.py tinygrad/helpers.py tinygrad/mlops.py tinygrad/tensor.py tinygrad/graph.py
+  tinygrad/helpers.py tinygrad/mlops.py tinygrad/tensor.py tinygrad/graph.py \
+  #tinygrad/codegen/gpu.py tinygrad/runtime/ops_metal.py
-#tinygrad/codegen/ast.py
+#tinygrad/nn/__init__.py
 #tinygrad/ops.py tinygrad/runtime/ops_metal.py tinygrad/runtime/ops_gpu.py tinygrad/runtime/ops_cpu.py tinygrad/lazy.py
 #tinygrad/codegen/ast.py tinygrad/codegen/gpu.py
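
The note added in this script is the crux of the change: tinygrad's optimizers discover a model's Tensors by recursively walking instance __dict__s, and a mypyc-compiled class may stop exposing __dict__, so discovery comes back empty without raising an error. A minimal sketch of that pattern, with stand-in types rather than the exact tinygrad source:

from typing import Any, List

class Tensor:  # stand-in for tinygrad.tensor.Tensor
  pass

def get_parameters(obj: Any) -> List[Tensor]:
  # recursively collect Tensors from attributes, lists, and tuples
  if isinstance(obj, Tensor): return [obj]
  if isinstance(obj, (list, tuple)):
    return [p for x in obj for p in get_parameters(x)]
  if hasattr(obj, '__dict__'):  # a compiled class may not expose __dict__
    return [p for v in vars(obj).values() for p in get_parameters(v)]
  return []  # no __dict__ means no parameters found: the "silently fail" mode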

View File

@@ -1,7 +1,7 @@
 import itertools
 from enum import Enum, auto
 from typing import List, Tuple
-from tinygrad.helpers import prod, dedup, all_same, colored, dtypes
+from tinygrad.helpers import prod, dedup, all_same, colored, DType
 from tinygrad.ops import LazyOp, MovementOps, get_lazyop_info, get_buffers, ReduceOps, get_lazyops, map_buffers, GenericShape, ASTRunner
 from tinygrad.shape.shapetracker import ShapeTracker, View, strides_for_shape
 
@@ -26,7 +26,7 @@ class Token:
     if len(self.axis) == 0: return [0]
     acc_strides = [x*(1-self.axis[::-1][i][2]) for i,x in enumerate(strides_for_shape(tuple(1 if r else s for s,_,r in self.axis[::-1])))]
     return [sum(t) for t in itertools.product(*[[y*acc_strides[i] for y in range(x[0])] for i,x in enumerate(self.axis[::-1])])]
-  def decltype(self, dtype=dtypes.float32): return (dtype.name if self.typ == Types.FLOAT else f'{dtype.name}4') + ('*' if self.ptr else str())
+  def decltype(self, dtype:DType): return (dtype.name if self.typ == Types.FLOAT else f'{dtype.name}4') + ('*' if self.ptr else str())
   def __repr__(self): return f"<{self.typ}{'*' if self.ptr else str()} {self.tok}{f'[{self.axis}]' if len(self.axis) else str()}>"
 
 # ast kernel can contain one ReduceOp with arbitrary Binary/Unary ops
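
This hunk drops the implicit float32 default from decltype, forcing every caller to name a DType. A stand-alone sketch of the string it renders, assuming the behavior shown in the one-liner above (stand-in DType, not the tinygrad class):

from dataclasses import dataclass

@dataclass(frozen=True)
class DType:  # stand-in for tinygrad.helpers.DType
  name: str

float32 = DType("float")

def decltype(is_float4: bool, ptr: bool, dtype: DType) -> str:
  # scalar tokens render the dtype's C name, FLOAT4 tokens get a "4" suffix,
  # and pointer tokens append "*"
  return (f"{dtype.name}4" if is_float4 else dtype.name) + ("*" if ptr else "")

assert decltype(False, True, float32) == "float*"   # e.g. a buffer argument
assert decltype(True, False, float32) == "float4"   # e.g. an upcast value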

View File

@@ -129,7 +129,8 @@ class GPUCodegen(ASTKernel):
         if const is not None:
           self.loaded_keys[(buf_index,o)] = ldr
         else:
-          self.kernel.append(f"{ldr.decltype()} {key} = {ldr.tok};\n")
+          # NOTE: we always do compute in float32
+          self.kernel.append(f"{ldr.decltype(dtypes.float32)} {key} = {ldr.tok};\n")
         if should_upcast and can_merge:
           for j in range(4):
             self.loaded_keys[(buf_index,o+j)] = Token(key+f'.{"xyzw"[j]}', Types.FLOAT)
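
With dtypes.float32 now passed explicitly, a loaded value is declared as float (or float4 when upcast) in the emitted kernel regardless of the buffer's storage dtype. Roughly what the appended line looks like, with hypothetical token names:

decl, key, tok = "float", "val1_0", "data1[gid]"  # hypothetical; decl is ldr.decltype(dtypes.float32)
kernel_line = f"{decl} {key} = {tok};\n"          # the string self.kernel.append receives
assert kernel_line == "float val1_0 = data1[gid];\n"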

View File

@@ -22,7 +22,7 @@ class LazyOp(NamedTuple):
   # Any == Union[LazyOp, LazyBuffer, DeviceBuffer]
   src: Tuple[Any, ...] # type: ignore
   arg: Any = None
-  # TODO: add dest to support multiple outputs
+  # TODO: add dest to support multiple outputs. on second thought, multiple outputs will have multiple LazyOps.
 
 # Any == Union[LazyBuffer, DeviceBuffer]
 def get_buffers(op:LazyOp) -> List[Any]: return functools.reduce(operator.add, [get_buffers(x) if isinstance(x, LazyOp) else [x] for x in op.src], [])
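
get_buffers is a plain tree flatten: src can mix nested LazyOps with leaf buffers, and the reduce concatenates the leaves left to right. A self-contained sketch with strings standing in for buffers:

import functools, operator
from typing import Any, List, NamedTuple, Tuple

class LazyOp(NamedTuple):
  op: str
  src: Tuple[Any, ...]
  arg: Any = None

def get_buffers(op: LazyOp) -> List[Any]:
  # recurse into LazyOp sources, keep everything else as a leaf buffer
  return functools.reduce(operator.add, [get_buffers(x) if isinstance(x, LazyOp) else [x] for x in op.src], [])

tree = LazyOp("ADD", (LazyOp("MUL", ("bufA", "bufB")), "bufC"))
assert get_buffers(tree) == ["bufA", "bufB", "bufC"]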