1
0
Fork 0

add 2 to locals, uops debug 5 (#1782)

pull/1786/head
George Hotz 2023-09-05 19:44:43 -07:00 committed by GitHub
parent 2a11669e1d
commit 6100d7425f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 8 additions and 7 deletions

View File

@@ -19,9 +19,9 @@ setup(name='tinygrad',
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License"
],
-install_requires=["numpy", "requests", "pillow", "tqdm", "networkx", "pyopencl", "PyYAML",
-"pyobjc-framework-Metal; platform_system=='Darwin'",
-"pyobjc-framework-Cocoa; platform_system=='Darwin'",
+install_requires=["numpy", "requests", "pillow", "tqdm", "networkx", "pyopencl", "PyYAML",
+"pyobjc-framework-Metal; platform_system=='Darwin'",
+"pyobjc-framework-Cocoa; platform_system=='Darwin'",
"pyobjc-framework-libdispatch; platform_system=='Darwin'"],
python_requires='>=3.8',
extras_require={
@@ -46,6 +46,7 @@ setup(name='tinygrad',
"tabulate",
"safetensors",
"types-PyYAML",
+"types-tqdm",
"cloudpickle",
"transformers",
"nevergrad",

View File

@@ -415,7 +415,7 @@ class Linearizer(OptimizedKernel):
if arg == BinaryOps.DIV and vin[1].uop == UOps.CONST and vin[1].arg == 1.0: return vin[0]
if cachable and key in self.saved_exprs: return self.saved_exprs[key]
self.uops.append(UOp(uop, dtype, vin, arg, len(self.uops)))
-if DEBUG >= 4: print(self.uops[-1])
+if DEBUG >= 5: print(self.uops[-1])
if cachable: self.saved_exprs[key] = self.uops[-1]
return self.uops[-1]

View File

@@ -402,7 +402,7 @@ class OptimizedKernel(Kernel):
to_local: List[Tuple[int, int]] = []
for _, axis in sorted(local_axis_ranking, key=lambda x: (-x[0], -x[1])):
local_size = prod(sz for _, sz in to_local)
-local_sz: Optional[int] = next((x for x in ([32] * (axis == 0) + [16, 8, 4, 3]) if self.full_shape[axis] % x == 0 and local_size * x <= 128), None)
+local_sz: Optional[int] = next((x for x in ([32] * (axis == 0) + [16, 8, 4, 3, 2]) if self.full_shape[axis] % x == 0 and local_size * x <= 128), None)
if local_sz is not None: to_local.append((axis, local_sz))
for axis, local_sz in sorted(to_local[:3]):
self.shift_to(axis, local_sz, insert_before=self.first_reduce)

View File

@@ -8,7 +8,7 @@ from tinygrad.codegen.kernel import LinearizerOptions
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
# TODO: if you fork and exit the child process after creating anything with cl on AMD, it hangs on e.wait()
-if DEBUG >= 5:
+if DEBUG >= 6:
from extra.helpers import enable_early_exec
early_exec = enable_early_exec()
@@ -45,7 +45,7 @@ class HIPProgram:
except Exception as e:
if DEBUG >= 3: print("FAILED TO BUILD", prg)
raise e
-if DEBUG >= 5:
+if DEBUG >= 6:
asm = early_exec((["/opt/rocm/llvm/bin/llvm-objdump", '-d', '-'], prg))
print('\n'.join([x for x in asm.decode('utf-8').split("\n") if 's_code_end' not in x]))