add 2 to locals, uops debug 5 (#1782)
parent
2a11669e1d
commit
6100d7425f
7
setup.py
7
setup.py
|
@ -19,9 +19,9 @@ setup(name='tinygrad',
|
|||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License"
|
||||
],
|
||||
install_requires=["numpy", "requests", "pillow", "tqdm", "networkx", "pyopencl", "PyYAML",
|
||||
"pyobjc-framework-Metal; platform_system=='Darwin'",
|
||||
"pyobjc-framework-Cocoa; platform_system=='Darwin'",
|
||||
install_requires=["numpy", "requests", "pillow", "tqdm", "networkx", "pyopencl", "PyYAML",
|
||||
"pyobjc-framework-Metal; platform_system=='Darwin'",
|
||||
"pyobjc-framework-Cocoa; platform_system=='Darwin'",
|
||||
"pyobjc-framework-libdispatch; platform_system=='Darwin'"],
|
||||
python_requires='>=3.8',
|
||||
extras_require={
|
||||
|
@ -46,6 +46,7 @@ setup(name='tinygrad',
|
|||
"tabulate",
|
||||
"safetensors",
|
||||
"types-PyYAML",
|
||||
"types-tqdm",
|
||||
"cloudpickle",
|
||||
"transformers",
|
||||
"nevergrad",
|
||||
|
|
|
@ -415,7 +415,7 @@ class Linearizer(OptimizedKernel):
|
|||
if arg == BinaryOps.DIV and vin[1].uop == UOps.CONST and vin[1].arg == 1.0: return vin[0]
|
||||
if cachable and key in self.saved_exprs: return self.saved_exprs[key]
|
||||
self.uops.append(UOp(uop, dtype, vin, arg, len(self.uops)))
|
||||
if DEBUG >= 4: print(self.uops[-1])
|
||||
if DEBUG >= 5: print(self.uops[-1])
|
||||
if cachable: self.saved_exprs[key] = self.uops[-1]
|
||||
return self.uops[-1]
|
||||
|
||||
|
|
|
@ -402,7 +402,7 @@ class OptimizedKernel(Kernel):
|
|||
to_local: List[Tuple[int, int]] = []
|
||||
for _, axis in sorted(local_axis_ranking, key=lambda x: (-x[0], -x[1])):
|
||||
local_size = prod(sz for _, sz in to_local)
|
||||
local_sz: Optional[int] = next((x for x in ([32] * (axis == 0) + [16, 8, 4, 3]) if self.full_shape[axis] % x == 0 and local_size * x <= 128), None)
|
||||
local_sz: Optional[int] = next((x for x in ([32] * (axis == 0) + [16, 8, 4, 3, 2]) if self.full_shape[axis] % x == 0 and local_size * x <= 128), None)
|
||||
if local_sz is not None: to_local.append((axis, local_sz))
|
||||
for axis, local_sz in sorted(to_local[:3]):
|
||||
self.shift_to(axis, local_sz, insert_before=self.first_reduce)
|
||||
|
|
|
@ -8,7 +8,7 @@ from tinygrad.codegen.kernel import LinearizerOptions
|
|||
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
|
||||
|
||||
# TODO: if you fork and exit the child process after creating anything with cl on AMD, it hangs on e.wait()
|
||||
if DEBUG >= 5:
|
||||
if DEBUG >= 6:
|
||||
from extra.helpers import enable_early_exec
|
||||
early_exec = enable_early_exec()
|
||||
|
||||
|
@ -45,7 +45,7 @@ class HIPProgram:
|
|||
except Exception as e:
|
||||
if DEBUG >= 3: print("FAILED TO BUILD", prg)
|
||||
raise e
|
||||
if DEBUG >= 5:
|
||||
if DEBUG >= 6:
|
||||
asm = early_exec((["/opt/rocm/llvm/bin/llvm-objdump", '-d', '-'], prg))
|
||||
print('\n'.join([x for x in asm.decode('utf-8').split("\n") if 's_code_end' not in x]))
|
||||
|
||||
|
|
Loading…
Reference in New Issue