1
0
Fork 0

torch test touchup

pull/1285/head
George Hotz 2023-07-19 09:37:23 -07:00
parent 59af9b81c5
commit d6637623e3
2 changed files with 7 additions and 15 deletions

3
.gitignore vendored
View File

@@ -35,4 +35,5 @@ examples/webgpu/net.js
examples/webgpu/net.safetensors
node_modules
package.json
package-lock.json
package-lock.json
temp

View File

@@ -93,8 +93,7 @@ def helper_test_generic(name, f1, f1_args, f2, f2_args):
desc = "faster" if et_torch > et_tinygrad else "slower"
flops = save_ops*1e-6
mem = save_mem*1e-6
print(f"{prefix}{name:42s} {et_torch:7.2f} ms ({flops/et_torch:8.2f} GFLOPS {mem/et_torch:8.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:8.2f} GFLOPS {mem/et_tinygrad:8.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB")
prefix = " "
print(f"\r{name:42s} {et_torch:7.2f} ms ({flops/et_torch:8.2f} GFLOPS {mem/et_torch:8.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:8.2f} GFLOPS {mem/et_tinygrad:8.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB")
np.testing.assert_allclose(val_tinygrad, val_torch, atol=1e-4, rtol=1e-3)
def helper_test_conv(bs, in_chans, out_chans, kernel_size, img_size_y, img_size_x):
@@ -112,10 +111,6 @@ def helper_test_conv(bs, in_chans, out_chans, kernel_size, img_size_y, img_size_x):
@unittest.skipIf(getenv("BIG") != 1, "no big tests")
class TestBigSpeed(unittest.TestCase):
def setUp(self):
    """Reset the module-level output prefix before each benchmark test.

    The first setUp in the process sets it to a single space (aligns the
    first printed row); every later call clears it to the empty string.
    """
    global prefix
    if prefix is None:
        prefix = " "
    else:
        prefix = ""
    return super().setUp()
def test_add(self):
    """Time elementwise tensor addition on an 8192x8192 square."""
    def elementwise_add(x, y):
        return x + y
    helper_test_generic_square('add', 8192, elementwise_add, elementwise_add)
@@ -133,11 +128,6 @@ class TestBigSpeed(unittest.TestCase):
@unittest.skipIf((getenv("BIG") == 1 or Device.DEFAULT == "WEBGPU"), "only big tests")
class TestSpeed(unittest.TestCase):
def setUp(self):
    """Reset the module-level output prefix before each benchmark test.

    The first setUp in the process sets it to a single space (aligns the
    first printed row); every later call clears it to the empty string.
    """
    global prefix
    if prefix is None:
        prefix = " "
    else:
        prefix = ""
    return super().setUp()
def test_sub(self):
    """Time elementwise tensor subtraction on a 4096x4096 square."""
    def elementwise_sub(x, y):
        return x - y
    helper_test_generic_square('sub', 4096, elementwise_sub, elementwise_sub)
@@ -155,13 +145,14 @@ class TestSpeed(unittest.TestCase):
R = 256
def f(a, b): return a.reshape(int(4096//R), int(4096*R)).sum(axis=1)
helper_test_generic_square('partial_sum', 4096, f, f, onearg=True)
@unittest.skip("not really used in models")
def test_cumsum(self):
    """Time cumulative sums along both axes of a 256x256 square."""
    def along_rows(x, _unused):
        return x.cumsum(axis=0)
    def along_cols(x, _unused):
        return x.cumsum(axis=1)
    helper_test_generic_square('cumsum_0', 256, along_rows, along_rows, onearg=True)
    helper_test_generic_square('cumsum_1', 256, along_cols, along_cols, onearg=True)
def test_array_packing(self):
N = 2048
def f(a, b): return a.reshape(N, N // 32, 32).permute(1,0,2).contiguous()
@@ -259,7 +250,7 @@ class TestSpeed(unittest.TestCase):
def f1(torch_dat): return torch_conv(torch_dat.permute(0,3,1,2))
def f2(tiny_dat): return tiny_conv(tiny_dat.permute(0,3,1,2)).realize()
helper_test_generic(f"conv bs:{bs:3d} chans:{in_chans:3d} -> {out_chans:3d}", f1, (torch_dat,), TinyJit(f2), (tiny_dat,))
helper_test_generic(f"conv bs:{bs:3d} chans:{in_chans:3d} -> {out_chans:3d} k:3", f1, (torch_dat,), TinyJit(f2), (tiny_dat,))
def test_conv2d(self):
for bs in [32]: