torch test touchup
parent
59af9b81c5
commit
d6637623e3
|
@ -35,4 +35,5 @@ examples/webgpu/net.js
|
|||
examples/webgpu/net.safetensors
|
||||
node_modules
|
||||
package.json
|
||||
package-lock.json
|
||||
package-lock.json
|
||||
temp
|
||||
|
|
|
@ -93,8 +93,7 @@ def helper_test_generic(name, f1, f1_args, f2, f2_args):
|
|||
desc = "faster" if et_torch > et_tinygrad else "slower"
|
||||
flops = save_ops*1e-6
|
||||
mem = save_mem*1e-6
|
||||
print(f"{prefix}{name:42s} {et_torch:7.2f} ms ({flops/et_torch:8.2f} GFLOPS {mem/et_torch:8.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:8.2f} GFLOPS {mem/et_tinygrad:8.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB")
|
||||
prefix = " "
|
||||
print(f"\r{name:42s} {et_torch:7.2f} ms ({flops/et_torch:8.2f} GFLOPS {mem/et_torch:8.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:8.2f} GFLOPS {mem/et_tinygrad:8.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB")
|
||||
np.testing.assert_allclose(val_tinygrad, val_torch, atol=1e-4, rtol=1e-3)
|
||||
|
||||
def helper_test_conv(bs, in_chans, out_chans, kernel_size, img_size_y, img_size_x):
|
||||
|
@ -112,10 +111,6 @@ def helper_test_conv(bs, in_chans, out_chans, kernel_size, img_size_y, img_size_
|
|||
|
||||
@unittest.skipIf(getenv("BIG") != 1, "no big tests")
|
||||
class TestBigSpeed(unittest.TestCase):
|
||||
def setUp(self):
|
||||
global prefix
|
||||
prefix = " " if prefix is None else ""
|
||||
return super().setUp()
|
||||
def test_add(self):
|
||||
def f(a, b): return a+b
|
||||
helper_test_generic_square('add', 8192, f, f)
|
||||
|
@ -133,11 +128,6 @@ class TestBigSpeed(unittest.TestCase):
|
|||
|
||||
@unittest.skipIf((getenv("BIG") == 1 or Device.DEFAULT == "WEBGPU"), "only big tests")
|
||||
class TestSpeed(unittest.TestCase):
|
||||
def setUp(self):
|
||||
global prefix
|
||||
prefix = " " if prefix is None else ""
|
||||
return super().setUp()
|
||||
|
||||
def test_sub(self):
|
||||
def f(a, b): return a-b
|
||||
helper_test_generic_square('sub', 4096, f, f)
|
||||
|
@ -155,13 +145,14 @@ class TestSpeed(unittest.TestCase):
|
|||
R = 256
|
||||
def f(a, b): return a.reshape(int(4096//R), int(4096*R)).sum(axis=1)
|
||||
helper_test_generic_square('partial_sum', 4096, f, f, onearg=True)
|
||||
|
||||
|
||||
@unittest.skip("not really used in models")
|
||||
def test_cumsum(self):
|
||||
def f0(a, b): return a.cumsum(axis=0)
|
||||
def f1(a, b): return a.cumsum(axis=1)
|
||||
helper_test_generic_square('cumsum_0', 256, f0, f0, onearg=True)
|
||||
helper_test_generic_square('cumsum_1', 256, f1, f1, onearg=True)
|
||||
|
||||
|
||||
def test_array_packing(self):
|
||||
N = 2048
|
||||
def f(a, b): return a.reshape(N, N // 32, 32).permute(1,0,2).contiguous()
|
||||
|
@ -259,7 +250,7 @@ class TestSpeed(unittest.TestCase):
|
|||
|
||||
def f1(torch_dat): return torch_conv(torch_dat.permute(0,3,1,2))
|
||||
def f2(tiny_dat): return tiny_conv(tiny_dat.permute(0,3,1,2)).realize()
|
||||
helper_test_generic(f"conv bs:{bs:3d} chans:{in_chans:3d} -> {out_chans:3d}", f1, (torch_dat,), TinyJit(f2), (tiny_dat,))
|
||||
helper_test_generic(f"conv bs:{bs:3d} chans:{in_chans:3d} -> {out_chans:3d} k:3", f1, (torch_dat,), TinyJit(f2), (tiny_dat,))
|
||||
|
||||
def test_conv2d(self):
|
||||
for bs in [32]:
|
||||
|
|
Loading…
Reference in New Issue