1
0
Fork 0

fix gpu sum forward (#61)

* ignore venv

* add sum test

* fix sum forward
This commit is contained in:
Rene Delgado 2020-11-06 02:59:16 -03:00 committed by GitHub
parent aea1069f63
commit cd54697fd8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 14 additions and 10 deletions

1
.gitignore vendored
View file

@ -4,3 +4,4 @@ notebooks
build build
dist dist
*.egg-info *.egg-info
/env

View file

@ -57,6 +57,8 @@ class TestOps(unittest.TestCase):
helper_test_op([(45,65)], lambda x: x.sigmoid(), Tensor.sigmoid, gpu=self.gpu) helper_test_op([(45,65)], lambda x: x.sigmoid(), Tensor.sigmoid, gpu=self.gpu)
def test_dot(self):
    """Run helper_test_op for a (45,65) x (65,100) product: x.matmul(y) vs Tensor.dot."""
    operand_shapes = [(45,65), (65,100)]
    reference_fn = lambda x,y: x.matmul(y)
    helper_test_op(operand_shapes, reference_fn, Tensor.dot, atol=1e-5, gpu=self.gpu)
def test_sum(self):
    """Run helper_test_op for a full reduction of a (45,1) input: x.sum() vs Tensor.sum."""
    operand_shapes = [(45,1)]
    reference_fn = lambda x: x.sum()
    helper_test_op(operand_shapes, reference_fn, Tensor.sum, atol=1e-5, gpu=self.gpu)
def test_conv2d(self): def test_conv2d(self):
for bs in [1,8]: for bs in [1,8]:

View file

@ -1,6 +1,8 @@
import numpy as np import numpy as np
from .tensor import Function, register, Tensor from .tensor import Function, register, Tensor
import pyopencl as cl import pyopencl as cl
import pyopencl.array as pycl_array
from pyopencl.reduction import ReductionKernel
import functools import functools
def buffer_new(ctx, shape): def buffer_new(ctx, shape):
@ -16,6 +18,10 @@ def buffer_like(ctx, x):
def clbuild(cl_ctx, prg):
    """Create a cl.Program from the source `prg` on `cl_ctx` and return the result of build()."""
    program = cl.Program(cl_ctx, prg)
    return program.build()
# Called form `lru_cache()` so the decorator also works on Python < 3.8, where
# the bare `@functools.lru_cache` spelling raises TypeError at import time.
@functools.lru_cache()
def cl_reduct_krnl_build(cl_ctx, *args, **kwargs):
    """Return a memoized ReductionKernel for `cl_ctx`.

    All positional and keyword arguments are forwarded unchanged to
    ReductionKernel. Because the result is cached by lru_cache, every
    argument must be hashable, and a repeated call with the same arguments
    returns the previously constructed kernel object instead of building a
    new one.
    """
    return ReductionKernel(cl_ctx, *args, **kwargs)
def binary_op(ctx, code, x, y): def binary_op(ctx, code, x, y):
ret = buffer_like(ctx, x) ret = buffer_like(ctx, x)
prg = clbuild(ctx.cl_ctx, """ prg = clbuild(ctx.cl_ctx, """
@ -105,16 +111,11 @@ class Sum(Function):
@staticmethod
def forward(ctx, input):
    """Sum every float32 element of `input` on the device.

    Saves `input` on `ctx` for the backward pass, runs an "a+b" reduction
    kernel over the buffer, and returns the kernel result's underlying
    `.data` object tagged with shape (1,) and dtype float32.
    """
    ctx.save_for_backward(input)

    # NOTE(review): assumes `input` is a cl.Buffer whose `.size` is a byte
    # count (the kernel this replaced launched `input.size//4` work items, one
    # float each) -- confirm. The Array wrapper needs an element count, so
    # passing the byte count would make the reduction cover 4x too many floats.
    n_elems = input.size // np.dtype(np.float32).itemsize

    krnl = cl_reduct_krnl_build(ctx.cl_ctx, np.float32, neutral="0", reduce_expr="a+b",
                                map_expr="x[i]", arguments="__global float *x")
    # Wrap the existing device buffer (no copy) so the reduction kernel can consume it.
    flat_input = pycl_array.Array(ctx.cl_queue, n_elems, dtype=np.float32, data=input)
    ret = krnl(flat_input).data
    # Attach the metadata the rest of the module expects on raw buffers.
    ret.shape = (1,)
    ret.dtype = np.float32
    return ret
@staticmethod @staticmethod