
Sum doesn't need to save the tensor

George Hotz 2022-06-05 12:04:51 -07:00
parent c8b569a8c7
commit 2097d814f6
2 changed files with 63 additions and 3 deletions

docs/design (new file, 60 lines added)
@@ -0,0 +1,60 @@
Getting the core instruction set correct is the value of tinygrad
Unary Ops
===
These are the simplest to reason about, and have pointwise mem access.
Forward : A -> B
Backward (binary): (B', A) -> A'
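
For illustration only, a minimal numpy sketch of one unary op under this contract (ReLU is my example, not something this doc prescribes):

import numpy as np

def relu_forward(a):           # A -> B, pointwise
  return np.maximum(a, 0)

def relu_backward(grad_b, a):  # (B', A) -> A', itself a pointwise binary op
  return grad_b * (a > 0)
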
Reduce Ops (with axis)
===
These take in an axis argument; B is smaller than A.
Max and Sum are pretty different; do we really need Max?
Forward : A -> B
Backward : B' -> A'
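
A minimal numpy sketch of Sum under this contract (illustration only, not tinygrad code); note that the backward needs only A's shape, never A itself:

import numpy as np

def sum_forward(a, axis=None):
  return a.sum(axis=axis, keepdims=True)   # B is smaller than A

def sum_backward(grad_b, a_shape):
  return np.broadcast_to(grad_b, a_shape)  # broadcast B' back up to A's shape
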
Binary Ops (with broadcasting)
===
These also have pointwise mem access.
Broadcasting adds complexity: the broadcast input is aliased (the same elements are read more than once).
Unbroadcasting for grad is a sum, but should be combined with the ternary op.
Forward : (A, B) -> C
Backward (ternary): (C', A, B) -> (A', B')
C.shape = max(A.shape, B.shape)
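
A minimal numpy sketch of a broadcasting binary op (Mul is my example) with the unbroadcast-by-sum in the backward; illustration only, not tinygrad code:

import numpy as np

def unbroadcast(grad, shape):
  # sum out the axes that broadcasting introduced or expanded
  while grad.ndim > len(shape):
    grad = grad.sum(axis=0)
  for i, s in enumerate(shape):
    if s == 1 and grad.shape[i] != 1:
      grad = grad.sum(axis=i, keepdims=True)
  return grad

def mul_forward(a, b):           # (A, B) -> C
  return a * b

def mul_backward(grad_c, a, b):  # (C', A, B) -> (A', B')
  return unbroadcast(grad_c * b, a.shape), unbroadcast(grad_c * a, b.shape)

Fusing those sums into the (C', A, B) ternary kernel is what "combined with the ternary op" above refers to.
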
Movement Ops
===
Reshape, Transpose, Slice
Depending on your Tensor implementation, these are free.
Reshape is almost always free.
Slice can be made free.
Transpose is hard to make free except in trivial cases.
Regardless, these are "reindexings" of existing arrays.
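
In a strided library like numpy all three are reindexings of the same buffer; the point above is that a flat buffer without strides makes transpose force a copy. Illustration only:

import numpy as np

a = np.arange(12, dtype=np.float32)
r = a.reshape(3, 4)    # reshape: same buffer, new shape
t = r.T                # transpose: same buffer, swapped strides
s = r[:, 1:3]          # slice: same buffer, offset + strides
assert np.shares_memory(a, r) and np.shares_memory(a, t) and np.shares_memory(a, s)
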
Processing Ops
===
Matmul is 1 matmul for forward, 2 for backward.
Conv2D is very complex.
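
For the matmul count, a minimal 2-D numpy sketch (illustration only):

import numpy as np

def matmul_forward(a, b):           # one matmul
  return a @ b

def matmul_backward(grad_c, a, b):  # two matmuls: (C', A, B) -> (A', B')
  return grad_c @ b.T, a.T @ grad_c
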


@@ -139,13 +139,13 @@ def reduce_op(ctx, code, code2, inp, axis=None, start="0.0"):
 class Sum(Function):
   def forward(ctx, input, axis=None):
-    ctx.save_for_backward(input, axis)
+    ctx.save_for_backward(input.shape)
     return reduce_op(ctx, "out += a", "out", input, axis=axis)

   def backward(ctx, grad_output):
-    input, axis = ctx.saved_tensors
+    shape_input, = ctx.saved_tensors
     output = GPUBuffer(grad_output.shape, hostbuf=grad_output)
-    return binary_op(ctx, 'a+b', output, buffer_new(ctx, input.shape, zero=True))
+    return binary_op(ctx, 'a+b', output, buffer_new(ctx, shape_input, zero=True))

 class Max(Function):
   def forward(ctx, input, axis=None):