
more tensor docstrings

deepcrayon
Jeff Moe 2023-12-07 09:57:38 -07:00
parent 67a6e588fb
commit e76c916978
1 changed file with 288 additions and 13 deletions

@@ -51,6 +51,7 @@ class Function:
requires_grad (Union[bool, None]): Indicates whether the output tensor requires gradient computation.
parents (List[Tensor]): The parent tensors for which gradients can be computed.
"""
def __init__(self, device: str, *tensors: Tensor):
self.device = device
self.needs_input_grad = [t.requires_grad for t in tensors]
@@ -113,7 +114,7 @@ import tinygrad.mlops as mlops
class Tensor:
'''
"""
This class represents a tensor, which is the fundamental unit of data in tinygrad.
It can be used for various mathematical operations and machine learning applications.
@@ -123,7 +124,8 @@ class Tensor:
training (ClassVar[bool]): Class variable to track if the tensor is in training mode or not.
no_grad (ClassVar[bool]): Class variable to track if gradient computation is disabled or not.
default_type (ClassVar[DType]): Default data type for tensors.
'''
"""
__slots__ = "lazydata", "requires_grad", "grad", "_ctx"
__deletable__ = ("_ctx",)
training: ClassVar[bool] = False
@@ -148,7 +150,7 @@ class Tensor:
dtype: Optional[DType] = None,
requires_grad: Optional[bool] = None,
):
'''
"""
Constructs a new tensor from the given data with the specified device and data type.
Args:
@@ -156,7 +158,7 @@ class Tensor:
device (Optional[str]): Device where the tensor will be stored.
dtype (Optional[DType]): Data type of the tensor.
requires_grad (Optional[bool]): Flag indicating if gradient computation is required or not.
'''
"""
assert dtype is None or isinstance(dtype, DType), f"invalid dtype {dtype}"
device = Device.canonicalize(device)
# tensors have gradients, buffers do not
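For orientation, a minimal construction sketch using only the parameters documented above (values and shapes are arbitrary; dtype and device fall back to Tensor.default_type and the local default backend when omitted):
from tinygrad.tensor import Tensor
# data can be a Python list, a number, or a numpy array
t = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
print(t.shape, t.requires_grad)  # (2, 2) True
# buffers that should never accumulate gradients opt out explicitly
w = Tensor([0.5, -0.5], requires_grad=False)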
@@ -820,6 +822,7 @@ class Tensor:
Returns:
List[Tensor]: A list of tensors in topological order (deepest first).
"""
def _deepwalk(node, visited, nodes):
visited.add(node)
if getattr(node, "_ctx", None):
@@ -1043,6 +1046,7 @@ class Tensor:
Returns:
Tensor: The tensor item corresponding to the given index or indices.
"""
def normalize_int(e, i, dim_sz):
"""
Normalize an integer index based on its dimension size.
@@ -1055,6 +1059,7 @@ class Tensor:
Returns:
int: The normalized integer index.
"""
def normalize_int(e, i, dim_sz):
"""
Normalize an integer index based on its dimension size.
@@ -2634,45 +2639,250 @@ class Tensor:
# ***** activation functions (unary) *****
def elu(self, alpha=1.0):
"""
Calculate the Exponential Linear Unit (ELU) activation function.
This method calculates the ELU function for each element in `self`. The ELU function is defined as:
f(x) = alpha * (exp(x) - 1) if x <= 0
f(x) = x if x > 0
Parameters:
alpha (float): A scaling factor for the negative part of the function, default is 1.0.
Returns:
Tensor: The transformed tensor after applying the ELU function element-wise.
Note:
Implemented in terms of `relu` and `exp` as relu(x) - alpha * relu(1 - exp(x)).
"""
return self.relu() - alpha * (1 - self.exp()).relu()
def celu(self, alpha=1.0):
"""
Calculate the Continuously Differentiable Exponential Linear Unit (C-ELU) activation function.
This method calculates the C-ELU function for each element in `self`. The C-ELU function is defined as:
f(x) = max(0, x) + min(0, alpha * (exp(x / alpha) - 1))
Parameters:
alpha (float): A scaling factor for the negative part of the function, default is 1.0.
Returns:
Tensor: The transformed tensor after applying the CELU function element-wise.
Note:
Implemented in terms of `maximum`, `exp`, and `minimum`.
"""
return self.maximum(0) + (alpha * ((self / alpha).exp() - 1)).minimum(0)
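As a quick sanity check of the two identities above (illustrative only; NumPy is assumed just for the reference values):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0], dtype=np.float32)
t = Tensor(x)
# piecewise reference definitions with alpha = 1.0
elu_ref = np.where(x > 0, x, np.exp(x) - 1)
celu_ref = np.maximum(0, x) + np.minimum(0, np.exp(x) - 1)
assert np.allclose(t.elu().numpy(), elu_ref, atol=1e-5)
assert np.allclose(t.celu().numpy(), celu_ref, atol=1e-5)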
def swish(self):
"""
Calculate the Swish activation function.
This method calculates the Swish function for each element in `self`. The Swish function is defined as:
f(x) = x * sigmoid(x)
Returns:
Tensor: The transformed tensor after applying the Swish function element-wise.
Note:
Implemented in terms of `sigmoid`, where sigmoid(x) = 1 / (1 + exp(-x)).
"""
return self * self.sigmoid()
def silu(self):
"""
Calculate the Sigmoid Weighted Linear Unit (SiLU) activation function, also known as the swish function.
This method calculates the SiLU function for each element in `self` using the Swish function. The SiLU function is defined as:
f(x) = x * sigmoid(x)
Returns:
Tensor: The transformed tensor after applying the SiLU function element-wise.
Note:
SiLU is identical to Swish, so this method simply delegates to `swish`.
"""
return self.swish() # The SiLU function is also known as the swish function.
def relu6(self):
"""
Calculate the Rectified Linear Unit 6 (ReLU6) activation function.
This method calculates the ReLU6 function for each element in `self`. The ReLU6 function is defined as:
f(x) = min(max(0, x), 6)
Returns:
Tensor: The transformed tensor after applying the ReLU6 function element-wise.
Note:
Implemented in terms of `relu` as relu(x) - relu(x - 6).
"""
return self.relu() - (self - 6).relu()
def hardswish(self):
"""
Calculate the Hard Swish activation function.
This method calculates the Hard Swish function for each element in `self`. The Hard Swish function is defined as:
f(x) = x * min(max(x + 3, 0), 6) / 6
Returns:
Tensor: The transformed tensor after applying the Hard Swish function element-wise.
Note:
Implemented in terms of `relu6` as x * relu6(x + 3) / 6.
"""
return self * (self + 3).relu6() * (1 / 6)
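A similar element-wise check for the two ReLU6-based forms above (illustrative; NumPy assumed for reference values):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-4.0, -1.0, 0.0, 3.0, 7.0], dtype=np.float32)
t = Tensor(x)
relu6_ref = np.minimum(np.maximum(x, 0), 6)
hardswish_ref = x * np.minimum(np.maximum(x + 3, 0), 6) / 6
assert np.allclose(t.relu6().numpy(), relu6_ref, atol=1e-6)
assert np.allclose(t.hardswish().numpy(), hardswish_ref, atol=1e-6)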
def tanh(self):
"""
Calculate the Hyperbolic Tangent (tanh) activation function.
This method calculates the tanh function for each element in `self`. The tanh function is defined as:
f(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
Returns:
Tensor: The transformed tensor after applying the tanh function element-wise.
Note:
Implemented via the identity tanh(x) = 2 * sigmoid(2 * x) - 1, where sigmoid(x) = 1 / (1 + exp(-x)).
"""
return 2.0 * ((2.0 * self).sigmoid()) - 1.0
def sinh(self):
"""
Calculate the Hyperbolic Sine (sinh) activation function.
This method calculates the sinh function for each element in `self`. The sinh function is defined as:
f(x) = (exp(x) - exp(-x)) / 2
Returns:
Tensor: The transformed tensor after applying the sinh function element-wise.
Note:
Implemented in terms of `exp` and `neg`.
"""
return (self.exp() - self.neg().exp()) / 2
def cosh(self):
"""
Calculate the Hyperbolic Cosine (cosh) activation function.
This method calculates the cosh function for each element in `self`. The cosh function is defined as:
f(x) = (exp(x) + exp(-x)) / 2
Returns:
Tensor: The transformed tensor after applying the cosh function element-wise.
Note:
Implemented in terms of `exp` and `neg`.
"""
return (self.exp() + self.neg().exp()) / 2
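The three hyperbolic forms above can be verified directly against NumPy's implementations (illustrative check only):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-1.5, -0.3, 0.0, 0.3, 1.5], dtype=np.float32)
t = Tensor(x)
assert np.allclose(t.tanh().numpy(), np.tanh(x), atol=1e-4)
assert np.allclose(t.sinh().numpy(), np.sinh(x), atol=1e-4)
assert np.allclose(t.cosh().numpy(), np.cosh(x), atol=1e-4)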
def atanh(self):
"""
Calculate the Inverse Hyperbolic Tangent (atanh) activation function.
This method calculates the atanh function for each element in `self`. The atanh function is defined as:
f(x) = log((1 + x) / (1 - x)) / 2
Returns:
Tensor: The transformed tensor after applying the atanh function element-wise.
Note:
Implemented in terms of `log`. The input must lie in the open interval (-1, 1).
"""
return ((1 + self) / (1 - self)).log() / 2
def asinh(self):
"""
Calculate the Inverse Hyperbolic Sine (asinh) activation function.
This method calculates the asinh function for each element in `self`. The asinh function is defined as:
f(x) = log(x + sqrt(1 + x^2))
Returns:
Tensor: The transformed tensor after applying the asinh function element-wise.
Note:
Implemented in terms of `square`, `sqrt`, and `log`.
"""
return (self + (self.square() + 1).sqrt()).log()
def acosh(self):
"""
Calculate the Inverse Hyperbolic Cosine (acosh) activation function.
This method calculates the acosh function for each element in `self`. The acosh function is defined as:
f(x) = log(x + sqrt((x - 1)(x + 1)))
Returns:
Tensor: The transformed tensor after applying the acosh function element-wise.
Note:
Implemented in terms of `square`, `sqrt`, and `log`. Defined only for inputs x >= 1.
"""
return (self + (self.square() - 1).sqrt()).log()
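The inverse hyperbolic forms have restricted domains, so each check below stays inside the valid range (illustrative; NumPy assumed for reference values):
import numpy as np
from tinygrad.tensor import Tensor
xt = np.array([-0.9, -0.2, 0.0, 0.2, 0.9], dtype=np.float32)  # atanh: (-1, 1)
xs = np.array([-3.0, -0.5, 0.0, 0.5, 3.0], dtype=np.float32)  # asinh: all reals
xc = np.array([1.0, 1.5, 2.0, 5.0], dtype=np.float32)         # acosh: [1, inf)
assert np.allclose(Tensor(xt).atanh().numpy(), np.arctanh(xt), atol=1e-4)
assert np.allclose(Tensor(xs).asinh().numpy(), np.arcsinh(xs), atol=1e-4)
assert np.allclose(Tensor(xc).acosh().numpy(), np.arccosh(xc), atol=1e-4)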
def hardtanh(self, min_val=-1, max_val=1):
"""
Apply the HardTanh activation function.
This method applies the HardTanh function to each element in `self`. The HardTanh function is defined as:
f(x) = max_val if x > max_val
= min_val if x < min_val
= x otherwise
Args:
min_val (float): The minimum value of the output range. Defaults to -1.
max_val (float): The maximum value of the output range. Defaults to 1.
Returns:
Tensor: The transformed tensor after applying the HardTanh function element-wise.
Note:
Implemented via `clip`, which clamps each element of `self` to the range [min_val, max_val].
"""
return self.clip(min_val, max_val)
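Since hardtanh reduces to a clip, a two-line check covers both the default and a custom range (illustrative; NumPy assumed):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-2.0, -0.5, 0.5, 2.0], dtype=np.float32)
assert np.allclose(Tensor(x).hardtanh().numpy(), np.clip(x, -1, 1))
assert np.allclose(Tensor(x).hardtanh(-0.5, 0.5).numpy(), np.clip(x, -0.5, 0.5))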
def gelu(self):
"""
Apply the Gaussian Error Linear Unit (GELU) activation function.
This method applies the GELU function to each element in `self`. The GELU function is defined as:
f(x) = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
Returns:
Tensor: The transformed tensor after applying the GELU function element-wise.
Note:
This is the tanh-based approximation of GELU, implemented in terms of `tanh`.
"""
return (
0.5
* self
@@ -2680,18 +2890,83 @@ class Tensor:
)
def quick_gelu(self):
"""
Apply a faster approximation of Gaussian Error Linear Unit (GELU) activation function.
This method applies an approximate GELU function to each element in `self`. The approximate GELU function is defined as:
f(x) = x * sigmoid(x * 1.702)
Returns:
Tensor: The transformed tensor after applying the approximate GELU function element-wise.
Note:
Implemented in terms of `sigmoid`, where sigmoid(x) = 1 / (1 + exp(-x)).
"""
return self * (self * 1.702).sigmoid()
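To see how close the quick variant stays to the tanh-based GELU above, both can be compared on the same inputs (illustrative; NumPy assumed for the reference curve):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=np.float32)
t = Tensor(x)
gelu_ref = 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))
assert np.allclose(t.gelu().numpy(), gelu_ref, atol=1e-4)
# quick_gelu trades accuracy for a single sigmoid; the gap is small but nonzero
print(np.max(np.abs(t.quick_gelu().numpy() - gelu_ref)))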
def leakyrelu(self, neg_slope=0.01):
"""
Apply the Leaky ReLU activation function.
This method applies the Leaky ReLU function to each element in `self`. The Leaky ReLU function is defined as:
f(x) = max(x, neg_slope * x)
Args:
neg_slope (float): The negative slope parameter for the Leaky ReLU function. Default is 0.01.
Returns:
Tensor: The transformed tensor after applying the Leaky ReLU function element-wise.
Note:
Implemented in terms of `relu` as relu(x) - relu(-neg_slope * x).
"""
return self.relu() - (-neg_slope * self).relu()
def mish(self):
"""
Apply the Mish activation function.
This method applies the Mish function to each element in `self`. The Mish function is defined as:
f(x) = x * tanh(softplus(x))
Returns:
Tensor: The transformed tensor after applying the Mish function element-wise.
Note:
Implemented in terms of `softplus` and `tanh`, where softplus(x) = log(1 + exp(x)).
"""
return self * self.softplus().tanh()
def softplus(self, beta=1):
"""
Apply the Softplus function.
This method applies the Softplus function to each element in `self`. The Softplus function is defined as:
f(x) = (1/beta) * log(1 + exp(beta * x))
Args:
beta (float): The beta parameter for the Softplus function. Default is 1.
Returns:
Tensor: The transformed tensor after applying the Softplus function element-wise.
"""
return (1 / beta) * (1 + (self * beta).exp()).log()
def softsign(self):
"""
Apply the Softsign function.
This method applies the Softsign function to each element in `self`. The Softsign function is defined as:
f(x) = x / (1 + |x|)
Returns:
Tensor: The transformed tensor after applying the Softsign function element-wise.
"""
return self / (1 + self.abs())
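A combined check of the remaining four activations against the formulas in their docstrings (illustrative; NumPy assumed for reference values):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-4.0, -1.0, 0.0, 1.0, 4.0], dtype=np.float32)
t = Tensor(x)
softplus_ref = np.log(1 + np.exp(x))
assert np.allclose(t.leakyrelu(0.1).numpy(), np.where(x > 0, x, 0.1 * x), atol=1e-6)
assert np.allclose(t.mish().numpy(), x * np.tanh(softplus_ref), atol=1e-4)
assert np.allclose(t.softplus().numpy(), softplus_ref, atol=1e-4)
assert np.allclose(t.softsign().numpy(), x / (1 + np.abs(x)), atol=1e-6)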
# ***** broadcasted binary mlops *****