more tensor docstrings
parent 67a6e588fb
commit e76c916978
@@ -51,6 +51,7 @@ class Function:
      requires_grad (Union[bool, None]): Indicates whether the output tensor requires gradient computation.
      parents (List[Tensor]): The parent tensors for which gradients can be computed.
  """

  def __init__(self, device: str, *tensors: Tensor):
    self.device = device
    self.needs_input_grad = [t.requires_grad for t in tensors]
@@ -113,7 +114,7 @@ import tinygrad.mlops as mlops


class Tensor:
-  '''
+  """
  This class represents a tensor, which is the fundamental unit of data in tinygrad.
  It can be used for various mathematical operations and machine learning applications.
@@ -123,7 +124,8 @@ class Tensor:
      training (ClassVar[bool]): Class variable tracking whether the tensor is in training mode.
      no_grad (ClassVar[bool]): Class variable tracking whether gradient computation is disabled.
      default_type (ClassVar[DType]): Default data type for tensors.
-  '''
+  """

  __slots__ = "lazydata", "requires_grad", "grad", "_ctx"
  __deletable__ = ("_ctx",)
  training: ClassVar[bool] = False
@@ -148,7 +150,7 @@ class Tensor:
      dtype: Optional[DType] = None,
      requires_grad: Optional[bool] = None,
  ):
-    '''
+    """
    Constructs a new tensor from the given data with the specified device and data type.

    Args:
@@ -156,7 +158,7 @@ class Tensor:
        device (Optional[str]): Device where the tensor will be stored.
        dtype (Optional[DType]): Data type of the tensor.
        requires_grad (Optional[bool]): Flag indicating whether gradient computation is required.
-    '''
+    """
    assert dtype is None or isinstance(dtype, DType), f"invalid dtype {dtype}"
    device = Device.canonicalize(device)
    # tensors have gradients, buffers do not
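
A quick construction sketch (editor's illustration, not part of the commit; assumes tinygrad's Tensor class as diffed above, with the device string resolved by Device.canonicalize when omitted):

    from tinygrad.tensor import Tensor
    t = Tensor([1.0, 2.0, 3.0], requires_grad=True)  # dtype is inferred when not given
    print(t.requires_grad)  # True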
@@ -820,6 +822,7 @@ class Tensor:
    Returns:
        List[Tensor]: A list of tensors in topological order (deepest first).
    """

    def _deepwalk(node, visited, nodes):
      visited.add(node)
      if getattr(node, "_ctx", None):
@@ -1043,6 +1046,7 @@ class Tensor:
    Returns:
        Tensor: The tensor item corresponding to the given index or indices.
    """

    def normalize_int(e, i, dim_sz):
      """
      Normalize an integer index based on its dimension size.
@@ -1055,6 +1059,7 @@ class Tensor:
    Returns:
        int: The normalized integer index.
    """

    def normalize_int(e, i, dim_sz):
      """
      Normalize an integer index based on its dimension size.
@@ -2634,45 +2639,250 @@ class Tensor:
  # ***** activation functions (unary) *****

  def elu(self, alpha=1.0):
    """
    Calculate the Exponential Linear Unit (ELU) activation function.

    This method calculates the ELU function for each element in `self`. The ELU function is defined as:
        f(x) = x                      if x > 0
        f(x) = alpha * (exp(x) - 1)   if x <= 0

    Args:
        alpha (float): A scaling factor for the negative part of the function, default is 1.0.

    Returns:
        Tensor: The transformed tensor after applying the ELU function element-wise.

    Attributes:
        relu (method): Applies the Rectified Linear Unit (ReLU) function, f(x) = max(0, x), element-wise.
        exp (method): Computes the element-wise exponential of `self`.
    """
    return self.relu() - alpha * (1 - self.exp()).relu()
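
A quick usage sketch for elu (editor's illustration, not part of the commit; assumes tinygrad's Tensor and its .numpy() realizer):

    from tinygrad.tensor import Tensor
    print(Tensor([-2.0, 0.0, 2.0]).elu().numpy())  # ~[-0.8647, 0., 2.]; elu(-2) = exp(-2) - 1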

  def celu(self, alpha=1.0):
    """
    Calculate the Continuously Differentiable Exponential Linear Unit (CELU) activation function.

    This method calculates the CELU function for each element in `self`. The CELU function is defined as:
        f(x) = max(0, x) + min(0, alpha * (exp(x / alpha) - 1))

    Args:
        alpha (float): A scaling factor for the negative part of the function, default is 1.0.

    Returns:
        Tensor: The transformed tensor after applying the CELU function element-wise.

    Attributes:
        maximum (method): Takes the element-wise maximum of `self` and another tensor or scalar.
        exp (method): Computes the element-wise exponential of `self`.
        minimum (method): Takes the element-wise minimum of `self` and another tensor or scalar.
    """
    return self.maximum(0) + (alpha * ((self / alpha).exp() - 1)).minimum(0)
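
An illustrative check (editor's sketch, not part of the commit): with alpha=1 CELU coincides with ELU, while a smaller alpha sharpens the negative saturation.

    from tinygrad.tensor import Tensor
    print(Tensor([-1.0, 1.0]).celu(alpha=0.5).numpy())  # ~[-0.4323, 1.]; 0.5 * (exp(-1 / 0.5) - 1)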

  def swish(self):
    """
    Calculate the Swish activation function.

    This method calculates the Swish function for each element in `self`. The Swish function is defined as:
        f(x) = x * sigmoid(x)

    Returns:
        Tensor: The transformed tensor after applying the Swish function element-wise.

    Attributes:
        sigmoid (method): Applies the Sigmoid function, f(x) = 1 / (1 + exp(-x)), element-wise.
    """
    return self * self.sigmoid()

  def silu(self):
    """
    Calculate the Sigmoid-Weighted Linear Unit (SiLU) activation function, also known as the swish function.

    This method calculates the SiLU function for each element in `self` via the Swish function. The SiLU function is defined as:
        f(x) = x * sigmoid(x)

    Returns:
        Tensor: The transformed tensor after applying the SiLU function element-wise.

    Attributes:
        swish (method): Applies the Swish function, f(x) = x * sigmoid(x), element-wise.
    """
    return self.swish()  # The SiLU function is also known as the swish function.
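
Since silu simply delegates to swish, both compute x * sigmoid(x); a brief sketch (editor's illustration, not part of the commit):

    from tinygrad.tensor import Tensor
    t = Tensor([-1.0, 0.0, 1.0])
    print(t.swish().numpy())  # ~[-0.2689, 0., 0.7311]
    print(t.silu().numpy())   # identical output: silu is an alias for swish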

  def relu6(self):
    """
    Calculate the Rectified Linear Unit 6 (ReLU6) activation function.

    This method calculates the ReLU6 function for each element in `self`. The ReLU6 function is defined as:
        f(x) = min(max(0, x), 6)

    Returns:
        Tensor: The transformed tensor after applying the ReLU6 function element-wise.

    Attributes:
        relu (method): Applies the Rectified Linear Unit (ReLU) function, f(x) = max(0, x), element-wise.
    """
    return self.relu() - (self - 6).relu()
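
A brief sketch showing the clamp at 6 (editor's illustration, not part of the commit):

    from tinygrad.tensor import Tensor
    print(Tensor([-3.0, 3.0, 8.0]).relu6().numpy())  # [0., 3., 6.]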

  def hardswish(self):
    """
    Calculate the Hard Swish activation function.

    This method calculates the Hard Swish function for each element in `self`. The Hard Swish function is defined as:
        f(x) = x * min(max(x + 3, 0), 6) / 6

    Returns:
        Tensor: The transformed tensor after applying the Hard Swish function element-wise.

    Attributes:
        relu6 (method): Applies the Rectified Linear Unit 6 (ReLU6) function, f(x) = min(max(0, x), 6), element-wise.
    """
    return self * (self + 3).relu6() * (1 / 6)
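
A brief sketch (editor's illustration, not part of the commit); note the saturation to 0 below x = -3 and to x itself above x = 3:

    from tinygrad.tensor import Tensor
    print(Tensor([-4.0, 1.0, 4.0]).hardswish().numpy())  # ~[0., 0.6667, 4.]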

  def tanh(self):
    """
    Calculate the Hyperbolic Tangent (tanh) activation function.

    This method calculates the tanh function for each element in `self`, using the identity:
        tanh(x) = 2 * sigmoid(2 * x) - 1

    Returns:
        Tensor: The transformed tensor after applying the tanh function element-wise.

    Attributes:
        sigmoid (method): Applies the Sigmoid function, f(x) = 1 / (1 + exp(-x)), element-wise.
    """
    return 2.0 * ((2.0 * self).sigmoid()) - 1.0
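
A brief numeric check of the sigmoid identity (editor's illustration, not part of the commit):

    from tinygrad.tensor import Tensor
    print(Tensor([1.0]).tanh().numpy())  # ~[0.7616], matching 2 * sigmoid(2) - 1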

  def sinh(self):
    """
    Calculate the Hyperbolic Sine (sinh) function.

    This method calculates the sinh function for each element in `self`. The sinh function is defined as:
        f(x) = (exp(x) - exp(-x)) / 2

    Returns:
        Tensor: The transformed tensor after applying the sinh function element-wise.

    Attributes:
        exp (method): Computes the element-wise exponential, f(x) = e^x.
        neg (method): Returns the element-wise negation of `self`.
    """
    return (self.exp() - self.neg().exp()) / 2

  def cosh(self):
    """
    Calculate the Hyperbolic Cosine (cosh) function.

    This method calculates the cosh function for each element in `self`. The cosh function is defined as:
        f(x) = (exp(x) + exp(-x)) / 2

    Returns:
        Tensor: The transformed tensor after applying the cosh function element-wise.

    Attributes:
        exp (method): Computes the element-wise exponential, f(x) = e^x.
        neg (method): Returns the element-wise negation of `self`.
    """
    return (self.exp() + self.neg().exp()) / 2
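
A brief check of the hyperbolic identity cosh(x)^2 - sinh(x)^2 = 1 (editor's illustration, not part of the commit):

    from tinygrad.tensor import Tensor
    t = Tensor([1.0])
    print((t.cosh().square() - t.sinh().square()).numpy())  # ~[1.]; sinh(1) ~ 1.1752, cosh(1) ~ 1.5431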

  def atanh(self):
    """
    Calculate the Inverse Hyperbolic Tangent (atanh) function.

    This method calculates the atanh function for each element in `self`. The atanh function is defined as:
        f(x) = log((1 + x) / (1 - x)) / 2

    Returns:
        Tensor: The transformed tensor after applying the atanh function element-wise.

    Attributes:
        log (method): Applies the natural logarithm, f(x) = ln(x), element-wise.
    """
    return ((1 + self) / (1 - self)).log() / 2

  def asinh(self):
    """
    Calculate the Inverse Hyperbolic Sine (asinh) function.

    This method calculates the asinh function for each element in `self`. The asinh function is defined as:
        f(x) = log(x + sqrt(x^2 + 1))

    Returns:
        Tensor: The transformed tensor after applying the asinh function element-wise.

    Attributes:
        log (method): Applies the natural logarithm, f(x) = ln(x), element-wise.
        square (method): Returns the element-wise square of `self`.
        sqrt (method): Applies the square root, f(x) = sqrt(x), element-wise.
    """
    return (self + (self.square() + 1).sqrt()).log()

  def acosh(self):
    """
    Calculate the Inverse Hyperbolic Cosine (acosh) function.

    This method calculates the acosh function for each element in `self`. The acosh function is defined as:
        f(x) = log(x + sqrt(x^2 - 1))

    Returns:
        Tensor: The transformed tensor after applying the acosh function element-wise.

    Attributes:
        log (method): Applies the natural logarithm, f(x) = ln(x), element-wise.
        square (method): Returns the element-wise square of `self`.
        sqrt (method): Applies the square root, f(x) = sqrt(x), element-wise.
    """
    return (self + (self.square() - 1).sqrt()).log()
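
A round-trip sketch for the three inverse functions (editor's illustration, not part of the commit):

    from tinygrad.tensor import Tensor
    print(Tensor([0.5]).tanh().atanh().numpy())  # ~[0.5]
    print(Tensor([0.5]).sinh().asinh().numpy())  # ~[0.5]
    print(Tensor([1.5]).cosh().acosh().numpy())  # ~[1.5]; acosh is defined for x >= 1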

  def hardtanh(self, min_val=-1, max_val=1):
    """
    Apply the HardTanh activation function.

    This method applies the HardTanh function to each element in `self`. The HardTanh function is defined as:
        f(x) = max_val   if x > max_val
             = min_val   if x < min_val
             = x         otherwise

    Args:
        min_val (float): The minimum value of the output range. Defaults to -1.
        max_val (float): The maximum value of the output range. Defaults to 1.

    Returns:
        Tensor: The transformed tensor after applying the HardTanh function element-wise.

    Attributes:
        clip (method): Clips `self` to the range [min_val, max_val]: elements below min_val are set to min_val,
            elements above max_val are set to max_val, and elements within the range are unchanged.
    """
    return self.clip(min_val, max_val)
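
A brief sketch with the default range [-1, 1] (editor's illustration, not part of the commit):

    from tinygrad.tensor import Tensor
    print(Tensor([-2.0, 0.5, 2.0]).hardtanh().numpy())  # [-1., 0.5, 1.]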

  def gelu(self):
    """
    Apply the Gaussian Error Linear Unit (GELU) activation function.

    This method applies the tanh approximation of the GELU function to each element in `self`. It is defined as:
        f(x) = 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))

    Returns:
        Tensor: The transformed tensor after applying the GELU function element-wise.

    Attributes:
        tanh (method): Applies the hyperbolic tangent function element-wise.
    """
    return (
        0.5
        * self

@@ -2680,18 +2890,83 @@ class Tensor:
    )
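
A brief sketch of the tanh approximation (editor's illustration, not part of the commit; assumes the elided middle of the return expression matches the formula documented above):

    from tinygrad.tensor import Tensor
    print(Tensor([1.0]).gelu().numpy())  # ~[0.8412]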

  def quick_gelu(self):
    """
    Apply a faster approximation of the Gaussian Error Linear Unit (GELU) activation function.

    This method applies an approximate GELU function to each element in `self`. The approximation is defined as:
        f(x) = x * sigmoid(1.702 * x)

    Returns:
        Tensor: The transformed tensor after applying the approximate GELU function element-wise.

    Attributes:
        sigmoid (method): Applies the Sigmoid function, f(x) = 1 / (1 + exp(-x)), element-wise.
    """
    return self * (self * 1.702).sigmoid()
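
A brief comparison against the tanh-based gelu above (editor's illustration, not part of the commit):

    from tinygrad.tensor import Tensor
    t = Tensor([1.0])
    print(t.quick_gelu().numpy())  # ~[0.8458], close to t.gelu() ~ [0.8412]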

  def leakyrelu(self, neg_slope=0.01):
    """
    Apply the Leaky ReLU activation function.

    This method applies the Leaky ReLU function to each element in `self`. The Leaky ReLU function is defined as:
        f(x) = x              if x >= 0
        f(x) = neg_slope * x  if x < 0

    Args:
        neg_slope (float): The negative slope parameter for the Leaky ReLU function. Default is 0.01.

    Returns:
        Tensor: The transformed tensor after applying the Leaky ReLU function element-wise.

    Attributes:
        relu (method): Applies the Rectified Linear Unit (ReLU) function, f(x) = max(0, x), element-wise.
    """
    return self.relu() - (-neg_slope * self).relu()
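
A brief sketch (editor's illustration, not part of the commit):

    from tinygrad.tensor import Tensor
    print(Tensor([-1.0, 2.0]).leakyrelu(neg_slope=0.1).numpy())  # [-0.1, 2.]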

  def mish(self):
    """
    Apply the Mish activation function.

    This method applies the Mish function to each element in `self`. The Mish function is defined as:
        f(x) = x * tanh(softplus(x))

    Returns:
        Tensor: The transformed tensor after applying the Mish function element-wise.

    Attributes:
        softplus (method): Applies the Softplus function, f(x) = log(1 + exp(x)), element-wise.
        tanh (method): Applies the hyperbolic tangent, f(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)), element-wise.
    """
    return self * self.softplus().tanh()
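
A brief sketch (editor's illustration, not part of the commit):

    from tinygrad.tensor import Tensor
    print(Tensor([-1.0, 1.0]).mish().numpy())  # ~[-0.3034, 0.8651]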

  def softplus(self, beta=1):
    """
    Apply the Softplus function.

    This method applies the Softplus function to each element in `self`. The Softplus function is defined as:
        f(x) = (1 / beta) * log(1 + exp(beta * x))

    Args:
        beta (float): The beta parameter for the Softplus function. Default is 1.

    Returns:
        Tensor: The transformed tensor after applying the Softplus function element-wise.
    """
    return (1 / beta) * (1 + (self * beta).exp()).log()
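
A brief sketch (editor's illustration, not part of the commit); softplus(0) = log(2), and a larger beta sharpens the knee:

    from tinygrad.tensor import Tensor
    print(Tensor([0.0]).softplus().numpy())        # ~[0.6931] = log(2)
    print(Tensor([0.0]).softplus(beta=2).numpy())  # ~[0.3466] = log(2) / 2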

  def softsign(self):
    """
    Apply the Softsign function.

    This method applies the Softsign function to each element in `self`. The Softsign function is defined as:
        f(x) = x / (1 + |x|)

    Returns:
        Tensor: The transformed tensor after applying the Softsign function element-wise.
    """
    return self / (1 + self.abs())
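
A brief sketch (editor's illustration, not part of the commit); outputs lie strictly in (-1, 1):

    from tinygrad.tensor import Tensor
    print(Tensor([-1.0, 3.0]).softsign().numpy())  # [-0.5, 0.75]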

  # ***** broadcasted binary mlops *****