
more tensor docstrings

deepcrayon
Jeff Moe 2023-12-07 09:57:38 -07:00
parent 67a6e588fb
commit e76c916978
1 changed file with 288 additions and 13 deletions

@@ -51,6 +51,7 @@ class Function:
requires_grad (Union[bool, None]): Indicates whether the output tensor requires gradient computation.
parents (List[Tensor]): The parent tensors for which gradients can be computed.
"""
def __init__(self, device: str, *tensors: Tensor):
self.device = device
self.needs_input_grad = [t.requires_grad for t in tensors]
@@ -113,7 +114,7 @@ import tinygrad.mlops as mlops
class Tensor:
'''
"""
This class represents a tensor, which is the fundamental unit of data in tinygrad.
It can be used for various mathematical operations and machine learning applications.
@@ -123,7 +124,8 @@ class Tensor:
training (ClassVar[bool]): Class variable to track if the tensor is in training mode or not.
no_grad (ClassVar[bool]): Class variable to track if gradient computation is disabled or not.
default_type (ClassVar[DType]): Default data type for tensors.
'''
"""
__slots__ = "lazydata", "requires_grad", "grad", "_ctx"
__deletable__ = ("_ctx",)
training: ClassVar[bool] = False
@@ -148,7 +150,7 @@ class Tensor:
dtype: Optional[DType] = None,
requires_grad: Optional[bool] = None,
):
'''
"""
Constructs a new tensor from the given data with the specified device and data type.
Args:
@@ -156,7 +158,7 @@ class Tensor:
device (Optional[str]): Device where the tensor will be stored.
dtype (Optional[DType]): Data type of the tensor.
requires_grad (Optional[bool]): Flag indicating if gradient computation is required or not.
'''
"""
assert dtype is None or isinstance(dtype, DType), f"invalid dtype {dtype}"
device = Device.canonicalize(device)
# tensors have gradients, buffers do not
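For orientation, a minimal construction sketch using only the parameters documented above (values and shapes are arbitrary; dtype and device fall back to Tensor.default_type and the local default backend when omitted):
from tinygrad.tensor import Tensor
# data can be a Python list, a number, or a numpy array
t = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
print(t.shape, t.requires_grad)  # (2, 2) True
# buffers that should never accumulate gradients opt out explicitly
w = Tensor([0.5, -0.5], requires_grad=False)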
@@ -820,6 +822,7 @@ class Tensor:
Returns:
List[Tensor]: A list of tensors in topological order (deepest first).
"""
def _deepwalk(node, visited, nodes):
visited.add(node)
if getattr(node, "_ctx", None):
@@ -1043,6 +1046,7 @@ class Tensor:
Returns:
Tensor: The tensor item corresponding to the given index or indices.
"""
def normalize_int(e, i, dim_sz):
"""
Normalize an integer index based on its dimension size.
@@ -1055,6 +1059,7 @@ class Tensor:
Returns:
int: The normalized integer index.
"""
def normalize_int(e, i, dim_sz):
"""
Normalize an integer index based on its dimension size.
@@ -2634,45 +2639,250 @@ class Tensor:
# ***** activation functions (unary) *****
def elu(self, alpha=1.0):
"""
Calculate the Exponential Linear Unit (ELU) activation function.
This method calculates the ELU function for each element in `self`. The ELU function is defined as:
f(x) = alpha * (exp(x) - 1) if x <= 0
f(x) = x if x > 0
Parameters:
alpha (float): A scaling factor for the negative part of the function, default is 1.0.
Returns:
Tensor: The transformed tensor after applying the ELU function element-wise.
Note:
Implemented in terms of `relu` and `exp` as relu(x) - alpha * relu(1 - exp(x)).
"""
return self.relu() - alpha * (1 - self.exp()).relu()
def celu(self, alpha=1.0):
"""
Calculate the Continuously Differentiable Exponential Linear Unit (C-ELU) activation function.
This method calculates the C-ELU function for each element in `self`. The C-ELU function is defined as:
f(x) = max(0, x) + min(0, alpha * (exp(x / alpha) - 1))
Parameters:
alpha (float): A scaling factor for the negative part of the function, default is 1.0.
Returns:
Tensor: The transformed tensor after applying the CELU function element-wise.
Note:
Implemented in terms of `maximum`, `exp`, and `minimum`.
"""
return self.maximum(0) + (alpha * ((self / alpha).exp() - 1)).minimum(0)
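As a quick sanity check of the two identities above (illustrative only; NumPy is assumed just for the reference values):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0], dtype=np.float32)
t = Tensor(x)
# piecewise reference definitions with alpha = 1.0
elu_ref = np.where(x > 0, x, np.exp(x) - 1)
celu_ref = np.maximum(0, x) + np.minimum(0, np.exp(x) - 1)
assert np.allclose(t.elu().numpy(), elu_ref, atol=1e-5)
assert np.allclose(t.celu().numpy(), celu_ref, atol=1e-5)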
def swish(self):
"""
Calculate the Swish activation function.
This method calculates the Swish function for each element in `self`. The Swish function is defined as:
f(x) = x * sigmoid(x)
Returns:
Tensor: The transformed tensor after applying the Swish function element-wise.
Note:
Implemented in terms of `sigmoid`, where sigmoid(x) = 1 / (1 + exp(-x)).
"""
return self * self.sigmoid()
def silu(self):
"""
Calculate the Sigmoid Weighted Linear Unit (SiLU) activation function, also known as the swish function.
This method calculates the SiLU function for each element in `self` using the Swish function. The SiLU function is defined as:
f(x) = x * sigmoid(x)
Returns:
Tensor: The transformed tensor after applying the SiLU function element-wise.
Note:
SiLU is identical to Swish, so this method simply delegates to `swish`.
"""
return self.swish() # The SiLU function is also known as the swish function.
def relu6(self):
"""
Calculate the Rectified Linear Unit 6 (ReLU6) activation function.
This method calculates the ReLU6 function for each element in `self`. The ReLU6 function is defined as:
f(x) = min(max(0, x), 6)
Returns:
Tensor: The transformed tensor after applying the ReLU6 function element-wise.
Note:
Implemented in terms of `relu` as relu(x) - relu(x - 6).
"""
return self.relu() - (self - 6).relu()
def hardswish(self):
"""
Calculate the Hard Swish activation function.
This method calculates the Hard Swish function for each element in `self`. The Hard Swish function is defined as:
f(x) = x * min(max(x + 3, 0), 6) / 6
Returns:
Tensor: The transformed tensor after applying the Hard Swish function element-wise.
Note:
Implemented in terms of `relu6` as x * relu6(x + 3) / 6.
"""
return self * (self + 3).relu6() * (1 / 6)
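A similar element-wise check for the two ReLU6-based forms above (illustrative; NumPy assumed for reference values):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-4.0, -1.0, 0.0, 3.0, 7.0], dtype=np.float32)
t = Tensor(x)
relu6_ref = np.minimum(np.maximum(x, 0), 6)
hardswish_ref = x * np.minimum(np.maximum(x + 3, 0), 6) / 6
assert np.allclose(t.relu6().numpy(), relu6_ref, atol=1e-6)
assert np.allclose(t.hardswish().numpy(), hardswish_ref, atol=1e-6)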
def tanh(self):
"""
Calculate the Hyperbolic Tangent (tanh) activation function.
This method calculates the tanh function for each element in `self`. The tanh function is defined as:
f(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
Returns:
Tensor: The transformed tensor after applying the tanh function element-wise.
Note:
Implemented via the identity tanh(x) = 2 * sigmoid(2 * x) - 1, where sigmoid(x) = 1 / (1 + exp(-x)).
"""
return 2.0 * ((2.0 * self).sigmoid()) - 1.0
def sinh(self):
"""
Calculate the Hyperbolic Sine (sinh) activation function.
This method calculates the sinh function for each element in `self`. The sinh function is defined as:
f(x) = (exp(x) - exp(-x)) / 2
Returns:
Tensor: The transformed tensor after applying the sinh function element-wise.
Note:
Implemented in terms of `exp` and `neg`.
"""
return (self.exp() - self.neg().exp()) / 2
def cosh(self):
"""
Calculate the Hyperbolic Cosine (cosh) activation function.
This method calculates the cosh function for each element in `self`. The cosh function is defined as:
f(x) = (exp(x) + exp(-x)) / 2
Returns:
Tensor: The transformed tensor after applying the cosh function element-wise.
Note:
Implemented in terms of `exp` and `neg`.
"""
return (self.exp() + self.neg().exp()) / 2
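The three hyperbolic forms above can be verified directly against NumPy's implementations (illustrative check only):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-1.5, -0.3, 0.0, 0.3, 1.5], dtype=np.float32)
t = Tensor(x)
assert np.allclose(t.tanh().numpy(), np.tanh(x), atol=1e-4)
assert np.allclose(t.sinh().numpy(), np.sinh(x), atol=1e-4)
assert np.allclose(t.cosh().numpy(), np.cosh(x), atol=1e-4)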
def atanh(self):
"""
Calculate the Inverse Hyperbolic Tangent (atanh) activation function.
This method calculates the atanh function for each element in `self`. The atanh function is defined as:
f(x) = log((1 + x) / (1 - x)) / 2
Returns:
Tensor: The transformed tensor after applying the atanh function element-wise.
Note:
Implemented in terms of `log`. The input must lie in the open interval (-1, 1).
"""
return ((1 + self) / (1 - self)).log() / 2
def asinh(self):
"""
Calculate the Inverse Hyperbolic Sine (asinh) activation function.
This method calculates the asinh function for each element in `self`. The asinh function is defined as:
f(x) = log(x + sqrt(1 + x^2))
Returns:
Tensor: The transformed tensor after applying the asinh function element-wise.
Note:
Implemented in terms of `square`, `sqrt`, and `log`.
"""
return (self + (self.square() + 1).sqrt()).log()
def acosh(self):
"""
Calculate the Inverse Hyperbolic Cosine (acosh) activation function.
This method calculates the acosh function for each element in `self`. The acosh function is defined as:
f(x) = log(x + sqrt((x - 1)(x + 1)))
Returns:
Tensor: The transformed tensor after applying the acosh function element-wise.
Note:
Implemented in terms of `square`, `sqrt`, and `log`. Defined only for inputs x >= 1.
"""
return (self + (self.square() - 1).sqrt()).log()
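The inverse hyperbolic forms have restricted domains, so each check below stays inside the valid range (illustrative; NumPy assumed for reference values):
import numpy as np
from tinygrad.tensor import Tensor
xt = np.array([-0.9, -0.2, 0.0, 0.2, 0.9], dtype=np.float32)  # atanh: (-1, 1)
xs = np.array([-3.0, -0.5, 0.0, 0.5, 3.0], dtype=np.float32)  # asinh: all reals
xc = np.array([1.0, 1.5, 2.0, 5.0], dtype=np.float32)         # acosh: [1, inf)
assert np.allclose(Tensor(xt).atanh().numpy(), np.arctanh(xt), atol=1e-4)
assert np.allclose(Tensor(xs).asinh().numpy(), np.arcsinh(xs), atol=1e-4)
assert np.allclose(Tensor(xc).acosh().numpy(), np.arccosh(xc), atol=1e-4)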
def hardtanh(self, min_val=-1, max_val=1):
"""
Apply the HardTanh activation function.
This method applies the HardTanh function to each element in `self`. The HardTanh function is defined as:
f(x) = max_val if x > max_val
= min_val if x < min_val
= x otherwise
Args:
min_val (float): The minimum value of the output range. Defaults to -1.
max_val (float): The maximum value of the output range. Defaults to 1.
Returns:
Tensor: The transformed tensor after applying the HardTanh function element-wise.
Note:
Implemented via `clip`, which clamps each element of `self` to the range [min_val, max_val].
"""
return self.clip(min_val, max_val)
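Since hardtanh reduces to a clip, a two-line check covers both the default and a custom range (illustrative; NumPy assumed):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-2.0, -0.5, 0.5, 2.0], dtype=np.float32)
assert np.allclose(Tensor(x).hardtanh().numpy(), np.clip(x, -1, 1))
assert np.allclose(Tensor(x).hardtanh(-0.5, 0.5).numpy(), np.clip(x, -0.5, 0.5))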
def gelu(self):
"""
Apply the Gaussian Error Linear Unit (GELU) activation function.
This method applies the GELU function to each element in `self`. The GELU function is defined as:
f(x) = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
Returns:
Tensor: The transformed tensor after applying the GELU function element-wise.
Note:
This is the tanh-based approximation of GELU, implemented in terms of `tanh`.
"""
return (
0.5
* self
@@ -2680,18 +2890,83 @@ class Tensor:
)
def quick_gelu(self):
"""
Apply a faster approximation of Gaussian Error Linear Unit (GELU) activation function.
This method applies an approximate GELU function to each element in `self`. The approximate GELU function is defined as:
f(x) = x * sigmoid(x * 1.702)
Returns:
Tensor: The transformed tensor after applying the approximate GELU function element-wise.
Note:
Implemented in terms of `sigmoid`, where sigmoid(x) = 1 / (1 + exp(-x)).
"""
return self * (self * 1.702).sigmoid()
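To see how close the quick variant stays to the tanh-based GELU above, both can be compared on the same inputs (illustrative; NumPy assumed for the reference curve):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=np.float32)
t = Tensor(x)
gelu_ref = 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))
assert np.allclose(t.gelu().numpy(), gelu_ref, atol=1e-4)
# quick_gelu trades accuracy for a single sigmoid; the gap is small but nonzero
print(np.max(np.abs(t.quick_gelu().numpy() - gelu_ref)))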
def leakyrelu(self, neg_slope=0.01):
"""
Apply the Leaky ReLU activation function.
This method applies the Leaky ReLU function to each element in `self`. The Leaky ReLU function is defined as:
f(x) = max(x, neg_slope * x)
Args:
neg_slope (float): The negative slope parameter for the Leaky ReLU function. Default is 0.01.
Returns:
Tensor: The transformed tensor after applying the Leaky ReLU function element-wise.
Note:
Implemented in terms of `relu` as relu(x) - relu(-neg_slope * x).
"""
return self.relu() - (-neg_slope * self).relu()
def mish(self):
"""
Apply the Mish activation function.
This method applies the Mish function to each element in `self`. The Mish function is defined as:
f(x) = x * tanh(softplus(x))
Returns:
Tensor: The transformed tensor after applying the Mish function element-wise.
Note:
Implemented in terms of `softplus` and `tanh`, where softplus(x) = log(1 + exp(x)).
"""
return self * self.softplus().tanh()
def softplus(self, beta=1):
"""
Apply the Softplus function.
This method applies the Softplus function to each element in `self`. The Softplus function is defined as:
f(x) = (1/beta) * log(1 + exp(beta * x))
Args:
beta (float): The beta parameter for the Softplus function. Default is 1.
Returns:
Tensor: The transformed tensor after applying the Softplus function element-wise.
"""
return (1 / beta) * (1 + (self * beta).exp()).log()
def softsign(self):
"""
Apply the Softsign function.
This method applies the Softsign function to each element in `self`. The Softsign function is defined as:
f(x) = x / (1 + |x|)
Returns:
Tensor: The transformed tensor after applying the Softsign function element-wise.
"""
return self / (1 + self.abs())
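A combined check of the remaining four activations against the formulas in their docstrings (illustrative; NumPy assumed for reference values):
import numpy as np
from tinygrad.tensor import Tensor
x = np.array([-4.0, -1.0, 0.0, 1.0, 4.0], dtype=np.float32)
t = Tensor(x)
softplus_ref = np.log(1 + np.exp(x))
assert np.allclose(t.leakyrelu(0.1).numpy(), np.where(x > 0, x, 0.1 * x), atol=1e-6)
assert np.allclose(t.mish().numpy(), x * np.tanh(softplus_ref), atol=1e-4)
assert np.allclose(t.softplus().numpy(), softplus_ref, atol=1e-4)
assert np.allclose(t.softsign().numpy(), x / (1 + np.abs(x)), atol=1e-6)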
# ***** broadcasted binary mlops *****