more tensor docstrings

parent 67a6e588fb
commit e76c916978
@@ -51,6 +51,7 @@ class Function:
         requires_grad (Union[bool, None]): Indicates whether the output tensor requires gradient computation.
         parents (List[Tensor]): The parent tensors for which gradients can be computed.
     """

     def __init__(self, device: str, *tensors: Tensor):
         self.device = device
         self.needs_input_grad = [t.requires_grad for t in tensors]
|
@@ -113,7 +114,7 @@ import tinygrad.mlops as mlops


 class Tensor:
-    '''
+    """
     This class represents a tensor, which is the fundamental unit of data in tinygrad.
     It can be used for various mathematical operations and machine learning applications.
@@ -123,7 +124,8 @@ class Tensor:
         training (ClassVar[bool]): Class variable to track if the tensor is in training mode or not.
         no_grad (ClassVar[bool]): Class variable to track if gradient computation is disabled or not.
         default_type (ClassVar[DType]): Default data type for tensors.
-    '''
+    """

     __slots__ = "lazydata", "requires_grad", "grad", "_ctx"
     __deletable__ = ("_ctx",)
     training: ClassVar[bool] = False
@@ -148,7 +150,7 @@ class Tensor:
         dtype: Optional[DType] = None,
         requires_grad: Optional[bool] = None,
     ):
-        '''
+        """
         Constructs a new tensor from the given data with the specified device and data type.

         Args:
@@ -156,7 +158,7 @@ class Tensor:
             device (Optional[str]): Device where the tensor will be stored.
             dtype (Optional[DType]): Data type of the tensor.
             requires_grad (Optional[bool]): Flag indicating if gradient computation is required or not.
-        '''
+        """
         assert dtype is None or isinstance(dtype, DType), f"invalid dtype {dtype}"
         device = Device.canonicalize(device)
         # tensors have gradients, buffers do not
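For reference, constructing a tensor with these arguments looks like this (a minimal usage sketch; the data and flags are illustrative, not taken from the diff):

from tinygrad.tensor import Tensor

t = Tensor([1.0, 2.0, 3.0], requires_grad=True)  # device and dtype fall back to the defaults when omitted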
|
@@ -820,6 +822,7 @@ class Tensor:
         Returns:
             List[Tensor]: A list of tensors in topological order (deepest first).
         """

         def _deepwalk(node, visited, nodes):
             visited.add(node)
             if getattr(node, "_ctx", None):
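The walk is a depth-first post-order over `_ctx.parents`; a self-contained sketch of the same idea (names assumed for illustration, not the in-tree code):

def deepwalk(node):
    visited, order = set(), []
    def walk(n):
        visited.add(n)
        if getattr(n, "_ctx", None):
            for parent in n._ctx.parents:
                if parent not in visited:
                    walk(parent)
            order.append(n)  # appended only after its parents, so parents land first
    walk(node)
    return order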
|
@@ -1043,6 +1046,7 @@ class Tensor:
         Returns:
             Tensor: The tensor item corresponding to the given index or indices.
         """

         def normalize_int(e, i, dim_sz):
             """
             Normalize an integer index based on its dimension size.
@@ -1055,6 +1059,7 @@ class Tensor:
         Returns:
             int: The normalized integer index.
         """

         def normalize_int(e, i, dim_sz):
             """
             Normalize an integer index based on its dimension size.
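Normalizing here means mapping Python-style negative indices into range; a hedged sketch of the behaviour the docstring describes (the real helper may differ in details such as the error message):

def normalize_int(e, i, dim_sz):
    if -dim_sz <= e < dim_sz:
        return e % dim_sz  # e.g. index -1 on a size-4 dimension becomes 3
    raise IndexError(f"index {e} is out of bounds for dimension {i} with size {dim_sz}")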
|
@@ -1937,12 +1942,12 @@ class Tensor:
     def max_pool2d(self, kernel_size=(2, 2), stride=None, dilation=1):
         """
         Perform a max pooling operation on the input tensor.

         Args:
             kernel_size (tuple): The size of the sliding window for each dimension of the input tensor. Default is (2, 2).
             stride (tuple or None): The stride of the sliding window for each dimension of the input tensor. If not provided, it defaults to kernel_size.
             dilation (int): The spacing between the kernel points. Default is 1.

         Returns:
             Tensor: The max pooled tensor.
         """
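A short usage sketch for the signature above (input laid out as (batch, channels, height, width), as in the surrounding conv code):

from tinygrad.tensor import Tensor

x = Tensor.ones(1, 1, 4, 4)
y = x.max_pool2d(kernel_size=(2, 2))  # stride defaults to kernel_size, so this yields shape (1, 1, 2, 2)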
|
@@ -2106,9 +2111,9 @@ class Tensor:
         def apply_matrix(mat, t, dim=0):
             """
             Apply a 3x3 matrix to a 4x4 matrix in Winograd's F(4x4,3x3) algorithm.

             This method is used for applying a 3x3 matrix to a 4x4 matrix as part of the Winograd F(4x4,3x3) convolution algorithm. The function recursively applies the transformation until it reaches the specified dimension.

             :param mat: A list of lists representing the 3x3 matrix.
             :type mat: List[List[int]]
             :param t: A tensor to which the matrix will be applied.
@@ -2117,7 +2122,7 @@ class Tensor:
             :type dim: int, optional
             :return: The transformed tensor after applying the matrix.
             :rtype: Tensor

             Attributes:
                 HWI (tuple): A tuple representing the input size of Winograd's F(4x4,3x3) algorithm. Default is (6,).
                 HWO (tuple): A tuple representing the output size of Winograd's F(4x4,3x3) algorithm. Default is (4,).
@@ -2299,12 +2304,12 @@ class Tensor:
         def fix(x: Tensor):
             """
             Fix tensor by reshaping and transposing it.

             This function takes a tensor x as input, reshapes it based on the dimensions of 'ret'
             except for the last two dimensions, multiplies these dimensions together with the product
             of the last two dimensions of 'ret', and finally transposes the tensor based on the axis
             dimension.

             :param x: The input tensor to be fixed.
             :type x: Tensor
             :return: The reshaped, sliced, and transposed tensor.
@@ -2612,7 +2617,7 @@ class Tensor:
     def sign(self):
         """
         Calculate and return the element-wise sign of the tensor.

         For each element in the tensor, this function determines whether it is positive or negative, assigning 1 to positive elements and -1 to negative elements. The result is returned as a new tensor with the same shape as the original tensor.

         Returns:
@@ -2623,7 +2628,7 @@ class Tensor:
     def reciprocal(self):
         """
         Calculate and return the element-wise reciprocal of the tensor.

         For each element in the tensor, this function calculates its reciprocal (1 divided by the element value). The result is returned as a new tensor with the same shape as the original tensor.

         Returns:
@@ -2634,45 +2639,250 @@ class Tensor:
     # ***** activation functions (unary) *****

     def elu(self, alpha=1.0):
+        """
+        Calculate the Exponential Linear Unit (ELU) activation function.
+
+        This method calculates the ELU function for each element in `self`. The ELU function is defined as:
+            f(x) = alpha * (exp(x) - 1)    if x <= 0
+            f(x) = x                       if x > 0
+
+        Parameters:
+            alpha (float): A scaling factor for the negative part of the function, default is 1.0.
+
+        Returns:
+            Tensor: The transformed tensor after applying the ELU function element-wise.
+
+        Attributes:
+            relu (method): A method that applies the Rectified Linear Unit (ReLU) function to the data in `self`. ReLU replaces all negative values with zero and keeps positive values unchanged.
+            exp (method): A method that computes the exponential of all elements in `self`. The exponential is applied element-wise.
+        """
         return self.relu() - alpha * (1 - self.exp()).relu()

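The implementation composes two relus rather than branching; a quick plain-Python check (not tinygrad code) that the composed form matches the piecewise definition:

import math

relu = lambda v: max(0.0, v)
elu_ref = lambda x, a=1.0: x if x > 0 else a * (math.exp(x) - 1)
elu_via_relu = lambda x, a=1.0: relu(x) - a * relu(1 - math.exp(x))

for x in (-2.0, -0.5, 0.0, 0.5, 2.0):
    assert abs(elu_ref(x) - elu_via_relu(x)) < 1e-12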
     def celu(self, alpha=1.0):
+        """
+        Calculate the Continuously Differentiable Exponential Linear Unit (CELU) activation function.
+
+        This method calculates the CELU function for each element in `self`. The CELU function is defined as:
+            f(x) = alpha * (exp(x / alpha) - 1)    if x <= 0
+            f(x) = x                               if x > 0
+
+        Parameters:
+            alpha (float): A scaling factor for the negative part of the function, default is 1.0.
+
+        Returns:
+            Tensor: The transformed tensor after applying the CELU function element-wise.
+
+        Attributes:
+            maximum (method): A method that takes the element-wise maximum of `self` and another tensor or scalar.
+            exp (method): A method that computes the exponential of all elements in `self`. The exponential is applied element-wise.
+            minimum (method): A method that takes the element-wise minimum of `self` and another tensor or scalar.
+        """
         return self.maximum(0) + (alpha * ((self / alpha).exp() - 1)).minimum(0)

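The same style of check for CELU: the max/min composition agrees with the piecewise definition.

import math

celu_ref = lambda x, a=1.0: x if x > 0 else a * (math.exp(x / a) - 1)
celu_via_minmax = lambda x, a=1.0: max(x, 0.0) + min(a * (math.exp(x / a) - 1), 0.0)

for x in (-2.0, -0.5, 0.0, 0.5, 2.0):
    assert abs(celu_ref(x) - celu_via_minmax(x)) < 1e-12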
     def swish(self):
+        """
+        Calculate the Swish activation function.
+
+        This method calculates the Swish function for each element in `self`. The Swish function is defined as:
+            f(x) = x * sigmoid(x)
+
+        Returns:
+            Tensor: The transformed tensor after applying the Swish function element-wise.
+
+        Attributes:
+            sigmoid (method): A method that applies the Sigmoid function to the data in `self`. The Sigmoid function is defined as:
+                f(x) = 1 / (1 + exp(-x))
+        """
         return self * self.sigmoid()

     def silu(self):
+        """
+        Calculate the Sigmoid-Weighted Linear Unit (SiLU) activation function, also known as the swish function.
+
+        This method calculates the SiLU function for each element in `self` via the Swish function. The SiLU function is defined as:
+            f(x) = x * sigmoid(x)
+
+        Returns:
+            Tensor: The transformed tensor after applying the SiLU function element-wise.
+
+        Attributes:
+            swish (method): A method that applies the Swish function to the data in `self`. The Swish function is defined as:
+                f(x) = x * sigmoid(x)
+        """
         return self.swish()  # The SiLU function is also known as the swish function.

     def relu6(self):
+        """
+        Calculate the Rectified Linear Unit 6 (ReLU6) activation function.
+
+        This method calculates the ReLU6 function for each element in `self`. The ReLU6 function is defined as:
+            f(x) = min(max(0, x), 6)
+
+        Returns:
+            Tensor: The transformed tensor after applying the ReLU6 function element-wise.
+
+        Attributes:
+            relu (method): A method that applies the Rectified Linear Unit (ReLU) function to the data in `self`. The ReLU function is defined as:
+                f(x) = max(0, x)
+        """
         return self.relu() - (self - 6).relu()

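Here too the clamp is built from two relus; a small plain-Python check of the identity relu(x) - relu(x - 6) == min(max(0, x), 6):

relu = lambda v: max(0.0, v)
for x in (-1.0, 0.0, 3.0, 6.0, 9.0):
    assert relu(x) - relu(x - 6) == min(max(0.0, x), 6.0)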
     def hardswish(self):
+        """
+        Calculate the Hard Swish activation function.
+
+        This method calculates the Hard Swish function for each element in `self`. The Hard Swish function is defined as:
+            f(x) = x * min(max(x + 3, 0), 6) / 6
+
+        Returns:
+            Tensor: The transformed tensor after applying the Hard Swish function element-wise.
+
+        Attributes:
+            relu6 (method): A method that applies the Rectified Linear Unit 6 (ReLU6) function to the data in `self`. The ReLU6 function is defined as:
+                f(x) = min(max(0, x), 6)
+        """
         return self * (self + 3).relu6() * (1 / 6)

     def tanh(self):
+        """
+        Calculate the Hyperbolic Tangent (tanh) activation function.
+
+        This method calculates the tanh function for each element in `self`, using the identity:
+            tanh(x) = 2 * sigmoid(2 * x) - 1
+
+        Returns:
+            Tensor: The transformed tensor after applying the tanh function element-wise.
+
+        Attributes:
+            sigmoid (method): A method that applies the Sigmoid function to the data in `self`. The Sigmoid function is defined as:
+                f(x) = 1 / (1 + exp(-x))
+        """
         return 2.0 * ((2.0 * self).sigmoid()) - 1.0

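A numeric check of that identity against math.tanh:

import math

sigmoid = lambda v: 1 / (1 + math.exp(-v))
for x in (-3.0, -1.0, 0.0, 1.0, 3.0):
    assert abs(math.tanh(x) - (2 * sigmoid(2 * x) - 1)) < 1e-12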
     def sinh(self):
+        """
+        Calculate the Hyperbolic Sine (sinh) activation function.
+
+        This method calculates the sinh function for each element in `self`. The sinh function is defined as:
+            f(x) = (exp(x) - exp(-x)) / 2
+
+        Returns:
+            Tensor: The transformed tensor after applying the sinh function element-wise.
+
+        Attributes:
+            exp (method): A method that applies the Exponential function to the data in `self`. The Exponential function is defined as:
+                f(x) = e^x
+            neg (method): A method that applies the Negation operation to the data in `self`. The Negation operation returns an element-wise negative of `self`.
+        """
         return (self.exp() - self.neg().exp()) / 2

     def cosh(self):
+        """
+        Calculate the Hyperbolic Cosine (cosh) activation function.
+
+        This method calculates the cosh function for each element in `self`. The cosh function is defined as:
+            f(x) = (exp(x) + exp(-x)) / 2
+
+        Returns:
+            Tensor: The transformed tensor after applying the cosh function element-wise.
+
+        Attributes:
+            exp (method): A method that applies the Exponential function to the data in `self`. The Exponential function is defined as:
+                f(x) = e^x
+            neg (method): A method that applies the Negation operation to the data in `self`. The Negation operation returns an element-wise negative of `self`.
+        """
         return (self.exp() + self.neg().exp()) / 2

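Both exp-based forms match the math module directly:

import math

for x in (-2.0, 0.0, 1.5):
    assert abs(math.sinh(x) - (math.exp(x) - math.exp(-x)) / 2) < 1e-9
    assert abs(math.cosh(x) - (math.exp(x) + math.exp(-x)) / 2) < 1e-9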
     def atanh(self):
+        """
+        Calculate the Inverse Hyperbolic Tangent (atanh) activation function.
+
+        This method calculates the atanh function for each element in `self`. The atanh function is defined as:
+            f(x) = log((1 + x) / (1 - x)) / 2
+
+        Returns:
+            Tensor: The transformed tensor after applying the atanh function element-wise.
+
+        Attributes:
+            log (method): A method that applies the Natural Logarithm function to the data in `self`. The Natural Logarithm function is defined as:
+                f(x) = ln(x)
+        """
         return ((1 + self) / (1 - self)).log() / 2

     def asinh(self):
+        """
+        Calculate the Inverse Hyperbolic Sine (asinh) activation function.
+
+        This method calculates the asinh function for each element in `self`. The asinh function is defined as:
+            f(x) = log(x + sqrt(1 + x^2))
+
+        Returns:
+            Tensor: The transformed tensor after applying the asinh function element-wise.
+
+        Attributes:
+            log (method): A method that applies the Natural Logarithm function to the data in `self`. The Natural Logarithm function is defined as:
+                f(x) = ln(x)
+            square (method): A method that squares each element in `self`. The Square operation returns an element-wise square of `self`.
+            sqrt (method): A method that applies the Square Root function to the data in `self`. The Square Root function is defined as:
+                f(x) = sqrt(x)
+        """
         return (self + (self.square() + 1).sqrt()).log()

     def acosh(self):
+        """
+        Calculate the Inverse Hyperbolic Cosine (acosh) activation function.
+
+        This method calculates the acosh function for each element in `self`. The acosh function is defined as:
+            f(x) = log(x + sqrt((x - 1) * (x + 1)))
+
+        Returns:
+            Tensor: The transformed tensor after applying the acosh function element-wise.
+
+        Attributes:
+            log (method): A method that applies the Natural Logarithm function to the data in `self`. The Natural Logarithm function is defined as:
+                f(x) = ln(x)
+            square (method): A method that squares each element in `self`. The Square operation returns an element-wise square of `self`.
+            sqrt (method): A method that applies the Square Root function to the data in `self`. The Square Root function is defined as:
+                f(x) = sqrt(x)
+        """
         return (self + (self.square() - 1).sqrt()).log()

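The three log-based inverse forms agree with the math module on their domains (atanh on (-1, 1), acosh on [1, inf)):

import math

for x in (-0.9, 0.0, 0.9):
    assert abs(math.atanh(x) - math.log((1 + x) / (1 - x)) / 2) < 1e-12
for x in (-2.0, 0.0, 2.0):
    assert abs(math.asinh(x) - math.log(x + math.sqrt(x * x + 1))) < 1e-12
for x in (1.0, 2.0, 10.0):
    assert abs(math.acosh(x) - math.log(x + math.sqrt(x * x - 1))) < 1e-12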
     def hardtanh(self, min_val=-1, max_val=1):
+        """
+        Apply the HardTanh activation function.
+
+        This method applies the HardTanh function to each element in `self`. The HardTanh function is defined as:
+            f(x) = max_val    if x > max_val
+            f(x) = min_val    if x < min_val
+            f(x) = x          otherwise
+
+        Args:
+            min_val (float): The minimum value of the output range. Defaults to -1.
+            max_val (float): The maximum value of the output range. Defaults to 1.
+
+        Returns:
+            Tensor: The transformed tensor after applying the HardTanh function element-wise.
+
+        Attributes:
+            clip (method): A method that clips `self` to the range [min_val, max_val]: elements below min_val are set to min_val, elements above max_val are set to max_val, and elements already inside the range are left unchanged.
+        """
         return self.clip(min_val, max_val)

     def gelu(self):
+        """
+        Apply the Gaussian Error Linear Unit (GELU) activation function.
+
+        This method applies the tanh approximation of the GELU function to each element in `self`:
+            f(x) = 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))
+
+        Returns:
+            Tensor: The transformed tensor after applying the GELU function element-wise.
+
+        Attributes:
+            tanh (method): A method that applies the Hyperbolic Tangent function to the data in `self`. The Hyperbolic Tangent function is defined as:
+                f(x) = tanh(x)
+        """
         return (
             0.5
             * self

@@ -2680,18 +2890,83 @@ class Tensor:
         )

     def quick_gelu(self):
+        """
+        Apply a faster approximation of the Gaussian Error Linear Unit (GELU) activation function.
+
+        This method applies an approximate GELU function to each element in `self`. The approximation is defined as:
+            f(x) = x * sigmoid(1.702 * x)
+
+        Returns:
+            Tensor: The transformed tensor after applying the approximate GELU function element-wise.
+
+        Attributes:
+            sigmoid (method): A method that applies the Sigmoid function to the data in `self`. The Sigmoid function is defined as:
+                f(x) = 1 / (1 + exp(-x))
+        """
         return self * (self * 1.702).sigmoid()

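A quick comparison of the two GELU variants above (an approximation, not an identity):

import math

sigmoid = lambda v: 1 / (1 + math.exp(-v))
gelu_tanh = lambda x: 0.5 * x * (1 + math.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * x ** 3)))
quick_gelu = lambda x: x * sigmoid(1.702 * x)

for x in (-2.0, -0.5, 0.0, 0.5, 2.0):
    assert abs(gelu_tanh(x) - quick_gelu(x)) < 0.05  # close, but only approximately equal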
     def leakyrelu(self, neg_slope=0.01):
+        """
+        Apply the Leaky ReLU activation function.
+
+        This method applies the Leaky ReLU function to each element in `self`. The Leaky ReLU function is defined as:
+            f(x) = max(x, neg_slope * x)
+
+        Args:
+            neg_slope (float): The negative slope parameter for the Leaky ReLU function. Default is 0.01.
+
+        Returns:
+            Tensor: The transformed tensor after applying the Leaky ReLU function element-wise.
+
+        Attributes:
+            relu (method): A method that applies the Rectified Linear Unit (ReLU) function to the data in `self`. The ReLU function is defined as:
+                f(x) = max(0, x)
+        """
         return self.relu() - (-neg_slope * self).relu()

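The two-relu form equals max(x, neg_slope * x) for slopes in [0, 1]; a plain-Python spot check:

relu = lambda v: max(0.0, v)
for x in (-5.0, -1.0, 0.0, 2.0):
    assert relu(x) - relu(-0.01 * x) == max(x, 0.01 * x)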
     def mish(self):
+        """
+        Apply the Mish activation function.
+
+        This method applies the Mish function to each element in `self`. The Mish function is defined as:
+            f(x) = x * tanh(softplus(x))
+
+        Returns:
+            Tensor: The transformed tensor after applying the Mish function element-wise.
+
+        Attributes:
+            softplus (method): A method that applies the Softplus function to the data in `self`. The Softplus function is defined as:
+                f(x) = log(1 + exp(x))
+            tanh (method): A method that applies the hyperbolic tangent function to the data in `self`. The hyperbolic tangent function is defined as:
+                f(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
+        """
         return self * self.softplus().tanh()

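Mish behaves like a smooth relu: exactly zero at zero, and close to x for large positive x.

import math

mish = lambda x: x * math.tanh(math.log(1 + math.exp(x)))
assert mish(0.0) == 0.0
assert abs(mish(10.0) - 10.0) < 1e-3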
     def softplus(self, beta=1):
+        """
+        Apply the Softplus function.
+
+        This method applies the Softplus function to each element in `self`. The Softplus function is defined as:
+            f(x) = (1 / beta) * log(1 + exp(beta * x))
+
+        Args:
+            beta (float): The beta parameter for the Softplus function. Default is 1.
+
+        Returns:
+            Tensor: The transformed tensor after applying the Softplus function element-wise.
+        """
         return (1 / beta) * (1 + (self * beta).exp()).log()

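Softplus is a smooth approximation of relu; it tends to x for large x and to 0 for very negative x:

import math

softplus = lambda x, beta=1.0: (1 / beta) * math.log(1 + math.exp(beta * x))
assert abs(softplus(10.0) - 10.0) < 1e-4
assert abs(softplus(-10.0)) < 1e-4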
     def softsign(self):
+        """
+        Apply the Softsign function.
+
+        This method applies the Softsign function to each element in `self`. The Softsign function is defined as:
+            f(x) = x / (1 + |x|)
+
+        Returns:
+            Tensor: The transformed tensor after applying the Softsign function element-wise.
+        """
         return self / (1 + self.abs())

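Like tanh, softsign squashes its input into (-1, 1), just with slower (polynomial) saturation:

softsign = lambda x: x / (1 + abs(x))
assert all(-1 < softsign(x) < 1 for x in (-100.0, -1.0, 0.0, 1.0, 100.0))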
     # ***** broadcasted binary mlops *****