From ea7ca1401a976fbed7e735c515e0511fdbb92d81 Mon Sep 17 00:00:00 2001
From: tostenzel
Date: Fri, 5 Jan 2024 04:44:04 +0100
Subject: [PATCH] Write or improve docstrings and comments

---
 applications/learn_mnist.py                |  2 +
 .../tensor_broadcasted_binary_mlops.py     | 49 +++++++++++++
 edugrad/_tensor/tensor_combine_segment.py  | 73 +++++++++++++++++--
 edugrad/dtypes.py                          |  6 ++
 edugrad/helpers.py                         |  2 +
 edugrad/tensor.py                          | 33 +++++++--
 tests/gradcheck.py                         | 56 ++++++++++++--
 7 files changed, 201 insertions(+), 20 deletions(-)

diff --git a/applications/learn_mnist.py b/applications/learn_mnist.py
index ce6692b..d87a9a2 100755
--- a/applications/learn_mnist.py
+++ b/applications/learn_mnist.py
@@ -1,3 +1,5 @@
+"""Train a classifier to recognize hand-written digits on gray-scale images and evaluate the results."""
+
 import os
 import gzip
 
diff --git a/edugrad/_tensor/tensor_broadcasted_binary_mlops.py b/edugrad/_tensor/tensor_broadcasted_binary_mlops.py
index 40a8cde..be8a83f 100644
--- a/edugrad/_tensor/tensor_broadcasted_binary_mlops.py
+++ b/edugrad/_tensor/tensor_broadcasted_binary_mlops.py
@@ -1,3 +1,8 @@
+"""
+This module implements broadcasted binary operations for Tensors, providing
+element-wise arithmetic operations that support broadcasting for tensors of different shapes.
+
+"""
 from __future__ import annotations
 
 import math
@@ -10,27 +15,51 @@
 def _broadcasted(tensor: Tensor, y: Tensor | float, reverse: bool = False) -> tuple[Tensor, Tensor]:
+    """Prepares two tensors for broadcasting to a common shape.
+
+    Args:
+        tensor (Tensor): The first tensor.
+        y (Tensor | float): The second tensor or a scalar value.
+        reverse (bool): If True, swaps the tensors before broadcasting.
+
+    Returns:
+        tuple[Tensor, Tensor]: A tuple of two tensors broadcasted to a common shape.
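+
+    Example (illustrative sketch; the shapes follow the rules implemented below):
+        a = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])  # shape (2, 3)
+        b = Tensor([10.0, 20.0, 30.0])                  # shape (3,)
+        x, y = a._broadcasted(b)
+        # b is reshaped to (1, 3) and then expanded, so x.shape == y.shape == (2, 3)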
+    """
     from edugrad.tensor import Tensor
 
     x: Tensor = tensor
+    # If y is not a tensor, convert it to a tensor with the same dtype as the input tensor.
+    # If the input tensor is empty, return a tensor full of the scalar value y.
     if not isinstance(y, Tensor):
         if 0 in x.shape:
             return x, x.full_like(y)
         y = Tensor(y, requires_grad=False, dtype=tensor.dtype if tensor.dtype != dtypes.bool else dtypes.float32)
+
+    # Swap tensors if reverse is True.
     if reverse:
         x, y = y, x
+
+    # Directly return tensors if they are already the same shape.
     if (xshape := x.shape) == (yshape := y.shape):
         return (x, y)
 
+    # Adjust shapes to make them broadcastable. This is done by prepending 1's to the shape
+    # of the shorter tensor until both shapes have the same length.
     shape_delta = len(xshape) - len(yshape)
     if shape_delta > 0:
         y = y.reshape((1,) * shape_delta + yshape)
     elif shape_delta < 0:
         x = x.reshape((1,) * -shape_delta + xshape)
+
+    # Check if tensors are now the same shape. If yes, return them.
     if (xshape := x.shape) == (yshape := y.shape):
         return (x, y)
 
+    # Determine the final shape after broadcasting. This is the element-wise maximum
+    # of the shapes of the two tensors.
     shape_ret = tuple([max(x, y) for x, y in zip(xshape, yshape)])
+
+    # Expand tensors to the final broadcasted shape.
     if xshape != shape_ret:
         x = x.expand(shape_ret)
     if yshape != shape_ret:
@@ -39,6 +68,17 @@ def _broadcasted(tensor: Tensor, y: Tensor | float, reverse: bool = False) -> tu
 def _to_float(tensor: Tensor, x: Tensor | float):
+    """
+    Converts the input to float32 if it is not already a Tensor and
+    if it is suitable for certain operations where float32 dtype is required.
+
+    Args:
+        tensor (Tensor): The reference tensor to check compatibility.
+        x (Tensor | float): The tensor or scalar to be converted.
+
+    Returns:
+        The converted tensor or the original scalar.
+    """
     from edugrad.tensor import Tensor
 
     return (
@@ -52,6 +92,7 @@ def _to_float(tensor: Tensor, x: Tensor | float):
 def add(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
+    """Adds two tensors or a tensor and a scalar."""
     from edugrad.tensor import Tensor
 
     x = tensor._to_float(x)
@@ -59,6 +100,7 @@
 def sub(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
+    """Subtracts two tensors or a tensor and a scalar."""
     from edugrad.tensor import Tensor
 
     x = tensor._to_float(x)
@@ -70,6 +112,7 @@ def sub(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
 def mul(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
+    """Multiplies two tensors or a tensor and a scalar."""
     from edugrad.tensor import Tensor
 
     x = tensor._to_float(x)
@@ -81,6 +124,7 @@ def mul(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
 def div(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
+    """Divides two tensors or a tensor and a scalar."""
     from edugrad.tensor import Tensor
 
     x = tensor._to_float(x)
@@ -92,6 +136,7 @@ def div(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
 def pow(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
+    """Raises a tensor to the power of another tensor or a scalar."""
     from edugrad.tensor import Tensor
 
     x = tensor._to_float(x)
@@ -140,18 +185,22 @@ def pow(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
 def matmul(tensor: Tensor, x: Tensor, reverse=False) -> Tensor:
+    """Performs matrix multiplication."""
     return x.dot(tensor) if reverse else tensor.dot(x)
 
 
 def maximum(tensor: Tensor, x: Tensor | float) -> Tensor:
+    """Computes the element-wise maximum of two tensors."""
     return (tensor < x).detach().where(x, (tensor > x).detach().where(tensor, (tensor + x) / 2))
 
 
 def minimum(tensor: Tensor, x: Tensor | float) -> Tensor:
+    """Computes the element-wise minimum of two tensors."""
     return -((-tensor).maximum(-x))
 
 
 def where(tensor: Tensor, input_: Tensor | float, other: Tensor | float):
+    """Selects elements from two tensors based on a condition tensor."""
     x_, y = tensor._broadcasted(input_)
     x, z = x_._broadcasted(other)
     return function.Where.apply(x, *y._broadcasted(z))
diff --git a/edugrad/_tensor/tensor_combine_segment.py b/edugrad/_tensor/tensor_combine_segment.py
index 32eb2e2..fb0bdba 100644
--- a/edugrad/_tensor/tensor_combine_segment.py
+++ b/edugrad/_tensor/tensor_combine_segment.py
@@ -1,3 +1,5 @@
+"""Contains tensor operations like concatenation, stacking, repeating, and chunking."""
+
 from __future__ import annotations
 import math
 from functools import reduce
@@ -5,43 +7,102 @@
 from edugrad.helpers import all_int
 
 
+def cat(tensor: Tensor, *args: Tensor, dim: int) -> Tensor:
+    """Concatenates the given tensors along a specified dimension.
+
+    Args:
+        tensor (Tensor): The first tensor to concatenate.
+        *args (Tensor): Additional tensors to concatenate.
+        dim (int): The dimension along which to concatenate.
-def cat(tensor, *args, dim) -> Tensor:
+    Returns:
+        Tensor: A new tensor resulting from concatenating the given tensors.
+    """
     from edugrad.tensor import Tensor
 
+    # Adjust the dimension if negative.
     dim = (dim + len(tensor.shape)) if dim < 0 else dim
+
+    # Ensure all tensors have compatible shapes for concatenation.
     assert all(
         len(y.shape) == len(tensor.shape) and all(y.shape[i] == s for i, s in enumerate(tensor.shape) if i != dim)
         for y in args
     )
+
+    # Prepare arguments for concatenation.
     catargs = [tensor, *args]
+
+    # Assert that tensors are not zero-dimensional.
     assert all(t.shape for t in catargs), "zero-dimensional tensor cannot be concatenated"
+
+    # Calculate shapes and cumulative shapes for slicing.
     shapes = [s.shape[dim] for s in catargs]
     shape_cumsum = [0, *accumulate(shapes)]
     slc = [[(0, 0) for _ in tensor.shape] for _ in catargs]
+
+    # Adjust slices for each tensor.
     for shp, k, s in zip(shapes, shape_cumsum[:-1], slc):
         s[dim] = (k, shape_cumsum[-1] - k - shp)
+
+    # Concatenate by padding and adding tensors.
     return reduce(Tensor.__add__, [arg.pad(tuple(s)) for arg, s in zip(catargs, slc)])
 
 
-@staticmethod
-def stack(tensors, dim) -> Tensor:
+def stack(tensors: list[Tensor], dim: int) -> Tensor:
+    """Stacks a list of tensors along a new dimension.
+
+    Args:
+        tensors (list[Tensor]): The list of tensors to stack.
+        dim (int): The dimension along which to stack.
+
+    Returns:
+        Tensor: A new tensor resulting from stacking the given tensors.
+    """
+
+    from edugrad.tensor import Tensor
+
+    # Unsqueeze the first tensor and prepare the rest.
     first = tensors[0].unsqueeze(dim)
     unsqueezed_tensors = [tensor.unsqueeze(dim) for tensor in tensors[1:]]
-    # checks for shapes and number of Falsedimensions delegated to cat
+
+    # Delegate checks for shapes and number of dimensions to cat.
     return first.cat(*unsqueezed_tensors, dim=dim)
 
 
-def repeat(tensor: Tensor, repeats) -> Tensor:
+def repeat(tensor: Tensor, repeats: list[int]) -> Tensor:
+    """Repeats a tensor along specified dimensions.
+
+    Args:
+        tensor (Tensor): The tensor to repeat.
+        repeats (list[int]): The number of repetitions for each dimension.
+
+    Returns:
+        Tensor: A new tensor with repeated values.
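+
+    Example (illustrative; assumes the bound Tensor.repeat method accepts a sequence of repeats):
+        t = Tensor([[1, 2, 3], [4, 5, 6]])  # shape (2, 3)
+        r = t.repeat((2, 2))                # reshape to (1, 2, 1, 3), expand to (2, 2, 2, 3), reshape to (4, 6)
+        # r has shape (4, 6): the original block tiled twice along each dimension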
+    """
     base_shape = (1,) * (len(repeats) - tensor.ndim) + tensor.shape
     new_shape = [x for b in base_shape for x in (1, b)]
     expand_shape = [x for rs in zip(repeats, base_shape) for x in rs]
     final_shape = [r * s for r, s in zip(repeats, base_shape)]
+
+    # Repeat the tensor by reshaping, expanding, and reshaping again.
     return tensor.reshape(new_shape).expand(expand_shape).reshape(final_shape)
 
 
 def chunk(tensor: Tensor, num: int, dim: int) -> list[Tensor]:
+    """Splits a tensor into a specified number of chunks along a given dimension.
+
+    Args:
+        tensor (Tensor): The tensor to split.
+        num (int): The number of chunks to create.
+        dim (int): The dimension along which to split the tensor.
+
+    Returns:
+        list[Tensor]: A list of tensors representing the chunks.
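+
+    Example (illustrative; assumes Tensor.arange is available):
+        t = Tensor.arange(10)      # shape (10,)
+        parts = t.chunk(3, dim=0)  # step = ceil(10 / 3) = 4
+        # parts have shapes (4,), (4,) and (2,)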
+ +""" from typing import ClassVar, Dict, Optional, Final import numpy as np from dataclasses import dataclass diff --git a/edugrad/helpers.py b/edugrad/helpers.py index 025e2e3..4e4bff6 100644 --- a/edugrad/helpers.py +++ b/edugrad/helpers.py @@ -1,3 +1,5 @@ +"""Contains helper functions and DEBUG integer for verbose debugging used throughout the package.""" + from typing import Union, Tuple, Iterator, Any import os import functools diff --git a/edugrad/tensor.py b/edugrad/tensor.py index 6a1b2b7..3d7a40b 100644 --- a/edugrad/tensor.py +++ b/edugrad/tensor.py @@ -1,4 +1,4 @@ -"""Contain the tensor class that can be used for building neural networks with forward and backward pass. +"""Contains the tensor class that can be used for building neural networks with forward and backward pass. The module contains the "high-level ops". These are syntax sugar and built on top of the "mid-level ops" containing the the functions with forward and backward passes in Function.function which is build on top of the "low-level ops" @@ -120,16 +120,25 @@ def dtype(self) -> DType: # ------------------------------------------------------------------------------------------------------------------ # data handlers - def assign(self, x) -> Tensor: - # TODO: this is a hack for writing to DISK - if x.__class__ is not Tensor: + def assign(self, x: Any) -> Tensor: + """Assigns the value of another tensor or array to the current tensor. + + This method is a workaround for writing to disk and is used for in-place modification of tensor data. + """ + if not isinstance(x, Tensor): + # Convert x to a Tensor if it's not already one x = Tensor(x, dtype=self.dtype) + assert self.shape == x.shape, f"assign shape mismatch {self.shape} != {x.shape}" - assert not x.requires_grad # tensor requires_grad is okay? 
+        # Validate input dimensions and sample count
         assert 1 <= self.ndim <= 2 and num_samples > 0, f"{self.ndim=} must be 1 or 2 dim, {num_samples=} must be positive"
         assert replacement or num_samples == 1, "no replacement only supports num_samples = 1"
+
+        # If tensor is 1D, add a new dimension at the beginning
         weight = self.unsqueeze(0) if self.ndim == 1 else self
+
+        # Compute the cumulative distribution function (CDF) for the weights
         cdf = (cw := weight.cumsum(1)) / cw[:, -1].unsqueeze(1)
+
+        # Generate uniform random samples
         unif_samples = Tensor.rand(num_samples, cdf.shape[0], 1)
+
+        # Determine indices based on CDF
         indices = (unif_samples.expand((-1, -1, cdf.shape[1])) >= cdf).sum(2).permute((1, 0))
+
+        # If the original tensor was 1D, squeeze the resulting indices tensor
         return (indices.squeeze(0) if self.ndim == 1 else indices).cast(dtypes.int32)
 
     # ------------------------------------------------------------------------------------------------------------------
diff --git a/tests/gradcheck.py b/tests/gradcheck.py
index 3f207c0..a19a002 100644
--- a/tests/gradcheck.py
+++ b/tests/gradcheck.py
@@ -1,16 +1,36 @@
+"""Contains functions for computing the Jacobian and performing gradient checks."""
+
 import numpy as np
 from edugrad.tensor import Tensor
+from typing import Callable, List, Union
+
+def mask_like(like: np.ndarray, mask_inx: Union[int, List[int]], mask_value: float = 1.0) -> np.ndarray:
+    """Creates a mask array that is like the input array but with specified values masked.
-def mask_like(like, mask_inx, mask_value=1.0):
+    Args:
+        like (array): The array to mimic in terms of shape.
+        mask_inx (int or array-like): Indices to mask.
+        mask_value (float, optional): The value to set at the masked indices. Defaults to 1.0.
+
+    Returns:
+        array: Masked array with the same shape as `like`.
+    """
     mask = np.zeros_like(like).reshape(-1)
     mask[mask_inx] = mask_value
     return mask.reshape(like.shape)
 
 
+def jacobian(func: Callable, input: Tensor):
+    """Computes the Jacobian matrix for a function at a given input.
-def jacobian(func, input):
-    output = func(input)
+    Args:
+        func: The function for which to compute the Jacobian.
+        input: The input tensor at which to evaluate the Jacobian.
+
+    Returns:
+        array: Jacobian matrix evaluated at the given input.
+    """
+    output = func(input)
     ji = input.numpy().reshape(-1).shape[-1]
     jo = output.numpy().reshape(-1).shape[-1]
     J = np.zeros((jo, ji), dtype=np.float32)
@@ -19,8 +39,8 @@ def jacobian(func, input):
         input.grad = None
         output = func(input)
 
-        # edugrad doesn't support slicing, tiny-hack to select
-        # the needed scalar an backpropagate only through it
+        # edugrad doesn't support slicing, workaround to select
+        # the needed scalar and backpropagate only through it.
         o_scalar = Tensor(mask_like(output.numpy(), o, 1.0)).mul(output).sum()
         o_scalar.backward()
@@ -28,8 +48,17 @@ def jacobian(func, input):
             J[o, i] = grad
     return J
 
 
+def numerical_jacobian(func: Callable, input: Tensor, eps: float = 1e-3):
+    """Computes an approximation of the Jacobian matrix using finite differences.
+
+    Args:
+        func: The function for which to approximate the Jacobian.
+        input: The input tensor at which to approximate the Jacobian.
+        eps: The epsilon for finite differences. Defaults to 1e-3.
-def numerical_jacobian(func, input, eps=1e-3):
+    Returns:
+        array: Approximated Jacobian matrix.
+    """
     output = func(input)
 
     ji = input.numpy().reshape(-1).shape[-1]
@@ -47,8 +76,19 @@ def numerical_jacobian(func, input, eps=1e-3):
         NJ[:, i] = grad_approx
     return NJ
 
 
+def gradcheck(func: Callable, input: Tensor, eps: float = 1e-3, atol: float = 1e-3, rtol: float = 1e-3):
+    """Performs a gradient check by comparing the Jacobian to its numerical approximation.
+
+    Args:
+        func: The function for which to perform the gradient check.
+        input: The input tensor for the function.
+        eps: Epsilon for finite differences in the numerical Jacobian. Defaults to 1e-3.
+        atol: Absolute tolerance for np.allclose. Defaults to 1e-3.
+        rtol: Relative tolerance for np.allclose. Defaults to 1e-3.
-def gradcheck(func, input, eps=1e-3, atol=1e-3, rtol=1e-3):
+    Returns:
+        bool: True if the computed Jacobian is close to its numerical approximation.
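+
+    Example (illustrative; the input must require gradients so that backward() populates input.grad):
+        x = Tensor([[0.5, -1.0], [2.0, 0.1]], requires_grad=True)
+        assert gradcheck(lambda t: (t * t).sum(), x)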
+ """ + output = func(input) ji = input.numpy().reshape(-1).shape[-1] jo = output.numpy().reshape(-1).shape[-1] J = np.zeros((jo, ji), dtype=np.float32) @@ -19,8 +39,8 @@ def jacobian(func, input): input.grad = None output = func(input) - # edugrad doesn't support slicing, tiny-hack to select - # the needed scalar an backpropagate only through it + # edugrad doesn't support slicing, workaround to select + # the needed scalar and backpropagate only through it. o_scalar = Tensor(mask_like(output.numpy(), o, 1.0)).mul(output).sum() o_scalar.backward() @@ -28,8 +48,17 @@ def jacobian(func, input): J[o, i] = grad return J +def numerical_jacobian(func: Callable, input: Tensor, eps:float=1e-3): + """Computes an approximation of the Jacobian matrix using finite differences. + + Args: + func: The function for which to approximate the Jacobian. + input: The input tensor at which to approximate the Jacobian. + eps: The epsilon for finite differences. Defaults to 1e-3. -def numerical_jacobian(func, input, eps=1e-3): + Returns: + array: Approximated Jacobian matrix. + """ output = func(input) ji = input.numpy().reshape(-1).shape[-1] @@ -47,8 +76,19 @@ def numerical_jacobian(func, input, eps=1e-3): NJ[:, i] = grad_approx return NJ +def gradcheck(func: Callable, input: Tensor, eps: float=1e-3, atol: float=1e-3, rtol: float=1e-3): + """Performs a gradient check by comparing the Jacobian to its numerical approximation. + + Args: + func: The function for which to perform the gradient check. + input : The input tensor for the function. + eps: Epsilon for finite differences in numerical Jacobian. Defaults to 1e-3. + atol: Absolute tolerance for np.allclose. Defaults to 1e-3. + rtol: Relative tolerance for np.allclose. Defaults to 1e-3. -def gradcheck(func, input, eps=1e-3, atol=1e-3, rtol=1e-3): + Returns: + bool: True if the computed Jacobian is close to its numerical approximation. + """ NJ = numerical_jacobian(func, input, eps) J = jacobian(func, input) - return np.allclose(J, NJ, atol=atol, rtol=rtol) \ No newline at end of file + return np.allclose(J, NJ, atol=atol, rtol=rtol)