Commit

Write or improve docstrings and comments
tostenzel committed Jan 5, 2024
1 parent 816824a commit ea7ca14
Showing 7 changed files with 201 additions and 20 deletions.
2 changes: 2 additions & 0 deletions applications/learn_mnist.py
@@ -1,3 +1,5 @@
"""Train a classifier to recognize the hand-written digit on gray-scale images. and evaluate the results."""

import os
import gzip

49 changes: 49 additions & 0 deletions edugrad/_tensor/tensor_broadcasted_binary_mlops.py
@@ -1,3 +1,8 @@
"""
This module implements broadcasted binary operations for Tensors, providing
element-wise arithmetic operations that support broadcasting for tensors of different shapes.
"""
from __future__ import annotations

import math
@@ -10,27 +15,51 @@


def _broadcasted(tensor: Tensor, y: Tensor | float, reverse: bool = False) -> tuple[Tensor, Tensor]:
"""Prepares two tensors for broadcasting to a common shape.
Args:
tensor (Tensor): The first tensor.
y (Tensor | float): The second tensor or a scalar value.
reverse (bool): If True, swaps the tensors before broadcasting.
Returns:
tuple[Tensor, Tensor]: A tuple of two tensors broadcasted to a common shape.
"""
from edugrad.tensor import Tensor

x: Tensor = tensor
# If y is not a tensor, convert it to a tensor with the same dtype as the input tensor.
# If the input tensor is empty, return a tensor full of the scalar value y.
if not isinstance(y, Tensor):
if 0 in x.shape:
return x, x.full_like(y)
y = Tensor(y, requires_grad=False, dtype=tensor.dtype if tensor.dtype != dtypes.bool else dtypes.float32)

# Swap tensors if reverse is True.
if reverse:
x, y = y, x

# Directly return tensors if they are already the same shape.
if (xshape := x.shape) == (yshape := y.shape):
return (x, y)

# Adjust shapes to make them broadcastable. This is done by prepending 1's to the shape
# of the shorter tensor until both shapes have the same length.
shape_delta = len(xshape) - len(yshape)
if shape_delta > 0:
y = y.reshape((1,) * shape_delta + yshape)
elif shape_delta < 0:
x = x.reshape((1,) * -shape_delta + xshape)

# Check if tensors are now the same shape. If yes, return them.
if (xshape := x.shape) == (yshape := y.shape):
return (x, y)

# Determine the final shape after broadcasting. This is the element-wise maximum
# of the shapes of the two tensors.
shape_ret = tuple([max(x, y) for x, y in zip(xshape, yshape)])

# Expand tensors to the final broadcasted shape.
if xshape != shape_ret:
x = x.expand(shape_ret)
if yshape != shape_ret:
@@ -39,6 +68,17 @@ def _broadcasted(tensor: Tensor, y: Tensor | float, reverse: bool = False) -> tuple[Tensor, Tensor]:

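# A minimal standalone sketch of the shape rule implemented above (hypothetical
# helper for illustration only, not part of edugrad): align ranks by prepending
# 1s, then take the element-wise maximum of the two shapes.
def _broadcast_shape_sketch(xshape: tuple, yshape: tuple) -> tuple:
    delta = len(xshape) - len(yshape)
    if delta > 0:
        yshape = (1,) * delta + yshape
    elif delta < 0:
        xshape = (1,) * -delta + xshape
    return tuple(max(a, b) for a, b in zip(xshape, yshape))

# Example: _broadcast_shape_sketch((4, 1), (3,)) returns (4, 3).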

def _to_float(tensor: Tensor, x: Tensor | float):
"""
Converts a tensor to float32 dtype if it is not already a Tensor and
if it is suitable for certain operations where float32 dtype is required.
Args:
tensor (Tensor): The reference tensor to check compatibility.
x (Tensor | float): The tensor or scalar to be converted.
Returns:
The converted tensor or the original scalar.
"""
from edugrad.tensor import Tensor

return (
@@ -52,13 +92,15 @@ def _to_float(tensor: Tensor, x: Tensor | float):


def add(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
"""Adds two tensors or a tensor and a scalar."""
from edugrad.tensor import Tensor

x = tensor._to_float(x)
return function.Add.apply(*tensor._broadcasted(x, reverse)) if x.__class__ is Tensor or x else tensor


def sub(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
"""Subtracts two tensors or a tensor and a scalar."""
from edugrad.tensor import Tensor

x = tensor._to_float(x)
@@ -70,6 +112,7 @@ def sub(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:


def mul(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
"""Multiplies two tensors or a tensor and a scalar."""
from edugrad.tensor import Tensor

x = tensor._to_float(x)
@@ -81,6 +124,7 @@ def mul(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:


def div(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
"""Divides two tensors or a tensor and a scalar."""
from edugrad.tensor import Tensor

x = tensor._to_float(x)
@@ -92,6 +136,7 @@ def div(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:


def pow(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
"""Raises a tensor to the power of another tensor or a scalar."""
from edugrad.tensor import Tensor

x = tensor._to_float(x)
@@ -140,18 +185,22 @@ def pow(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:


def matmul(tensor: Tensor, x: Tensor, reverse=False) -> Tensor:
"""Performs matrix multiplication."""
return x.dot(tensor) if reverse else tensor.dot(x)


def maximum(tensor: Tensor, x: Tensor | float) -> Tensor:
"""Computes the element-wise maximum of two tensors."""
return (tensor < x).detach().where(x, (tensor > x).detach().where(tensor, (tensor + x) / 2))


def minimum(tensor: Tensor, x: Tensor | float) -> Tensor:
"""Computes the element-wise minimum of two tensors."""
return -((-tensor).maximum(-x))


def where(tensor: Tensor, input_: Tensor | float, other: Tensor | float):
"""Selects elements from two tensors based on a condition tensor."""
x_, y = tensor._broadcasted(input_)
x, z = x_._broadcasted(other)
return function.Where.apply(x, *y._broadcasted(z))
73 changes: 67 additions & 6 deletions edugrad/_tensor/tensor_combine_segment.py
@@ -1,47 +1,108 @@
"""Contains tensor operations like concatenation, stacking, repeating, and chunking."""

from __future__ import annotations
import math
from functools import reduce
from itertools import accumulate

from edugrad.helpers import all_int

def cat(tensor: Tensor, *args: Tensor, dim: int) -> Tensor:
"""Concatenates the given tensors along a specified dimension.
Args:
tensor (Tensor): The first tensor to concatenate.
*args (Tensor): Additional tensors to concatenate.
dim (int): The dimension along which to concatenate.
Returns:
Tensor: A new tensor resulting from concatenating the given tensors.
"""
from edugrad.tensor import Tensor

# Adjust the dimension if negative.
dim = (dim + len(tensor.shape)) if dim < 0 else dim

# Ensure all tensors have compatible shapes for concatenation.
assert all(
len(y.shape) == len(tensor.shape) and all(y.shape[i] == s for i, s in enumerate(tensor.shape) if i != dim)
for y in args
)

# Prepare arguments for concatenation.
catargs = [tensor, *args]

# Assert that tensors are not zero-dimensional.
assert all(t.shape for t in catargs), "zero-dimensional tensor cannot be concatenated"

# Calculate shapes and cumulative shapes for slicing.
shapes = [s.shape[dim] for s in catargs]
shape_cumsum = [0, *accumulate(shapes)]
slc = [[(0, 0) for _ in tensor.shape] for _ in catargs]

# Adjust slices for each tensor.
for shp, k, s in zip(shapes, shape_cumsum[:-1], slc):
s[dim] = (k, shape_cumsum[-1] - k - shp)

# Concatenate by padding and adding tensors.
return reduce(Tensor.__add__, [arg.pad(tuple(s)) for arg, s in zip(catargs, slc)])
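# Usage sketch (shapes only): concatenating tensors of shapes (2, 3) and (2, 5)
# along dim=1 zero-pads each input up to the result shape (2, 8) and sums them,
# which places the inputs side by side.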


@staticmethod
def stack(tensors: list[Tensor], dim: int) -> Tensor:
"""Stacks a list of tensors along a new dimension.
Args:
tensors (list[Tensor]): The list of tensors to stack.
dim (int): The dimension along which to stack.
Returns:
Tensor: A new tensor resulting from stacking the given tensors.
"""
from edugrad.tensor import Tensor

# Unsqueeze the first tensor and prepare the rest.
first = tensors[0].unsqueeze(dim)
unsqueezed_tensors = [tensor.unsqueeze(dim) for tensor in tensors[1:]]

# Delegate checks for shapes and number of dimensions to cat.
return first.cat(*unsqueezed_tensors, dim=dim)
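# Usage sketch: stacking three tensors of shape (2, 3) along dim=0 unsqueezes
# each to (1, 2, 3) and concatenates them into shape (3, 2, 3).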


def repeat(tensor: Tensor, repeats: list[int]) -> Tensor:
"""Repeats a tensor along specified dimensions.
Args:
tensor (Tensor): The tensor to repeat.
repeats (list[int]): The number of repetitions for each dimension.
Returns:
Tensor: A new tensor with repeated values.
"""
base_shape = (1,) * (len(repeats) - tensor.ndim) + tensor.shape
new_shape = [x for b in base_shape for x in (1, b)]
expand_shape = [x for rs in zip(repeats, base_shape) for x in rs]
final_shape = [r * s for r, s in zip(repeats, base_shape)]

# Repeat the tensor by reshaping, expanding, and reshaping again.
return tensor.reshape(new_shape).expand(expand_shape).reshape(final_shape)
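# Worked shape example, assuming repeats=(2, 3) and tensor.shape == (4, 5):
#   base_shape   = (4, 5)
#   new_shape    = [1, 4, 1, 5]   # a unit axis inserted before every dimension
#   expand_shape = [2, 4, 3, 5]   # copies made along each unit axis
#   final_shape  = [8, 15]        # each (repeat, dim) pair merged into one axis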


def chunk(tensor: Tensor, num: int, dim: int) -> list[Tensor]:
"""Splits a tensor into a specified number of chunks along a given dimension.
Args:
tensor (Tensor): The tensor to split.
num (int): The number of chunks to create.
dim (int): The dimension along which to split the tensor.
Returns:
list[Tensor]: A list of tensors representing the chunks.
"""
assert all_int(tensor.shape), f"does not support symbolic shape {tensor.shape}"
dim, step = (dim + tensor.ndim if dim < 0 else dim), math.ceil(tensor.shape[dim] / num)

# Generate slice parameters for each chunk.
slice_params = [[slice(None)] * dim + [slice(k, k + step)] for k in range(0, tensor.shape[dim], step)]

# Create each chunk by slicing the tensor.
return [tensor[tuple(sl)] for sl in slice_params]
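# Usage sketch: chunk(t, num=3, dim=0) on a tensor of shape (7, 4) uses
# step = ceil(7 / 3) = 3 and returns chunks of shapes (3, 4), (3, 4), (1, 4).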
6 changes: 6 additions & 0 deletions edugrad/dtypes.py
@@ -1,3 +1,9 @@
"""Defines the allowed datatypes for intializing and casting Tensors.
For simplicity we only use bool, int32 and float32. Note that after applying operations, the results are usually
float32 (see `data.TensorData.elementwise()`).
"""
from typing import ClassVar, Dict, Optional, Final
import numpy as np
from dataclasses import dataclass
2 changes: 2 additions & 0 deletions edugrad/helpers.py
@@ -1,3 +1,5 @@
"""Contains helper functions and DEBUG integer for verbose debugging used throughout the package."""

from typing import Union, Tuple, Iterator, Any
import os
import functools
33 changes: 27 additions & 6 deletions edugrad/tensor.py
@@ -1,4 +1,4 @@
"""Contain the tensor class that can be used for building neural networks with forward and backward pass.
"""Contains the tensor class that can be used for building neural networks with forward and backward pass.
The module contains the "high-level ops". These are syntax sugar and built on top of the "mid-level ops" containing the
the functions with forward and backward passes in Function.function which is build on top of the "low-level ops"
@@ -120,16 +120,25 @@ def dtype(self) -> DType:
# ------------------------------------------------------------------------------------------------------------------
# data handlers

def assign(self, x: Any) -> Tensor:
"""Assigns the value of another tensor or array to the current tensor.
This method is a workaround for writing to disk and is used for in-place modification of tensor data.
"""
if not isinstance(x, Tensor):
# Convert x to a Tensor if it's not already one
x = Tensor(x, dtype=self.dtype)

assert self.shape == x.shape, f"assign shape mismatch {self.shape} != {x.shape}"
assert not x.requires_grad # Ensure x doesn't require gradient computation

if DEBUG >= 4:
print(f"assign {self.data} <- {x.data}")

# If dtype matches and assignment is allowed, perform the assignment
if self.dtype == x.dtype and self.data is not None and not getenv("DISALLOW_ASSIGN"):
x.data.output_buffer = self.data

self.data = x.data
return self

@@ -202,13 +211,25 @@ def uniform(*shape, low=0.0, high=1.0, **kwargs) -> Tensor:
@staticmethod
def scaled_uniform(*shape, **kwargs) -> Tensor: return scaled_uniform(*shape, **kwargs)

def multinomial(self: Tensor, num_samples: int = 1, replacement: bool = False) -> Tensor:
"""Draws samples from the multinomial distribution based on the probability values in the tensor."""
# Validate input dimensions and sample count
assert 1 <= self.ndim <= 2 and num_samples > 0, f"{self.ndim=} must be 1 or 2 dim, {num_samples=} must be positive"
assert replacement or num_samples == 1, "no replacement only supports num_samples = 1"

# If tensor is 1D, add a new dimension at the beginning
weight = self.unsqueeze(0) if self.ndim == 1 else self

# Compute the cumulative distribution function (CDF) for the weights
cdf = (cw := weight.cumsum(1)) / cw[:, -1].unsqueeze(1)

# Generate uniform random samples
unif_samples = Tensor.rand(num_samples, cdf.shape[0], 1)

# Determine indices based on CDF
indices = (unif_samples.expand((-1, -1, cdf.shape[1])) >= cdf).sum(2).permute((1, 0))

# If the original tensor was 1D, squeeze the resulting indices tensor
return (indices.squeeze(0) if self.ndim == 1 else indices).cast(dtypes.int32)
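
# A NumPy sketch of the same inverse-CDF sampling idea (illustration only, not
# part of edugrad; assumes a 1D array of non-negative weights):
#
#   import numpy as np
#
#   def multinomial_sketch(weights, num_samples):
#       cdf = np.cumsum(weights) / np.sum(weights)
#       u = np.random.rand(num_samples, 1)
#       # each sample's index = number of CDF entries that u meets or exceeds
#       return (u >= cdf).sum(axis=1)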

# ------------------------------------------------------------------------------------------------------------------