diff --git a/LICENSE b/LICENSE
index e9c9427..7b2f163 100644
--- a/LICENSE
+++ b/LICENSE
@@ -4,4 +4,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
 
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/edugrad/__init__.py b/edugrad/__init__.py
index cc1ec9f..d5e0fc6 100644
--- a/edugrad/__init__.py
+++ b/edugrad/__init__.py
@@ -1 +1 @@
-from edugrad.tensor import Tensor  # noqa: F401
+from edugrad.tensor import Tensor
diff --git a/edugrad/_tensor/tensor_broadcasted_binary_mlops.py b/edugrad/_tensor/tensor_broadcasted_binary_mlops.py
index 00dcee8..004ff4d 100644
--- a/edugrad/_tensor/tensor_broadcasted_binary_mlops.py
+++ b/edugrad/_tensor/tensor_broadcasted_binary_mlops.py
@@ -1,5 +1,8 @@
-"""This module implements broadcasted binary operations for Tensors, providing element-wise arithmetic operations that
-support broadcasting for tensors of different shapes."""
+"""Contains broadcasted binary operations for Tensors.
+
+These provide element-wise arithmetic that supports broadcasting between tensors of different shapes.
+
+"""
 from __future__ import annotations
 
 import math
@@ -66,8 +69,7 @@ def _broadcasted(tensor: Tensor, y: Tensor | float, reverse: bool = False) -> tu
 
 
 def _to_float(tensor: Tensor, x: Tensor | float):
-    """Converts a tensor to float32 dtype if it is not already a Tensor and if it is suitable for certain operations
-    where float32 dtype is required.
+    """Converts a tensor to float32 dtype.
 
     Args:
         tensor (Tensor): The reference tensor to check compatibility.
@@ -81,6 +83,7 @@ def _to_float(tensor: Tensor, x: Tensor | float):
 
     return (
         x.data.base.op.arg
+        # Use x's underlying constant (op.arg) only when x is a Tensor and the remaining conditions below hold.
         if isinstance(x, Tensor)
         and x.data.is_unrealized_contiguous_const()
         and not x.requires_grad
@@ -139,7 +142,7 @@ def pow(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
 
     x = tensor._to_float(x)
     if x.__class__ is not Tensor and not reverse:
-        # simple pow identities
+        # Simple pow identities
         if x < 0:
             return tensor.reciprocal().pow(-x)
         if x == 3.0:
@@ -153,7 +156,7 @@ def pow(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
     if not isinstance(x, Tensor) and reverse and x > 0:
         return tensor.mul(math.log(x)).exp()
     ar = tensor.abs().log().mul(x).exp() if not reverse or isinstance(x, Tensor) else tensor.mul(math.log(abs(x))).exp()
-    # correct sign of negative numbers raised to a power (cos has a period of 2pi so we use it here to get the oddness of the power)
+    # Correct sign of negative numbers raised to a power (cos has a period of 2pi so we use it here to get the oddness of the power)
     sign = (
         (x * math.pi).cos()
         if isinstance(x, Tensor)
@@ -161,16 +164,16 @@ def pow(tensor: Tensor, x: Tensor | float, reverse=False) -> Tensor:
         if not reverse
         else (tensor * math.pi).cos()
     )
-    # we only need to correct the sign if the base is negative
+    # We only need to correct the sign if the base is negative
     base_sign = (
         (tensor.sign() if not reverse else x.sign() if isinstance(x, Tensor) else math.copysign(1, x)) - 1
     ) / -2
-    # we need 0 to be positive so we need to correct base_sign when the base is 0
+    # We need 0 to be positive so we need to correct base_sign when the base is 0
     base_sign = base_sign - (
         1.5
         * (1 - (tensor.sign().abs() if not reverse else x.sign().abs() if isinstance(x, Tensor) else abs(int(bool(x)))))
     )
-    # inject nan if the base is negative and the power is not an integer
+    # Inject nan if the base is negative and the power is not an integer
     to_nan = (
         ((x - x.trunc()) * 1e10).abs().clip(0, 1)
         if isinstance(x, Tensor)
diff --git a/edugrad/_tensor/tensor_create.py b/edugrad/_tensor/tensor_create.py
index 8f5b454..e183b4f 100644
--- a/edugrad/_tensor/tensor_create.py
+++ b/edugrad/_tensor/tensor_create.py
@@ -1,7 +1,16 @@
+"""Contains low-level operation entry points and helper functions for tensor creation and manipulation.
+
+It includes functions for creating tensors with specific properties (like being empty, random, or having specific
+values) and for random number generation.
+
+"""
+
 from __future__ import annotations
 import time
 import math
 
+from typing import Optional, Any
+
 from edugrad.dtypes import DType, dtypes
 from edugrad.helpers import argfix, prod, shape_int
 from edugrad.data import TensorData
@@ -11,7 +20,20 @@
 # creation low-level op entrypoint *****
 
 
-def _loadop(op, sz, dtype: DType | None = None, arg=None, **kwargs):
+def _loadop(op: LoadOps, sz: int, dtype: DType | None = None, arg: Any = None, **kwargs) -> Tensor:
+    """Internal helper function to create a Tensor with a specified operation.
+
+    Args:
+    - op: Operation to be performed for tensor creation.
+    - sz: Size of the tensor to be created.
+    - dtype: Data type of the tensor. Defaults to Tensor's default type if not provided.
+    - arg: Additional argument for the operation.
+    - kwargs: Additional keyword arguments.
+
+    Returns:
+    - Tensor: A new tensor created with the specified operation.
+
+    """
     from edugrad.tensor import Tensor
 
     assert isinstance(sz, int), f"cannot create with symbolic size {sz}"
@@ -20,7 +42,8 @@ def _loadop(op, sz, dtype: DType | None = None, arg=None, **kwargs):
     )
 
 
-def empty(*shape, **kwargs):
+def empty(*shape, **kwargs) -> Tensor:
+    """Creates an uninitialized tensor with the given shape."""
     from edugrad.tensor import Tensor
 
     return Tensor._loadop(LoadOps.EMPTY, prod(shape := argfix(*shape)), **kwargs).reshape(shape)
@@ -30,12 +53,23 @@ def empty(*shape, **kwargs):
 
 
 def manual_seed(seed=0):
+    """Sets the manual seed for random number generation."""
     from edugrad.tensor import Tensor
 
     Tensor._seed = seed
 
 
-def rand(*shape, **kwargs):
+def rand(*shape, **kwargs) -> Tensor:
+    """Creates a tensor with elements uniformly distributed between 0 and 1.
+
+    Args:
+    - shape: Variable length argument list for the dimensions of the tensor.
+    - kwargs: Additional keyword arguments.
+
+    Returns:
+    - Tensor: A tensor with random elements uniformly distributed.
+
+    """
     from edugrad.tensor import Tensor
 
     Tensor._seed += 1
@@ -46,25 +80,40 @@ def rand(*shape, **kwargs):
 # creation helper functions
 
 
-def full(shape: tuple[shape_int, ...], fill_value, **kwargs):
+def full(shape: tuple[shape_int, ...], fill_value, **kwargs) -> Tensor:
+    """Creates a tensor filled entirely with the specified fill value."""
     from edugrad.tensor import Tensor
 
     return Tensor(fill_value, **kwargs).reshape([1] * len(new_shape := argfix(shape))).expand(new_shape)
 
 
-def zeros(*shape, **kwargs):
+def zeros(*shape, **kwargs) -> Tensor:
+    """Creates a tensor filled entirely with zeros."""
     from edugrad.tensor import Tensor
 
     return Tensor.full(argfix(*shape), 0, **kwargs)
 
 
-def ones(*shape, **kwargs):
+def ones(*shape, **kwargs) -> Tensor:
+    """Creates a tensor filled entirely with ones."""
     from edugrad.tensor import Tensor
 
     return Tensor.full(argfix(*shape), 1, **kwargs)
 
 
-def arange(start, stop, step, **kwargs):
+def arange(start: int | float, stop: int | float | None, step: int | float, **kwargs) -> Tensor:
+    """Creates a 1D tensor with a sequence of numbers from start to stop with a step size.
+
+    Args:
+    - start: The start of the sequence.
+    - stop: The end of the sequence (exclusive).
+    - step: The step size between each number in the sequence.
+    - kwargs: Additional keyword arguments.
+
+    Returns:
+    - Tensor: A 1D tensor containing a sequence of numbers.
+
+    """
     from edugrad.tensor import Tensor
 
     if stop is None:
@@ -72,7 +121,8 @@ def arange(start, stop, step, **kwargs):
     return Tensor.full((math.ceil((stop - start) / step),), step, **kwargs).cumsum() + (start - step)
 
 
-def eye(dim: int, **kwargs):
+def eye(dim: int, **kwargs) -> Tensor:
+    """Creates a 2D identity tensor."""
     from edugrad.tensor import Tensor
 
     return (
@@ -84,18 +134,21 @@ def eye(dim: int, **kwargs):
     )
 
 
-def full_like(self, fill_value, **kwargs):
+def full_like(tensor, fill_value, **kwargs) -> Tensor:
+    """Creates a tensor with the same shape as the given tensor, filled with a specified value."""
     from edugrad.tensor import Tensor
 
-    return Tensor.full(self.shape, fill_value=fill_value, dtype=kwargs.pop("dtype", self.dtype), **kwargs)
+    return Tensor.full(tensor.shape, fill_value=fill_value, dtype=kwargs.pop("dtype", tensor.dtype), **kwargs)
 
 
-def zeros_like(self, **kwargs):
-    return self.full_like(0, **kwargs)
+def zeros_like(tensor, **kwargs) -> Tensor:
+    """Creates a tensor with the same shape as the given tensor, filled with zeros."""
+    return tensor.full_like(0, **kwargs)
 
 
-def ones_like(self, **kwargs):
-    return self.full_like(1, **kwargs)
+def ones_like(tensor, **kwargs) -> Tensor:
+    """Creates a tensor with the same shape as the given tensor, filled with ones."""
+    return tensor.full_like(1, **kwargs)
 
 
 # -----------------------------------------------------------------------------------------------------------------------
@@ -103,6 +156,7 @@ def ones_like(self, **kwargs):
 
 
 def randn(*shape, dtype: DType | None, **kwargs) -> Tensor:
+    """Creates a tensor with elements sampled from a standard normal distribution."""
     from edugrad.tensor import Tensor
 
     # https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
@@ -117,18 +171,21 @@ def randn(*shape, dtype: DType | None, **kwargs) -> Tensor:
 
 
 def randint(*shape, low, high, **kwargs) -> Tensor:
+    """Creates a tensor with elements sampled uniformly from the discrete interval [low, high)."""
     from edugrad.tensor import Tensor
 
     return (Tensor.rand(*shape, **kwargs) * (high - low) + low).cast(dtypes.int32)
 
 
 def normal(*shape, mean, std, **kwargs) -> Tensor:
+    """Creates a tensor with elements sampled from a normal (Gaussian) distribution."""
     from edugrad.tensor import Tensor
 
     return (std * Tensor.randn(*shape, **kwargs)) + mean
 
 
 def uniform(*shape, low, high, **kwargs) -> Tensor:
+    """Creates a tensor with elements uniformly distributed over the interval [low, high)."""
     from edugrad.tensor import Tensor
 
     dtype = kwargs.pop("dtype", Tensor.default_type)
@@ -136,6 +193,11 @@ def uniform(*shape, low, high, **kwargs) -> Tensor:
 
 
 def scaled_uniform(*shape, **kwargs) -> Tensor:
+    """Creates a scaled tensor with elements uniformly distributed over the interval [-1.0, 1.0).
+
+    It is scaled by the inverse square root of the product of the tensor's shape.
+
+    """
     from edugrad.tensor import Tensor
 
     return Tensor.uniform(*shape, low=-1.0, high=1.0, **kwargs).mul(prod(shape) ** -0.5)
diff --git a/edugrad/_tensor/tensor_index_slice.py b/edugrad/_tensor/tensor_index_slice.py
index 59583b3..d5721c3 100644
--- a/edugrad/_tensor/tensor_index_slice.py
+++ b/edugrad/_tensor/tensor_index_slice.py
@@ -1,4 +1,4 @@
-from typing import Sequence, Optional, Tuple
+from typing import Sequence, Optional, Tuple, Union
 from collections import defaultdict
 
 from edugrad.dtypes import dtypes
@@ -36,6 +36,16 @@
 def __getitem__(
     tensor: "Tensor", val
 ) -> "Tensor":  # val: Union[int, slice, Tensor, None, Ellipsis, Tuple[Union[int, slice, Tensor, None, Ellipsis], ...]]
+    """Retrieves an element or a slice from the tensor based on the specified value.
+
+    Args:
+    - tensor (Tensor): The tensor from which to retrieve the element or slice.
+    - val: The index or slice object. Can be an integer, slice, Tensor, None, Ellipsis, or a combination thereof in a tuple.
+
+    Returns:
+    - Tensor: A tensor containing the retrieved element or slice.
+
+    """
     from edugrad.tensor import Tensor
 
     def normalize_int(e, i, dim_sz):
@@ -138,11 +148,27 @@ def normalize_int(e, i, dim_sz):
 
 
 def __setitem__(tensor: "Tensor", s, v):
+    """Assigns a value to a specified element or slice of the tensor.
+
+    Args:
+    - tensor (Tensor): The tensor to modify.
+    - s: The index or slice where the value will be assigned.
+    - v: The value to be assigned.
+
+    """
     return tensor.__getitem__(s).assign(v)
 
 
 # NOTE: using slice is discouraged and things should migrate to pad and shrink
 def tslice(tensor: "Tensor", arg: Sequence[Optional[Tuple[int, shape_int]]], value: float = 0) -> "Tensor":
+    """Applies slicing to a tensor, using padding and shrinking for manipulation.
+
+    Args:
+    - tensor (Tensor): The tensor to slice.
+    - arg (Sequence[Optional[Tuple[int, shape_int]]]): A sequence of tuples defining the slicing parameters.
+    - value (float): The padding value to be used if necessary.
+
+    """
     from edugrad.tensor import Tensor
 
     arg_ = tuple([a if a is not None else (0, s) for s, a in zip(tensor.shape, arg)])
@@ -154,7 +180,15 @@ def tslice(tensor: "Tensor", arg: Sequence[Optional[Tuple[int, shape_int]]], val
 
 
 def gather(tensor: "Tensor", idx: "Tensor", dim: int) -> "Tensor":
-    from edugrad._tensor import Tensor
+    """Gathers elements from the tensor along a specified dimension, according to indices specified in another tensor.
+
+    Args:
+    - tensor (Tensor): The tensor from which to gather elements.
+    - idx (Tensor): The tensor containing indices to gather.
+    - dim (int): The dimension along which to gather.
+
+    """
+    from edugrad.tensor import Tensor
 
     assert idx.ndim == tensor.ndim, "tensor.ndim must equal idx.ndim"
     assert all(
diff --git a/edugrad/_tensor/tensor_reshape.py b/edugrad/_tensor/tensor_reshape.py
index c1873c1..5a0ecea 100644
--- a/edugrad/_tensor/tensor_reshape.py
+++ b/edugrad/_tensor/tensor_reshape.py
@@ -1,81 +1,164 @@
+"""Contains various tensor manipulation operations that can change the shape of a tensor."""
+
 from __future__ import annotations
+from typing import List, Optional, Tuple, Union
 
 from edugrad.helpers import argfix, prod, shape_int
-
 import edugrad.function as function
 
 
-# movement mlops
+def reshape(tensor: Tensor, shape: int | tuple[int, ...], *args) -> Tensor:
+    """Reshapes a tensor to the specified new shape.
 
+    Args:
+        tensor: The tensor to reshape.
+        shape: The new shape for the tensor. Can be an int or a tuple of ints.
+        args: Additional arguments for the shape.
 
-def reshape(tensor: Tensor, shape, *args) -> Tensor:
+    """
     new_shape = argfix(shape, *args)
-    return function.Reshape.apply(
-        tensor,
-        shape=tuple(
-            [
-                -prod(tensor.shape) // prod(new_shape) if s == -1 else (s if s is not None else tensor.shape[i])
-                for i, s in enumerate(new_shape)
-            ]
-        ),
+    # Resolve placeholders: -1 is inferred from the remaining dimensions, None keeps the tensor's size at that index
+    adjusted_shape = tuple(
+        -prod(tensor.shape) // prod(new_shape) if s == -1 else (s if s is not None else tensor.shape[i])
+        for i, s in enumerate(new_shape)
     )
+    return function.Reshape.apply(tensor, shape=adjusted_shape)
 
 
-def expand(tensor: Tensor, shape, *args) -> Tensor:
-    return function.Expand.apply(
-        tensor, shape=tuple([x if x != -1 else s for s, x in zip(tensor.shape, argfix(shape, *args))])
-    )
+def expand(tensor: Tensor, shape: int | tuple[int, ...], *args) -> Tensor:
+    """Expands the size of the tensor to the specified shape. -1 in the shape means the corresponding dimension is
+    unchanged.
+
+    Args:
+        tensor: The tensor to expand.
+        shape: The new shape for the tensor.
+        args: Additional arguments for the shape.
+
+    """
+    new_shape = argfix(shape, *args)
+    # Expand the tensor, allowing -1 to keep the original dimension size
+    expanded_shape = tuple(x if x != -1 else s for s, x in zip(tensor.shape, new_shape))
+    return function.Expand.apply(tensor, shape=expanded_shape)
+
 
+def permute(tensor: Tensor, order: int | tuple[int, ...], *args) -> Tensor:
+    """Permutes the tensor dimensions according to the specified order.
 
-def permute(tensor: Tensor, order, *args) -> Tensor:
-    return function.Permute.apply(tensor, order=argfix(order, *args))
+    Args:
+        tensor: The tensor to permute.
+        order: The desired order of dimensions.
+        args: Additional arguments for the order.
 
+    """
+    new_order = argfix(order, *args)
+    return function.Permute.apply(tensor, order=new_order)
 
-def flip(tensor: Tensor, axis, *args) -> Tensor:
-    return function.Flip.apply(tensor, axis=[x if x >= 0 else x + len(tensor.shape) for x in argfix(axis, *args)])
+
+def flip(tensor: Tensor, axis: int | list[int], *args) -> Tensor:
+    """Flips the tensor along the specified axes.
+
+    Args:
+        tensor: The tensor to flip.
+        axis: The axis or axes to flip.
+        args: Additional arguments for the axis.
+
+    """
+    # Normalize negative axis values to their non-negative equivalents
+    normalized_axes = [x if x >= 0 else x + len(tensor.shape) for x in argfix(axis, *args)]
+    return function.Flip.apply(tensor, axis=normalized_axes)
 
 
 def shrink(tensor: Tensor, arg: tuple[tuple[shape_int, shape_int] | None, ...]) -> Tensor:
-    return (
-        function.Shrink.apply(tensor, arg=tuple(x if x is not None else (0, s) for x, s in zip(arg, tensor.shape)))
-        if any(x is not None and x != (0, s) for x, s in zip(arg, tensor.shape))
-        else tensor
-    )
+    """Shrinks the tensor along each dimension according to the specified start and end indices.
+
+    Args:
+        tensor: The tensor to shrink.
+        arg: Tuple specifying start and end indices for each dimension.
+
+    """
+    # Determine the ranges for shrinking each dimension
+    shrink_arg = tuple(x if x is not None else (0, s) for x, s in zip(arg, tensor.shape))
+    # Apply shrink operation only if necessary
+    if any(x is not None and x != (0, s) for x, s in zip(arg, tensor.shape)):
+        return function.Shrink.apply(tensor, arg=shrink_arg)
+    return tensor
 
 
 def pad(tensor: Tensor, arg: tuple[tuple[int, int] | None, ...], value: float) -> Tensor:
+    """Pads the tensor along each dimension with the specified padding values.
+
+    Args:
+        tensor: The tensor to pad.
+        arg: Padding for each dimension.
+        value: The padding value.
+
+    """
     from edugrad.tensor import Tensor
 
+    # Determine padding for each dimension, defaulting to (0, 0)
+    pad_arg = tuple(x if x is not None else (0, 0) for x in arg)
+    # Apply padding operation if necessary
     if all(x is None or x == (0, 0) for x in arg):
         return tensor
-    ret = function.Pad.apply(tensor, arg=(narg := tuple(x if x is not None else (0, 0) for x in arg)))
-    return ret if 0 == value else ret + function.Pad.apply(Tensor.ones_like(tensor), arg=narg).where(0, value)
+    ret = function.Pad.apply(tensor, arg=pad_arg)
+    # Add the padding value if it's different from zero
+    return ret if value == 0 else ret + function.Pad.apply(Tensor.ones_like(tensor), arg=pad_arg).where(0, value)
 
 
-# (padding_left, padding_right, padding_top, padding_bottom)
 def pad2d(tensor: Tensor, padding: list[int] | tuple[int, ...], value: float) -> Tensor:
-    slc = [(-p0, s + p1) for p0, p1, s in zip(padding[::2], padding[1::2], tensor.shape[::-1])][::-1]
-    return tensor.slice([(0, s) for s in tensor.shape[: -(len(padding) // 2)]] + slc, value=value)
+    """Pads the tensor with 2D padding specified for each side.
+
+    Args:
+        tensor: The tensor to pad.
+        padding: Padding for each side (left, right, top, bottom).
+        value: The padding value.
+
+    """
+    # Calculate the slice indices for the 2D padding
+    slice_indices = [(-p0, s + p1) for p0, p1, s in zip(padding[::2], padding[1::2], tensor.shape[::-1])][::-1]
+    return tensor.slice([(0, s) for s in tensor.shape[: -(len(padding) // 2)]] + slice_indices, value=value)
+
+
+def transpose(tensor: Tensor, ax1: int, ax2: int) -> Tensor:
+    """Transposes two dimensions of the tensor.
 
+    Args:
+        tensor: The tensor to transpose.
+        ax1: The first axis to transpose.
+        ax2: The second axis to transpose.
 
-def transpose(tensor: Tensor, ax1, ax2) -> Tensor:
+    """
     order = list(range(len(tensor.shape)))
     order[ax1], order[ax2] = order[ax2], order[ax1]
     return tensor.permute(order)
 
 
-def _flatten(tensor: Tensor, start_dim):
+def _flatten(tensor: Tensor, start_dim: int) -> Tensor:
+    """Flattens the tensor from the specified start dimension.
+
+    Args:
+        tensor: The tensor to flatten.
+        start_dim: The dimension from which to start flattening.
+
+    """
     return tensor.reshape(shape=tensor.shape[:start_dim] + (-1,))
 
 
-def squeeze(tensor: Tensor, dim) -> Tensor:
+def squeeze(tensor: Tensor, dim: Optional[int]) -> Tensor:
+    """Squeezes the tensor by removing dimensions of size 1.
+
+    Args:
+        tensor: The tensor to squeeze.
+        dim: The specific dimension to squeeze. If None, all dimensions of size 1 are squeezed.
+
+    """
     if dim is None:
         return tensor if 1 not in tensor.shape else tensor.reshape(*[size for size in tensor.shape if size != 1])
     if dim <= 0 and tensor.ndim == 0:
-        return tensor  # This is to match PyTorch behavior
+        return tensor  # Match PyTorch behavior for 0-dimensional tensors
     if not -tensor.ndim <= dim < tensor.ndim:
         raise IndexError(
-            f"Dimension out of range (expected to be in range of [{-tensor.ndim if tensor.ndim > 0 else tensor.ndim-1}, {tensor.ndim-1 if tensor.ndim > 0 else tensor.ndim}], but got {dim})"
+            f"Dimension out of range (expected to be in range of [{-tensor.ndim}, {tensor.ndim-1}], but got {dim})"
         )
     if dim < 0:
         dim += tensor.ndim
@@ -86,7 +169,14 @@ def squeeze(tensor: Tensor, dim) -> Tensor:
     )
 
 
-def unsqueeze(tensor: Tensor, dim) -> Tensor:
+def unsqueeze(tensor: Tensor, dim: int) -> Tensor:
+    """Adds a dimension of size 1 to the tensor at the specified position.
+
+    Args:
+        tensor: The tensor to unsqueeze.
+        dim: The position to add the new dimension.
+
+    """
     if dim < 0:
         dim = len(tensor.shape) + dim + 1
     return tensor.reshape(tensor.shape[:dim] + (1,) + tensor.shape[dim:])
diff --git a/edugrad/helpers.py b/edugrad/helpers.py
index 4e4bff6..12ac36d 100644
--- a/edugrad/helpers.py
+++ b/edugrad/helpers.py
@@ -1,4 +1,4 @@
-"""Contains helper functions and DEBUG integer for verbose debugging used throughout the package."""
+"""Contains helper functions, a shape_int type and a DEBUG integer for verbose debugging used throughout the package."""
 
 from typing import Union, Tuple, Iterator, Any
 import os