Make test_tensor.py work
tostenzel committed Jan 3, 2024
1 parent 5b5718e commit b53e8d4
Showing 7 changed files with 525 additions and 49 deletions.
39 changes: 1 addition & 38 deletions applications/learn_mnist.py
@@ -13,6 +13,7 @@ def parse(file):

# parse = lambda file: np.frombuffer(gzip.open(file).read(), dtype=np.uint8).copy()
BASE = os.path.dirname(__file__) + "/datasets"

X_train = parse(BASE + "/mnist/train-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28 * 28)).astype(np.float32)
Y_train = parse(BASE + "/mnist/train-labels-idx1-ubyte.gz")[8:]
X_test = parse(BASE + "/mnist/t10k-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28 * 28)).astype(np.float32)
@@ -39,44 +40,6 @@ def __call__(self, x: Tensor):
return x.dot(self.l1).log_softmax()


# if __name__ == "__main__":
# NUM_STEPS = 100
# BS = 128
# LR = 0.001

# X_train, Y_train, X_test, Y_test = fetch_mnist()
# model = TinyConvNet()
# opt = optimizer.Adam([model.c1, model.c2, model.l1], lr=LR)

# with Tensor.train():
# for step in range(NUM_STEPS):
# # Get sample batches
# samp = np.random.randint(0, X_train.shape[0], size=(BS))
# xb, yb = Tensor(X_train[samp], requires_grad=False), Tensor(Y_train[samp])
# # Train
# out = model(xb)
# loss = out.sparse_categorical_crossentropy(yb)
# opt.zero_grad()
# loss.backward()
# opt.step()
# # Evaluate Train
# y_preds = out.numpy().argmax(axis=-1)
# acc = (y_preds == yb.numpy()).mean()
# if step == 0 or (step + 1) % 20 == 0:
# print(f"Step {step+1:<3} | Loss: {loss.numpy():.4f} | Train Acc: {acc:.3f}")

# # Evaluate Test
# acc = 0
# for i in range(0, len(Y_test), BS):
# xb, yb = Tensor(X_test[i : i + BS], requires_grad=False), Tensor(Y_test[i : i + BS])
# out = model(xb)
# preds = out.argmax(axis=-1)
# acc += (preds == yb).sum().numpy()
# acc /= len(Y_test)
# print(f"Test Acc: {acc:.3f}")



def train_and_evaluate_mnist(num_steps=100, batch_size=128, learning_rate=0.001):
X_train, Y_train, X_test, Y_test = fetch_mnist()
model = TinyConvNet()
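The committed body of train_and_evaluate_mnist is truncated in this view; the commented-out script removed above shows the logic it replaces. As an illustrative sketch only (not the committed code), a function with this signature could wrap that logic like so, assuming the fetch_mnist, TinyConvNet, Tensor and optimizer.Adam API used in the removed script:

# Sketch reconstructed from the removed script above; the committed body is not shown in this diff.
def train_and_evaluate_mnist(num_steps=100, batch_size=128, learning_rate=0.001):
    X_train, Y_train, X_test, Y_test = fetch_mnist()
    model = TinyConvNet()
    opt = optimizer.Adam([model.c1, model.c2, model.l1], lr=learning_rate)

    with Tensor.train():
        for step in range(num_steps):
            # Sample a random mini-batch.
            samp = np.random.randint(0, X_train.shape[0], size=batch_size)
            xb, yb = Tensor(X_train[samp], requires_grad=False), Tensor(Y_train[samp])
            loss = model(xb).sparse_categorical_crossentropy(yb)
            opt.zero_grad()
            loss.backward()
            opt.step()

    # Evaluate on the held-out test set in batches and return the accuracy.
    correct = 0
    for i in range(0, len(Y_test), batch_size):
        xb = Tensor(X_test[i : i + batch_size], requires_grad=False)
        yb = Tensor(Y_test[i : i + batch_size])
        correct += (model(xb).argmax(axis=-1) == yb).sum().numpy()
    return correct / len(Y_test)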
5 changes: 2 additions & 3 deletions edugrad/_tensor/tensor_reduce.py
@@ -46,9 +46,9 @@ def _reduce(self, fxn: type[Function], axis: int | tuple[int, ...] | None, keepd
return ret if keepdim else ret.reshape(shape=shape)


# ----------------------------------------------------------------------------------------------------------------------
# Functions that use the generic _reduce method for specific reduction operations.


def tsum(tensor: Tensor, axis, keepdim):
"""Computes the sum of elements over the specified axis."""
return tensor._reduce(function.Sum, axis, keepdim)
@@ -76,10 +76,9 @@ def std(tensor: Tensor, axis, keepdim, correction):
square_sum = ((tensor - tensor.mean(axis=axis, keepdim=True)).square()).sum(axis=axis, keepdim=keepdim)
return square_sum.div(prod(tensor.shape) / prod(square_sum.shape) - correction).sqrt()


# ----------------------------------------------------------------------------------------------------------------------
# Functions for softmax and its logarithmic variant, as well as argmax and argmin operations.


def _softmax(tensor: Tensor, axis):
"""Helper function to compute softmax components."""
m = tensor - tensor.max(axis=axis, keepdim=True)
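The _softmax helper shown above begins by subtracting the per-axis maximum before exponentiating; softmax is invariant to adding a constant along the reduced axis, so the shift changes nothing mathematically but keeps exp() from overflowing. For illustration only (not part of the diff), the same pattern in plain NumPy:

import numpy as np

def stable_softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:
    # Shift so the largest entry becomes 0; the biggest term is then exp(0) = 1.
    m = x - x.max(axis=axis, keepdims=True)
    e = np.exp(m)
    return e / e.sum(axis=axis, keepdims=True)

# log-softmax follows the same idea: m - np.log(e.sum(axis=axis, keepdims=True)).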
6 changes: 6 additions & 0 deletions edugrad/helpers.py
@@ -27,6 +27,12 @@ def flatten(list_: Iterator):
return [item for sublist in list_ for item in sublist]


def fully_flatten(l):
return [
item for sublist in l for item in (fully_flatten(sublist) if isinstance(sublist, (tuple, list)) else [sublist])
]


def argsort(x):
"""Return the indices that would sort an array.
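For context, the fully_flatten helper added above recurses through arbitrarily nested lists and tuples, whereas the existing flatten removes only one level of nesting. A quick illustrative check (not part of the diff):

assert fully_flatten([1, [2, (3, [4])], 5]) == [1, 2, 3, 4, 5]
assert fully_flatten([[1, 2], [3]]) == [1, 2, 3]
assert fully_flatten([]) == []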
23 changes: 15 additions & 8 deletions edugrad/tensor.py
@@ -72,16 +72,22 @@ def __init__(
# internal variables used for autograd graph construction
self._ctx: Function | None = None

# --------------------------------------------------------------------------------------------------------------
# Handles Tensor(x) for x with different data types.
# List inputs (x = list(y)) are cast up to float32 in every case

if isinstance(data, TensorData):
assert dtype is None or dtype == data.dtype, "dtype doesn't match, and casting isn't supported"

elif isinstance(data, (int, float)):
data = TensorData.loadop(LoadOps.CONST, tuple(), dtype or Tensor.default_type, data)

elif data is None or data.__class__ is list:
assert dtype is None or dtype.np is not None, f"{dtype} doesn't have a numpy dtype"
data = TensorData(np.array([] if data is None else data, dtype=(dtype or Tensor.default_type).np))

elif isinstance(data, (bool, int, float)):
data = TensorData.loadop(LoadOps.CONST, tuple(), dtype or dtypes.from_py(data), data)

elif isinstance(data, list):
data = TensorData(np.array(data, dtype=(dtype or Tensor.default_type).np))

elif data is None:
data = TensorData.loadop(LoadOps.EMPTY, (0,), dtype or dtypes.default_float)

elif isinstance(data, bytes):
data = TensorData(np.frombuffer(data, np.uint8))
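The reordered constructor branches above dispatch on the Python type of data: TensorData passes through unchanged, bool/int/float scalars become CONST load ops, lists go through NumPy with the default dtype, None becomes an empty (0,)-shaped tensor, and bytes are read as a uint8 buffer. An illustrative sketch of how those branches are exercised (not part of the diff):

t_scalar = Tensor(3.0)                       # bool/int/float -> CONST load op
t_list   = Tensor([[1.0, 2.0], [3.0, 4.0]])  # list -> np.array with the default dtype
t_empty  = Tensor(None)                      # None -> empty tensor of shape (0,)
t_bytes  = Tensor(b"\x00\x01\x02")           # bytes -> uint8 buffer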

@@ -253,7 +259,8 @@ def gather(self: Tensor, idx: Tensor, dim: int) -> Tensor: return gather(self, i
# ------------------------------------------------------------------------------------------------------------------
# tensor_combine_segment.py

def cat(self, *args, dim=0) -> Tensor: return cat(self, *args, dim=dim) @staticmethod
def cat(self, *args, dim=0) -> Tensor: return cat(self, *args, dim=dim)
@staticmethod
def stack(tensors, dim=0) -> Tensor: return stack(tensors, dim)
def repeat(self, repeats) -> Tensor: return repeat(self, repeats)
def chunk(self, num:int, dim:int=0) -> list[Tensor]: return chunk(self, num, dim)
1 change: 1 addition & 0 deletions environment.yaml
@@ -11,3 +11,4 @@ dependencies:

# Tests
- pytest
- pytorch
54 changes: 54 additions & 0 deletions tests/gradcheck.py
@@ -0,0 +1,54 @@
import numpy as np
from edugrad.tensor import Tensor


def mask_like(like, mask_inx, mask_value=1.0):
mask = np.zeros_like(like).reshape(-1)
mask[mask_inx] = mask_value
return mask.reshape(like.shape)


def jacobian(func, input):
output = func(input)

ji = input.numpy().reshape(-1).shape[-1]
jo = output.numpy().reshape(-1).shape[-1]
J = np.zeros((jo, ji), dtype=np.float32)

for o in range(jo):
input.grad = None
output = func(input)

# tinygrad doesn't support slicing, tiny-hack to select
# the needed scalar and backpropagate only through it
o_scalar = Tensor(mask_like(output.numpy(), o, 1.0)).mul(output).sum()
o_scalar.backward()

for i, grad in enumerate(input.grad.numpy().reshape(-1)):
J[o, i] = grad
return J


def numerical_jacobian(func, input, eps=1e-3):
output = func(input)

ji = input.numpy().reshape(-1).shape[-1]
jo = output.numpy().reshape(-1).shape[-1]
NJ = np.zeros((jo, ji), dtype=np.float32)

for i in range(ji):
eps_perturb = mask_like(input.numpy(), i, mask_value=eps)

output_perturb_add = func(Tensor(input.numpy() + eps_perturb)).numpy().reshape(-1)
output_perturb_sub = func(Tensor(input.numpy() - eps_perturb)).numpy().reshape(-1)

grad_approx = ((output_perturb_add) - (output_perturb_sub)) / (2 * eps)

NJ[:, i] = grad_approx
return NJ


def gradcheck(func, input, eps=1e-3, atol=1e-3, rtol=1e-3):
NJ = numerical_jacobian(func, input, eps)
J = jacobian(func, input)
return np.allclose(J, NJ, atol=atol, rtol=rtol)
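A minimal usage sketch for these helpers (illustrative only, assuming the edugrad Tensor exposes a differentiable op such as relu and accepts NumPy arrays, as the code above already relies on):

if __name__ == "__main__":
    x = Tensor(np.random.randn(4).astype(np.float32), requires_grad=True)
    # True if the analytic Jacobian from backward() matches the central-difference estimate.
    print(gradcheck(lambda t: t.relu(), x))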