diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 00000000..a2059fde --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,22 @@ +name: Nightly + +on: + schedule: + - cron: '0 2 * * *' # run at 2 AM UTC + +jobs: + test-against-pre-releases-of-dependencies: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Test with tox + run: tox -e pre-release diff --git a/.github/workflows/tox_run.yml b/.github/workflows/tox_run.yml index e8e45320..533f2112 100644 --- a/.github/workflows/tox_run.yml +++ b/.github/workflows/tox_run.yml @@ -19,7 +19,7 @@ jobs: strategy: max-parallel: 3 matrix: - python-version: [3.7, 3.8] + python-version: [3.7, 3.8, 3.9] fail-fast: false steps: @@ -57,11 +57,11 @@ jobs: run: tox -e coverage - py39: + py310: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9"] + python-version: ["3.10"] fail-fast: false steps: @@ -74,8 +74,8 @@ jobs: run: | python -m pip install --upgrade pip pip install tox - - name: Python 3.9 - run: tox -e py39 + - name: Python 3.10 + run: tox -e py310 minimum_numpy: runs-on: ubuntu-latest diff --git a/README.md b/README.md index dfb254ba..d58a79ab 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Documentation Status](https://readthedocs.org/projects/mygrad/badge/?version=latest)](https://mygrad.readthedocs.io/en/latest/?badge=latest) [![Automated tests status](https://github.com/rsokl/MyGrad/workflows/Tests/badge.svg)](https://github.com/rsokl/MyGrad/actions?query=workflow%3ATests+branch%3Amaster) [![PyPi version](https://img.shields.io/pypi/v/mygrad.svg)](https://pypi.python.org/pypi/mygrad) -![Python version support](https://img.shields.io/badge/python-3.7%20‐%203.9-blue.svg) +![Python version support](https://img.shields.io/badge/python-3.7%20‐%203.10-blue.svg) # [MyGrad's Documentation](https://mygrad.readthedocs.io/en/latest/) @@ -22,8 +22,15 @@ MyGrad is a lightweight library that adds automatic differentiation to NumPy – array([2., 4., 6.]) ``` -MyGrad's primary goal is to make automatic differentiation an accessible and easy to use across the Python/NumPy ecosystem. -As such, it strives to behave and feel exactly like NumPy so that users need not learn yet another array-based math library. +MyGrad's primary goal is to make automatic differentiation accessible and easy to use across the Python/NumPy ecosystem. +As such, it strives to behave and feel exactly like NumPy so that users need not learn yet another array-based math library. +Of the various modes and flavors of auto-diff, MyGrad supports backpropagation from a scalar quantity. + +Installing MyGrad: + +```shell script +pip install mygrad +``` NumPy's ufuncs are richly supported; e.g. we can autodiff through in-place targets and boolean masks: @@ -102,9 +109,9 @@ array([-1., 0., 10.]) The following is an example of using `mygrad` to compute the [hinge loss](https://en.wikipedia.org/wiki/Hinge_loss) of classification scores and to "backpropagate" through (compute the gradient of) this loss. This example demonstrates some of mygrad's ability to perform backpropagation through broadcasted operations, basic indexing, advanced indexing, and in-place assignments. 
```python ->>> from mygrad import Tensor +>>> import mygrad as mg >>> import numpy as np ->>> class_scores = Tensor(10 * np.random.rand(100, 10)) # 100 samples, 10 possible classes for each +>>> class_scores = 10 * mg.random.rand(100, 10) # 100 samples, 10 possible classes for each >>> class_labels = np.random.randint(low=0, high=10, size=100) # correct label for each datum >>> class_labels = (range(len(class_labels)), class_labels) >>> correct_class_scores = class_scores[class_labels] diff --git a/docs/requirements.txt b/docs/requirements.txt index e131f7e4..b8d8dd08 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,7 @@ numpy==1.18.1 numba==0.51.2 llvmlite==0.34.0 -sphinx==3.0.4 -numpydoc>=1.0.0 -sphinx-rtd-theme==0.5.0 +sphinx==3.5.4 +numpydoc==1.1.0 +sphinx-rtd-theme==0.5.2 matplotlib>=3.0.0 \ No newline at end of file diff --git a/docs/source/changes.rst b/docs/source/changes.rst index c6e706d0..83114214 100644 --- a/docs/source/changes.rst +++ b/docs/source/changes.rst @@ -6,6 +6,48 @@ This is a record of all past mygrad releases and what went into them, in reverse chronological order. All previous releases should still be available on pip. +.. _v2.1.0: + +------------------ +2.1.0 - 2022-01-01 +------------------ + +New Functions and Utilities +--------------------------- + +The following differentiable functions are now supported by MyGrad, and "drop-in" overrides for their NumPy counterparts are supported as well. + + - :func:`~mygrad.atleast_1d` + - :func:`~mygrad.atleast_2d` + - :func:`~mygrad.atleast_3d` + +Basic tensor save/load functionality has been added (thanks to @kw-0). + + - :func:`~mygrad.save` + - :func:`~mygrad.load` + +Improvements +------------ + +- :func:`~mygrad.clip` and ``Tensor.clip`` now accept an ``out`` target, permitting in-place operations. +- The method ``Tensor.__index__()`` is now implemented, which permits scalar integer-valued tensors to be used to index into Python sequences. +- Added Python 3.10 to our automated test matrix. + +Compatibility-Breaking Changes +------------------------------ + +- In accordance with `NEP 29 `_ we are dropping support for NumPy versions below 1.19. However, MyGrad will not drop support for Python 3.7; to remain as lightweight and flexible as possible we will support minor versions of Python up until their EOL or until our minimal NumPy dependency drops support -- whichever occurs first. +- The interface to :func:`~mygrad.arange` was changed from ``arange(start, stop=None, step=None, ...)`` to ``arange([start,] stop[, step,], ...)``. This provides exact parity with NumPy's arange function. +- The derivatives of :func:`~mygrad.absolute` and :func:`~mygrad.linalg.norm` have been revised such that in cases where the derivatives used to be ``nan``, those entries will now be ``0``. Both functions can now be passed ``nan_to_num=False`` to enable the previous, more rigorous behavior. See `PR #379 `_ for more details. + +.. _v2.0.2: + +------------------ +2.0.2 - 2021-04-10 +------------------ + +Exposes :func:`~mygrad.execute_op` at top-level namespace + .. 
_v2.0.1: ------------------ diff --git a/docs/source/conf.py b/docs/source/conf.py index e2a7d116..287f09d8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -88,7 +88,7 @@ def setup(app): - app.add_stylesheet("my_theme.css") + app.add_css_file("my_theme.css") # app.add_javascript("https://www.googletagmanager.com/gtag/js?id=UA-115029372-1") # app.add_javascript("gtag.js") diff --git a/docs/source/generated/mygrad.atleast_1d.rst b/docs/source/generated/mygrad.atleast_1d.rst new file mode 100644 index 00000000..cf1160fa --- /dev/null +++ b/docs/source/generated/mygrad.atleast_1d.rst @@ -0,0 +1,6 @@ +mygrad.atleast_1d +================= + +.. currentmodule:: mygrad + +.. autofunction:: atleast_1d \ No newline at end of file diff --git a/docs/source/generated/mygrad.atleast_2d.rst b/docs/source/generated/mygrad.atleast_2d.rst new file mode 100644 index 00000000..a1033251 --- /dev/null +++ b/docs/source/generated/mygrad.atleast_2d.rst @@ -0,0 +1,6 @@ +mygrad.atleast_2d +================= + +.. currentmodule:: mygrad + +.. autofunction:: atleast_2d \ No newline at end of file diff --git a/docs/source/generated/mygrad.atleast_3d.rst b/docs/source/generated/mygrad.atleast_3d.rst new file mode 100644 index 00000000..3666a9a4 --- /dev/null +++ b/docs/source/generated/mygrad.atleast_3d.rst @@ -0,0 +1,6 @@ +mygrad.atleast_3d +================= + +.. currentmodule:: mygrad + +.. autofunction:: atleast_3d \ No newline at end of file diff --git a/docs/source/generated/mygrad.load.rst b/docs/source/generated/mygrad.load.rst new file mode 100644 index 00000000..706e3ed1 --- /dev/null +++ b/docs/source/generated/mygrad.load.rst @@ -0,0 +1,6 @@ +mygrad.load +=========== + +.. currentmodule:: mygrad + +.. autofunction:: load \ No newline at end of file diff --git a/docs/source/generated/mygrad.save.rst b/docs/source/generated/mygrad.save.rst new file mode 100644 index 00000000..94eba1be --- /dev/null +++ b/docs/source/generated/mygrad.save.rst @@ -0,0 +1,6 @@ +mygrad.save +=========== + +.. currentmodule:: mygrad + +.. autofunction:: save \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index ccfbeac4..e7574cb3 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,15 +5,17 @@ MyGrad ====== -MyGrad is a lightweight library that adds automatic differentiation to NumPy – its only dependency is NumPy! +MyGrad is a lightweight library that adds automatic differentiation to NumPy – its only +dependency is NumPy. Simply "drop in" a MyGrad tensor into your NumPy-based code, and +start differentiating! -.. code:: python +.. code-block:: pycon >>> import mygrad as mg >>> import numpy as np - >>> x = mg.tensor([1., 2., 3.]) # like numpy.array, but supports backprop! - >>> f = np.sum(x * x) # tensors work with numpy functions! + >>> x = mg.tensor([1., 2., 3.]) # like numpy.array, but supports backprop + >>> f = np.sum(x * x) # tensors can be passed directly to native numpy functions! >>> f.backward() # triggers automatic differentiation >>> x.grad # stores [df/dx0, df/dx1, df/dx2] array([2., 4., 6.]) @@ -21,11 +23,12 @@ MyGrad is a lightweight library that adds automatic differentiation to NumPy – MyGrad's primary goal is to make automatic differentiation an accessible and easy to use across the Python/NumPy ecosystem. As such, it strives to behave and feel exactly like NumPy so that users need not learn yet another array-based math library. +Of the various modes and flavors of auto-diff, MyGrad supports backpropagation from a scalar quantity. 
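For example (a minimal sketch using only the ``tensor``/``backward`` API already shown above), reducing a computation to a 0-dimensional tensor and invoking ``backward()`` sends an elementwise gradient back to every input:

.. code-block:: pycon

   >>> import mygrad as mg
   >>> x = mg.tensor([1., 2., 3.])
   >>> loss = (x ** 2).sum()  # reduce to a scalar (0-dim tensor) before backprop
   >>> loss.backward()        # computes d(loss)/dx for each element of x
   >>> x.grad                 # d(sum(x**2))/dx = 2*x
   array([2., 4., 6.])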
NumPy's ufuncs are richly supported. We can even differentiate through an operation that occur in-place on a tensor and applies a boolean mask to the results: -.. code:: python +.. code-block:: pycon >>> x = mg.tensor([1., 2., 3.]) >>> y = mg.zeros_like(x) @@ -39,7 +42,7 @@ NumPy's `view semantics >> x = mg.arange(9.).reshape(3, 3) >>> diag_view = np.einsum("ii->i", x) # returns a view of the diagonal elements of `x` @@ -74,7 +77,7 @@ This relationship will also manifest between the derivatives stored by a tensor Basic and advanced indexing is fully supported -.. code:: python +.. code-block:: pycon >>> (x[x < 4] ** 2).backward() >>> x.grad @@ -86,7 +89,7 @@ Basic and advanced indexing is fully supported NumPy arrays and other array-likes play nicely with MyGrad's tensor. These behave like constants during automatic differentiation -.. code:: python +.. code-block:: pycon >>> x = mg.tensor([1., 2., 3.]) >>> constant = [-1., 0., 10] # can be a numpy array, list, or any other array-like @@ -113,5 +116,6 @@ during automatic differentiation math indexing nnet + io graph_viz changes diff --git a/docs/source/install.rst b/docs/source/install.rst index 9ce282b1..8606657b 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -24,6 +24,11 @@ navigate to the MyGrad directory, then run: Support for Python and NumPy ---------------------------- MyGrad abides by the `NEP 29 `_ recommendation, and adopts -a common “time window-based” policy for support of Python and NumPy versions. - -Accordingly, MyGrad's drop schedule for Python and NumPy can be found `here `_. +a common “time window-based” policy for support of NumPy versions. Accordingly, MyGrad's drop schedule for NumPy versions can be found `here `_. + +Note, however, that MyGrad will maintain a wider window of support for minor Python +versions than is specified by NEP 29. Because our only dependency is NumPy, and because +we strive to remain an exceptionally lightweight and flexible dependency to our users, +we will support minor versions of Python until their end of life, *or* until our lowest +supported version of NumPy drops support for that version of Python -- whichever occurs +first. \ No newline at end of file diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 9e26dabe..22bc62f1 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -19,6 +19,7 @@ MyGrad is a lightweight library that adds automatic differentiation to NumPy – Its primary goal is to make automatic differentiation an accessible and easy to use across the Python/NumPy ecosystem. As such, it strives to behave and feel exactly like NumPy so that users need not learn yet another array-based math library. You can pass MyGrad's :class:`~mygrad.Tensor` to NumPy's functions in order to make them differentiable! +Of the various modes and flavors of auto-diff, MyGrad supports backpropagation from a scalar quantity. A Simple Application diff --git a/docs/source/io.rst b/docs/source/io.rst new file mode 100644 index 00000000..89f0c643 --- /dev/null +++ b/docs/source/io.rst @@ -0,0 +1,12 @@ +Input and Output +**************** + +.. currentmodule:: mygrad + +NumPy binary files (NPY, NPZ) +----------------------------- +.. 
autosummary:: + :toctree: generated/ + + save + load \ No newline at end of file diff --git a/docs/source/operation.rst b/docs/source/operation.rst index 8369d88d..46ccbe13 100644 --- a/docs/source/operation.rst +++ b/docs/source/operation.rst @@ -1,14 +1,16 @@ Writing Your Own Operations *************************** -Let's write our own "multiply" operation. +Let's write our own "multiply" operation. There are two components to doing this: + - Defining an operation class (a subclass of :class:`~mygrad.operation_base.Operation`) + - Writing a function that ultimately calls ``mygrad.execute_op(YourOp, ...)`` .. code:: python import numpy as np import mygrad as mg - from mygrad import prepare_op + from mygrad import execute_op from mygrad.operation_base import Operation from mygrad.typing import ArrayLike @@ -59,6 +61,9 @@ Let's write our own "multiply" operation. x_arr = x.data y_arr = y.data + # The operation need not incorporate specialized logic for + # broadcasting. The appropriate sum-reductions will be performed + # by MyGrad's autodiff system. if index == 0: # backprop through a return grad * y.data # ∂ℒ/∂x = (∂ℒ/∂f)(∂f/∂x) elif index == 1: # backprop through b @@ -67,22 +72,22 @@ Let's write our own "multiply" operation. # Our function stitches together our operation class with the # operation arguments via `mygrad.prepare_op` - def custom_multiply(x: ArrayLike, y: ArrayLike) -> mg.Tensor: - # `prepare_op` will take care of casting `x` and `y` to tensors if - # they are not already tensors. - return prepare_op(CustomMultiply, x, y) + def custom_multiply(x: ArrayLike, y: ArrayLike, constant=None) -> mg.Tensor: + # `execute_op` will take care of: + # - casting `x` and `y` to tensors if they are instead array-likes + # - propagating 'constant' status to the resulting output based on the inputs + # - handling in-place operations (specified via the `out` parameter) + return execute_op(CustomMultiply, x, y, constant=constant) -We can now use our differentiable function! It will automatically be compatible -with broadcasting; out operation need not account for broadcasting in either the -forward pass or the backward pass. +We can now use our differentiable function! .. code:: pycon - >> x = mg.tensor(2.0) - >> y = mg.tensor([1.0, 2.0, 3.0]) + >>> x = mg.tensor(2.0) + >>> y = mg.tensor([1.0, 2.0, 3.0]) - >> custom_multiply(x, y).backward() - >> x.grad, y.grad + >>> custom_multiply(x, y).backward() + >>> x.grad, y.grad (array(6.), array([2., 2., 2.])) Documentation for mygrad.Operation diff --git a/docs/source/tensor.rst b/docs/source/tensor.rst index bf5dbb2b..265dde99 100644 --- a/docs/source/tensor.rst +++ b/docs/source/tensor.rst @@ -61,17 +61,17 @@ graph - the graph is constructed as we carry out the forward-pass computation. >>> ℒ = 2 * x + y ** 2 Invoking ``ℒ.backward()`` signals the computational graph to -compute the total-derivative of ``f`` with respect to each one of its dependent +compute the total-derivative of ``ℒ`` with respect to each one of its dependent variables. I.e. ``x.grad`` will store ``dℒ/dx`` and ``y.grad`` will store -``dℒ/dy``. Thus we have back-propagated a gradient from ``f`` through our graph. +``dℒ/dy``. Thus we have back-propagated a gradient from ``ℒ`` through our graph. Each tensor of derivatives is computed elementwise. 
That is, if ``x = Tensor(x0, x1, x2)``, then ``dℒ/dx`` represents ``[dℒ/d(x0), dℒ/d(x1), dℒ/d(x2)]`` ->>> ℒ.backward() # computes df/dx and df/dy ->>> x.grad # df/dx +>>> ℒ.backward() # computes dℒ/dx and dℒ/dy +>>> x.grad # dℒ/dx array(6.0) ->>> y.grad # df/dy +>>> y.grad # dℒ/dy array(4.0) >>> ℒ.grad array(1.0) # dℒ/dℒ diff --git a/docs/source/tensor_manipulation.rst b/docs/source/tensor_manipulation.rst index 9818df83..35818239 100644 --- a/docs/source/tensor_manipulation.rst +++ b/docs/source/tensor_manipulation.rst @@ -30,6 +30,9 @@ Changing number of dimensions .. autosummary:: :toctree: generated/ + atleast_1d + atleast_2d + atleast_3d broadcast_to expand_dims squeeze diff --git a/setup.cfg b/setup.cfg index 323954b9..a350feed 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,13 +27,14 @@ omit = src/mygrad/_version.py [tox:tox] -envlist = py37,py38,format,min_numpy +envlist = py37,py38,py39,format,min_numpy [gh-actions] python = 3.7: py37 3.8: py38 3.9: py39 + 3.10: py310 [testenv] deps = @@ -46,14 +47,14 @@ extras = rnn [testenv:min_numpy] -deps = numpy==1.17 +deps = numpy==1.19 {[testenv]deps} basepython = python3.7 commands = pytest --hypothesis-profile ci \ {posargs} extras = -[testenv:py39] # exclude numba dependency for now +[testenv:py310] # exclude numba dependency for now deps = pytest hypothesis scipy @@ -81,3 +82,14 @@ commands = autoflake --recursive --in-place --remove-duplicate-keys --remove-unused-variables . isort . black . + + +[testenv:pre-release] # test against pre-releases of dependencies +pip_pre = true +deps = pytest + hypothesis + scipy +basepython = python3.8 +commands = pytest --hypothesis-profile ci \ + {posargs} +extras = diff --git a/setup.py b/setup.py index 66e6d191..f44bd68c 100644 --- a/setup.py +++ b/setup.py @@ -20,8 +20,8 @@ "Topic :: Scientific/Engineering", ] -INSTALL_REQUIRES = ["numpy >= 1.17"] -TESTS_REQUIRE = ["pytest >= 3.8", "hypothesis >= 5.32.0", "scipy"] +INSTALL_REQUIRES = ["numpy >= 1.19"] +TESTS_REQUIRE = ["pytest >= 3.8", "hypothesis >= 6.17.1", "scipy"] DESCRIPTION = "Brings drop-in automatic differentiation to NumPy" LONG_DESCRIPTION = """ diff --git a/src/mygrad/__init__.py b/src/mygrad/__init__.py index caa89f82..fec278c2 100644 --- a/src/mygrad/__init__.py +++ b/src/mygrad/__init__.py @@ -32,6 +32,7 @@ from mygrad.tensor_manip.tiling.funcs import * from mygrad.tensor_manip.transpose_like.funcs import * from mygrad.ufuncs._ufunc_creators import ufunc +from ._io import load, save from . import random from ._version import get_versions @@ -41,4 +42,4 @@ setattr(Tensor, "clip", clip) -prepare_op = Tensor._op +execute_op = Tensor._op diff --git a/src/mygrad/_io.py b/src/mygrad/_io.py new file mode 100644 index 00000000..ad80ac7f --- /dev/null +++ b/src/mygrad/_io.py @@ -0,0 +1,125 @@ +from pathlib import Path +from typing import BinaryIO, Union + +import numpy as np + +import mygrad.tensor_base as tb + +_FileLike = Union[str, Path, BinaryIO] + + +def save(file: _FileLike, tensor: tb.Tensor) -> None: + """Saves a tensor and its gradient information. + + This docstring was adapted from that of numpy.save() + + Parameters + ---------- + file : str | Path | BinaryIO + The file or file-path that where the tensor data and its gradient + will be saved. Note that the file will be saved as a .npz file. + + tensor : Tensor + The tensor to be saved. If it has an associated gradient, that will + be saved as well. 
+ + Notes + ----- + This function uses ``numpy.savez(file, data=tensor.data, grad=tensor.grad)`` + to save the tensor's data and its gradient. No ``grad`` field is included + if the tensor does not have a gradient. + + See Also + -------- + mygrad.load + + Examples + -------- + >>> import mygrad as mg + >>> from tempfile import TemporaryFile + >>> outfile = TemporaryFile() + >>> x = mg.arange(10.0) + >>> mg.save(outfile, x) + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> mg.load(outfile) + Tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) + + An example of saving a tensor that has an associated gradient. + + >>> (x * x).backward() + >>> x.grad + array([ 0., 2., 4., 6., 8., 10., 12., 14., 16., 18.]) + >>> outfile = TemporaryFile() + >>> x = mg.arange(10.0) + >>> mg.save(outfile, x) + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> loaded = mg.load(outfile) + >>> loaded + Tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) + >>> loaded.grad + array([ 0., 2., 4., 6., 8., 10., 12., 14., 16., 18.]) + """ + if not isinstance(tensor, tb.Tensor): + raise TypeError( + f"mygrad.save requires a Tensor-type object, got type {type(tensor)}" + ) + + if tensor.grad is not None: + np.savez(file, data=tensor.data, grad=tensor.grad) + else: + np.savez(file, data=tensor.data) + + +def load(file: _FileLike) -> tb.Tensor: + """Loads a saved Tensor and its gradient information (if applicable). + + This docstring was adapted from that of numpy.load() + + Parameters + ---------- + file : str | Path | BinaryIO + The name of the file that holds the tensor data to load. + + Returns + ------- + loaded : Tensor + The loaded tensor (whose gradient will be loaded if it was saved). + + See Also + -------- + mygrad.save + + Examples + -------- + >>> import mygrad as mg + >>> from tempfile import TemporaryFile + >>> outfile = TemporaryFile() + >>> x = mg.arange(10.0) + >>> mg.save(outfile, x) + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> mg.load(outfile) + Tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) + + An example of saving a tensor that has an associated gradient. + + >>> (x * x).backward() + >>> x.grad + array([ 0., 2., 4., 6., 8., 10., 12., 14., 16., 18.]) + >>> outfile = TemporaryFile() + >>> x = mg.arange(10.0) + >>> mg.save(outfile, x) + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> loaded = mg.load(outfile) + >>> loaded + Tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) + >>> loaded.grad + array([ 0., 2., 4., 6., 8., 10., 12., 14., 16., 18.]) + """ + loaded = np.load(file) + + loaded_tensor = tb.tensor(loaded["data"]) + + if "grad" in loaded: + loaded_tensor.backward(loaded["grad"]) + + return loaded_tensor diff --git a/src/mygrad/_utils/graph_tracking.py b/src/mygrad/_utils/graph_tracking.py index d3300439..758941ee 100644 --- a/src/mygrad/_utils/graph_tracking.py +++ b/src/mygrad/_utils/graph_tracking.py @@ -2,7 +2,7 @@ Provides user interface for suspending computational graph tracking and back-propagation """ from functools import wraps -from typing import Callable +from typing import Callable, TypeVar, Any, cast, Optional import numpy as np @@ -10,6 +10,7 @@ __all__ = ["no_autodiff"] +_T = TypeVar("_T", bound=Callable[..., Any]) # If `False`, suspends all computational graph tracking and backprop TRACK_GRAPH = True # type: bool @@ -42,17 +43,6 @@ class _NoAutoDiff(ContextTracker): ... # No graph-tracking will occur within ... 
# the body of this function ... pass - - The following pattern is particularly useful for making a MyGrad-function - behave as if it were a pure NumPy function. - - >>> @mg.no_autodiff(to_numpy=True) - ... def func_returns_array(): - ... # No graph-tracking will occur within - ... # the body of this function. - ... # And the output of the function will be - ... # cast to a numpy array - ... pass """ _enter_set_value = False @@ -71,7 +61,7 @@ def state(self, value: bool): global TRACK_GRAPH TRACK_GRAPH = value - def __call__(self, func: Callable, to_numpy: bool = False) -> Callable: + def __call__(self, func: _T, to_numpy: bool = False) -> _T: """Decorates a function so that it will have graph-tracking suspended during its execution. @@ -94,7 +84,7 @@ def wrapper(*args, **kwargs): out = func(*args, **kwargs) return out if not to_numpy else np.asarray(out) - return wrapper + return cast(_T, wrapper) no_autodiff = _NoAutoDiff() diff --git a/src/mygrad/indexing_routines/funcs.py b/src/mygrad/indexing_routines/funcs.py index 2ac30a04..78d98fd6 100644 --- a/src/mygrad/indexing_routines/funcs.py +++ b/src/mygrad/indexing_routines/funcs.py @@ -100,5 +100,5 @@ def where( raise ValueError("either both or neither of x and y should be given") return Tensor._op( - Where, x, y, op_kwargs=dict(condition=condition), constant=constant + Where, x, y, op_kwargs={"condition": condition}, constant=constant ) diff --git a/src/mygrad/linalg/funcs.py b/src/mygrad/linalg/funcs.py index 2fdb5df4..5852dc23 100644 --- a/src/mygrad/linalg/funcs.py +++ b/src/mygrad/linalg/funcs.py @@ -22,6 +22,7 @@ def norm( axis: Optional[Union[int, Tuple[int]]] = None, keepdims: bool = False, *, + nan_to_num: bool = True, constant: Optional[bool] = None, ) -> Tensor: r"""Vector norm. @@ -53,6 +54,10 @@ def norm( result as dimensions with size one. With this option the result will broadcast correctly against the original `x`. + nan_to_num : bool, optional (default=True) + If `True` then gradients that would store nans due to the presence of + zeros in `x` will instead store zeros in those places. + constant : Optional[bool] If ``True``, this tensor is treated as a constant, and thus does not facilitate back propagation (i.e. ``constant.grad`` will always return @@ -113,14 +118,26 @@ def norm( >>> l2_norms Tensor([3.74165739, 1. ]) - The presence of the elementwise absolute values in the norm means that zero-valued - entries in a vectors have an undefined derivative. + The presence of the elementwise absolute values in the norm operation means that zero-valued entries in any of + input vectors have an undefined derivative. When `nan_to_num=False` is specified these derivatives will be reported + as `nan`, otherwise they will be made to be 0.0. + >>> l2_norms = mg.linalg.norm(x, axis=1, ord=2, nan_to_num=True) >>> l2_norms.backward() >>> x.grad array([[0.26726124, 0.53452248, 0.80178373], [1. , nan, nan]]) + This is rigorously true, but is often not the desired behavior in autodiff applications. + Rather, it can be preferable to use `0.0` to fill these undefined derivatives. + This is the default behavior, when `nan_to_num` is not specified. + + >>> l2_norms = mg.linalg.norm(x, axis=1, ord=2, nan_to_num=False) # default setting: `nan_to_num=False` + >>> l2_norms.backward() + >>> x.grad + array([[0.26726124, 0.53452248, 0.80178373], + [1. 
, 0., 0.]]) + L1 norms along each of the three columns: >>> mg.linalg.norm(x, axis=0, ord=1) @@ -143,7 +160,12 @@ def norm( return Tensor._op( Norm, x, - op_kwargs={"axis": axis, "keepdims": keepdims, "ord": ord}, + op_kwargs={ + "axis": axis, + "keepdims": keepdims, + "ord": ord, + "nan_to_num": nan_to_num, + }, constant=constant, ) diff --git a/src/mygrad/linalg/ops.py b/src/mygrad/linalg/ops.py index 1de5142b..4ee398ff 100644 --- a/src/mygrad/linalg/ops.py +++ b/src/mygrad/linalg/ops.py @@ -257,8 +257,17 @@ def _expand_dims(x, axis, original_ndmin): class Norm(Operation): - def __call__(self, tensor, ord=None, axis=None, keepdims=False): + def __call__( + self, + tensor, + ord=None, + axis=None, + keepdims: bool = False, + *, + nan_to_num: bool = True + ): self.variables = (tensor,) + self._nan_to_num = nan_to_num out = np.linalg.norm(tensor.data, ord=ord, axis=axis, keepdims=keepdims) if isinstance(ord, Real) and np.isinf(ord): # pragma: no cover @@ -304,7 +313,8 @@ def backward_var(self, grad: np.ndarray, index: int, **kwargs) -> np.ndarray: # is broadcast-compatible with `tensor` grad = _expand_dims(grad, axis=self.axis, original_ndmin=tensor.ndim) - invalid_derivative = np.where(x == 0) + if not self._nan_to_num: + invalid_derivative = np.where(x == 0) if self.ord == 1: out = np.sign(x) @@ -327,5 +337,7 @@ def backward_var(self, grad: np.ndarray, index: int, **kwargs) -> np.ndarray: out *= np.sign(x) out *= _norm out *= grad - out[invalid_derivative] = np.nan + + if not self._nan_to_num: + out[invalid_derivative] = np.nan return out diff --git a/src/mygrad/math/misc/funcs.py b/src/mygrad/math/misc/funcs.py index d2df6dc5..e141dab8 100644 --- a/src/mygrad/math/misc/funcs.py +++ b/src/mygrad/math/misc/funcs.py @@ -32,6 +32,7 @@ def absolute( where: Mask = True, dtype: DTypeLikeReals = None, constant: Optional[bool] = None, + nan_to_num: bool = True, ) -> Tensor: # pragma: no cover """The absolute value, computed elementwise. @@ -57,6 +58,10 @@ def absolute( Integer-type tensors must be constant. + nan_to_num : bool, optional (default=True) + If `True` then gradients that would store nans due to the presence of + zeros in `x` will instead store zeros in those places. + where : Mask This condition is broadcast over the input. At locations where the condition is True, the ``out`` tensor will be set to the ufunc result. @@ -85,6 +90,22 @@ def absolute( >>> mg.absolute([-1.2, 1.2]) Tensor([ 1.2, 1.2]) + The absolute-value function is not differentiable at `x=0.0`. + By default the derivative at this point is treated as 0. + + >>> x = mg.tensor([-2.0, 0.0, 2.0]) + >>> mg.absolute(x).backward() + >>> x.grad + np.array([-1., 0., 1.]) + + However a more rigorous behavior can be enabled such that the + undefined derivative will be returned as `nan`. + + >>> x = mg.tensor([-2.0, 0.0, 2.0]) + >>> mg.absolute(x, nan_to_num=False).backward() + >>> x.grad + np.array([-1., nan, 1.]) + Plot the function and its derivate over ``[-10, 10]``: .. plot:: @@ -418,7 +439,12 @@ def minimum( @implements_numpy_override() def clip( - a: ArrayLike, a_min: ArrayLike, a_max: ArrayLike, *, constant: Optional[bool] = None + a: ArrayLike, + a_min: ArrayLike, + a_max: ArrayLike, + out: Optional[Union[np.ndarray, Tensor]] = None, + *, + constant: Optional[bool] = None, ) -> Tensor: """Clip (limit) the values in an array. @@ -449,6 +475,11 @@ def clip( `None`. If `a_min` or `a_max` are ArrayLike, then the three arrays will be broadcasted to match their shapes. 
+ out : Optional[Union[ndarray, Tensor]] + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or None, a + freshly-allocated tensor is returned. + constant : bool, optional(default=False) If ``True``, the returned tensor is a constant (it does not backpropagate a gradient) @@ -474,11 +505,10 @@ def clip( raise ValueError("`a_min` and `a_max` cannot both be set to `None`") if a_min is not None: - a = maximum(a_min, a, constant=constant) + a = maximum(a_min, a, out=out, constant=constant) if a_max is not None: - a = minimum(a_max, a, constant=constant) - + a = minimum(a_max, a, out=out, constant=constant) return a diff --git a/src/mygrad/math/misc/ops.py b/src/mygrad/math/misc/ops.py index 3b041a18..6af7be3f 100644 --- a/src/mygrad/math/misc/ops.py +++ b/src/mygrad/math/misc/ops.py @@ -11,10 +11,17 @@ class Abs(UnaryUfunc): numpy_ufunc = np.absolute + def __call__(self, *args, nan_to_num: bool = True, **kwargs): + self._nan_to_num = nan_to_num + return super().__call__(*args, **kwargs) + def backward_var(self, grad, index, **kwargs): (a,) = self.variables + return grad * np.piecewise( - a.data, [a.data < 0, a.data == 0, a.data > 0], [-1, np.nan, 1] + a.data, + [a.data < 0, a.data == 0, a.data > 0], + [-1, (0 if self._nan_to_num else np.nan), 1], ) @@ -117,4 +124,4 @@ def backward_var(self, grad, index, **kwargs): dfdx = a[:, np.newaxis] * np.expand_dims(grad, -2) return dfdx else: # pragma: no cover - raise ValueError() \ No newline at end of file + raise ValueError() diff --git a/src/mygrad/math/sequential/funcs.py b/src/mygrad/math/sequential/funcs.py index 47612185..d8c5d5c5 100644 --- a/src/mygrad/math/sequential/funcs.py +++ b/src/mygrad/math/sequential/funcs.py @@ -114,7 +114,7 @@ def sum( Tensor(-128) """ return Tensor._op( - Sum, x, op_kwargs=dict(axis=axis, keepdims=keepdims), constant=constant + Sum, x, op_kwargs={"axis": axis, "keepdims": keepdims}, constant=constant ) @@ -191,7 +191,7 @@ def mean( Tensor(0.55000000074505806) """ return Tensor._op( - Mean, x, op_kwargs=dict(axis=axis, keepdims=keepdims), constant=constant + Mean, x, op_kwargs={"axis": axis, "keepdims": keepdims}, constant=constant ) @@ -286,7 +286,7 @@ def var( return Tensor._op( Variance, x, - op_kwargs=dict(axis=axis, keepdims=keepdims, ddof=ddof), + op_kwargs={"axis": axis, "keepdims": keepdims, "ddof": ddof}, constant=constant, ) @@ -381,7 +381,7 @@ def std( return Tensor._op( StdDev, x, - op_kwargs=dict(axis=axis, keepdims=keepdims, ddof=ddof), + op_kwargs={"axis": axis, "keepdims": keepdims, "ddof": ddof}, constant=constant, ) @@ -446,7 +446,7 @@ def max( return Tensor._op( Max, x, - op_kwargs=dict(axis=axis, keepdims=keepdims, dtype=_NoValue), + op_kwargs={"axis": axis, "keepdims": keepdims, "dtype": _NoValue}, constant=constant, ) @@ -509,7 +509,7 @@ def min( return Tensor._op( Min, x, - op_kwargs=dict(axis=axis, keepdims=keepdims, dtype=_NoValue), + op_kwargs={"axis": axis, "keepdims": keepdims, "dtype": _NoValue}, constant=constant, ) @@ -588,7 +588,7 @@ def prod( ... 
[3.,4.]], axis=1) Tensor([ 2., 12.])""" return Tensor._op( - Prod, a, op_kwargs=dict(axis=axis, keepdims=keepdims), constant=constant + Prod, a, op_kwargs={"axis": axis, "keepdims": keepdims}, constant=constant ) @@ -654,7 +654,7 @@ def cumprod( Tensor([[ 1, 2, 6], [ 4, 20, 120]])""" - return Tensor._op(CumProd, a, op_kwargs=dict(axis=axis), constant=constant) + return Tensor._op(CumProd, a, op_kwargs={"axis": axis}, constant=constant) @implements_numpy_override() @@ -705,4 +705,4 @@ def cumsum( [ 4, 9, 15]]) """ - return Tensor._op(CumSum, a, op_kwargs=dict(axis=axis), constant=constant) + return Tensor._op(CumSum, a, op_kwargs={"axis": axis}, constant=constant) diff --git a/src/mygrad/nnet/layers/conv.py b/src/mygrad/nnet/layers/conv.py index cf3d7448..aa668a9f 100644 --- a/src/mygrad/nnet/layers/conv.py +++ b/src/mygrad/nnet/layers/conv.py @@ -390,6 +390,6 @@ def conv_nd( ConvND, x, filter_bank, - op_kwargs=dict(stride=stride, padding=padding, dilation=dilation), + op_kwargs={"stride": stride, "padding": padding, "dilation": dilation}, constant=constant, ) diff --git a/src/mygrad/tensor_base.py b/src/mygrad/tensor_base.py index e6f3cea2..28ac4d70 100644 --- a/src/mygrad/tensor_base.py +++ b/src/mygrad/tensor_base.py @@ -396,7 +396,7 @@ def astensor( } -_REGISTERED_NO_DIFF_NUMPY_FUNCS: Set[Callable[..., np.ndarray]] = { +_REGISTERED_NO_DIFF_NUMPY_FUNCS: Set[Callable] = { np.allclose, np.bincount, np.can_cast, @@ -1528,17 +1528,20 @@ def constant(self) -> bool: return self._constant @property - def creator(self) -> Operation: + def creator(self) -> Optional[Operation]: """The ``Operation`` instance that produced ``self``. Returns ------- - Operation + creator : Optional[Operation] + The operation-instance that created the tensor, or `None`. Examples -------- >>> import mygrad as mg >>> x = mg.Tensor(3) + >>> x.creator is None + True >>> y = mg.Tensor(2) >>> z = x * y # Multiply(x, y) -> z >>> z.creator @@ -1750,9 +1753,9 @@ def _in_place_op( placeholder_mutant_view, # gets passed through unchanged # ~mask * grad backprops to upstream placeholder graph[self].placeholder, - op_kwargs=dict( - mask=placeholder_mutant_view.creator.where, - ), + op_kwargs={ + "mask": placeholder_mutant_view.creator.where, + }, ) # Connect public base tensor to placeholder graph via the mutated placeholder @@ -1788,12 +1791,12 @@ def _in_place_op( _dup.UnView, graph.base.placeholder, placeholder_mutant_view, - op_kwargs=dict( + op_kwargs={ # Copy to avoid upstream placeholder mutant view sharing memory # with downstream mutant base - mutant_base_data=mutant_base_data, - view_fn_sequence=view_fn_sequence, - ), + "mutant_base_data": mutant_base_data, + "view_fn_sequence": view_fn_sequence, + }, ) del placeholder_mutant_view @@ -2118,6 +2121,11 @@ def __int__(self) -> int: raise TypeError("can only convert a tensor of size 1 to a Python scalar") return int(self.data) + def __index__(self) -> int: + """Return self converted to an integer, if self is suitable for use as an index + into a list.""" + return self.data.__index__() + def flatten(self, *, constant: bool = None) -> "Tensor": """Return a copy of the tensor collapsed into one dimension. 
@@ -2427,7 +2435,7 @@ def sum( Tensor([1, 5]) """ return Tensor._op( - Sum, self, op_kwargs=dict(axis=axis, keepdims=keepdims), constant=constant + Sum, self, op_kwargs={"axis": axis, "keepdims": keepdims}, constant=constant ) def prod( @@ -2465,7 +2473,10 @@ def prod( product_along_axis : mygrad.Tensor A tensor shaped as `a` but with the specified axis removed.""" return Tensor._op( - Prod, self, op_kwargs=dict(axis=axis, keepdims=keepdims), constant=constant + Prod, + self, + op_kwargs={"axis": axis, "keepdims": keepdims}, + constant=constant, ) def cumprod( @@ -2508,7 +2519,7 @@ def cumprod( Arithmetic is modular when using integer types, and no error is raised on overflow.""" - return Tensor._op(CumProd, self, op_kwargs=dict(axis=axis), constant=constant) + return Tensor._op(CumProd, self, op_kwargs={"axis": axis}, constant=constant) def cumsum( self, @@ -2542,7 +2553,7 @@ def cumsum( mygrad.Tensor """ - return Tensor._op(CumSum, self, op_kwargs=dict(axis=axis), constant=constant) + return Tensor._op(CumSum, self, op_kwargs={"axis": axis}, constant=constant) def mean( self, @@ -2590,7 +2601,10 @@ def mean( a 0-dim Tensor is returned. """ return Tensor._op( - Mean, self, op_kwargs=dict(axis=axis, keepdims=keepdims), constant=constant + Mean, + self, + op_kwargs={"axis": axis, "keepdims": keepdims}, + constant=constant, ) def std( @@ -2652,7 +2666,7 @@ def std( return Tensor._op( StdDev, self, - op_kwargs=dict(axis=axis, keepdims=keepdims, ddof=ddof), + op_kwargs={"axis": axis, "keepdims": keepdims, "ddof": ddof}, constant=constant, ) @@ -2714,7 +2728,7 @@ def var( return Tensor._op( Variance, self, - op_kwargs=dict(axis=axis, keepdims=keepdims, ddof=ddof), + op_kwargs={"axis": axis, "keepdims": keepdims, "ddof": ddof}, constant=constant, ) @@ -2777,7 +2791,7 @@ def max( return Tensor._op( Max, self, - op_kwargs=dict(axis=axis, keepdims=keepdims, dtype=_NoValue), + op_kwargs={"axis": axis, "keepdims": keepdims, "dtype": _NoValue}, constant=constant, ) @@ -2838,7 +2852,7 @@ def min( return Tensor._op( Min, self, - op_kwargs=dict(axis=axis, keepdims=keepdims, dtype=_NoValue), + op_kwargs={"axis": axis, "keepdims": keepdims, "dtype": _NoValue}, constant=constant, ) @@ -3124,7 +3138,12 @@ def any( return np.any(self.data, axis=axis, out=out, keepdims=keepdims) def clip( - self, a_min: ArrayLike, a_max: ArrayLike, *, constant: Optional[bool] = None + self, + a_min: ArrayLike, + a_max: ArrayLike, + out: Optional[Union[np.ndarray, "Tensor"]] = None, + *, + constant: Optional[bool] = None, ) -> "Tensor": # pragma: no cover """Clip (limit) the values in an array. @@ -3152,6 +3171,11 @@ def clip( `None`. If `a_min` or `a_max` are ArrayLike, then the three arrays will be broadcasted to match their shapes. + out : Optional[Union[ndarray, Tensor]] + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or None, a + freshly-allocated tensor is returned. 
+ constant : bool, optional(default=False) If ``True``, the returned tensor is a constant (it does not backpropagate a gradient) diff --git a/src/mygrad/tensor_creation/funcs.py b/src/mygrad/tensor_creation/funcs.py index 7a87f734..59076921 100644 --- a/src/mygrad/tensor_creation/funcs.py +++ b/src/mygrad/tensor_creation/funcs.py @@ -675,14 +675,14 @@ def full_like( def arange( - start: Real, - stop: Real = None, - step: int = None, - dtype: Optional[DTypeLikeReals] = None, - *, + *args, constant: Optional[bool] = None, + **kwargs, ) -> Tensor: - """Return a Tensor with evenly-spaced values within a given interval. + """ + arange([start,] stop[, step,], dtype=None, *, constant=None) + + Return a Tensor with evenly-spaced values within a given interval. Values are generated within [start, stop). Note that for non-integer steps, results may be inconsistent; you are better off using `linspace` instead. @@ -726,19 +726,14 @@ def arange( >>> import mygrad as mg >>> mg.arange(3) Tensor([0, 1, 2]) - >>> mg.arange(3.0, constant=True) - Tensor([ 0., 1., 2.]) # resulting tensor will not back-propagate a gradient + >>> mg.arange(3.0, constant=True) # resulting tensor will not back-propagate a gradient + Tensor([ 0., 1., 2.]) >>> mg.arange(3,7) Tensor([3, 4, 5, 6]) >>> mg.arange(3,7,2) Tensor([3, 5]) """ - if stop is None: - arr = np.arange(start, step=step, dtype=dtype) - else: - arr = np.arange(start, stop, step=step, dtype=dtype) - - return Tensor(arr, constant=constant, copy=False) + return Tensor(np.arange(*args, **kwargs), constant=constant, copy=False) def linspace( diff --git a/src/mygrad/tensor_manip/array_shape/funcs.py b/src/mygrad/tensor_manip/array_shape/funcs.py index 3b4448ad..3b6027a2 100644 --- a/src/mygrad/tensor_manip/array_shape/funcs.py +++ b/src/mygrad/tensor_manip/array_shape/funcs.py @@ -1,11 +1,22 @@ -from typing import Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, TypeVar, Union, cast, overload from mygrad.tensor_base import Tensor, implements_numpy_override from mygrad.typing import ArrayLike, Shape from .ops import * -__all__ = ["reshape", "squeeze", "ravel", "expand_dims", "broadcast_to"] +__all__ = [ + "reshape", + "squeeze", + "ravel", + "expand_dims", + "broadcast_to", + "atleast_1d", + "atleast_2d", + "atleast_3d", +] + +_T = TypeVar("_T") @implements_numpy_override() @@ -234,3 +245,252 @@ def broadcast_to( shapes [original->remapped]: (3,) and requested shape (4,4) """ return Tensor._op(BroadcastTo, a, op_args=(shape,), constant=constant) + + +def _dispatch_atleast_kd(func: Callable[..., _T], Op, *tensors, k: int, constant) -> _T: + if len(tensors) == 1: + (t,) = tensors + if ( + isinstance(t, Tensor) + and t.ndim >= k + and (constant is None or t.constant is constant) + ): + # return tensor unchanged + return cast(_T, t) + return cast(_T, Tensor._op(Op, t, constant=constant)) + else: + out = [func(t, constant=constant) for t in tensors] + return cast(_T, out) + + +@overload +def atleast_1d( + tensors: ArrayLike, *, constant: Optional[bool] = None +) -> Tensor: # pragma: no cover + ... + + +@overload +def atleast_1d( + *tensors: ArrayLike, constant: Optional[bool] = None +) -> List[Tensor]: # pragma: no cover + ... + + +@implements_numpy_override() +def atleast_1d( + *tensors: ArrayLike, constant: Optional[bool] = None +) -> Union[Tensor, List[Tensor]]: + """ + Convert inputs to tensors with at least one dimension. + + Scalar inputs are converted to 1-dimensional tensors, whilst + higher-dimensional inputs are preserved. 
+ + This docstring was adapted from ``numpy.atleast_1d``. + + Parameters + ---------- + tens1, tens2, ... : ArrayLike + One or more input tensors. + + Returns + ------- + ret : Tensor | List[Tensor] + A tensor, or list of tensors, each with ``a.ndim >= 1``. + Copies are made only if necessary. + + See Also + -------- + atleast_2d, atleast_3d + + Examples + -------- + >>> import mygrad as mg + >>> mg.atleast_1d(1.0) + array([1.]) + + >>> x = mg.arange(9.0).reshape(3,3) + >>> np.atleast_1d(x) + Tensor([[0., 1., 2.], + [3., 4., 5.], + [6., 7., 8.]]) + >>> mg.atleast_1d(x) is x + True + + >>> mg.atleast_1d(1, [3, 4]) + [Tensor([1]), Tensor([3, 4])] + + ``numpy.atleast_1d`` will dispatch appropriately on tensors. + + >>> x = mg.tensor(2.) + >>> np.atleast_1d(x) + Tensor([2.]) + + >>> np.atleast_1d(x).backward() + >>> x.grad + array(1.) + + If any argument to ``numpy.atleast_1d`` is a Tensor, ``mygrad.atleast_1d`` + will be dispatched on all of the arguments. + + >>> np.atleast_1d(x, 1.) + [Tensor([2.]), Tensor([1.])] + """ + return _dispatch_atleast_kd(atleast_1d, AtLeast1D, *tensors, k=1, constant=constant) + + +@overload +def atleast_2d( + tensors: ArrayLike, *, constant: Optional[bool] = None +) -> Tensor: # pragma: no cover + ... + + +@overload +def atleast_2d( + *tensors: ArrayLike, constant: Optional[bool] = None +) -> List[Tensor]: # pragma: no cover + ... + + +@implements_numpy_override() +def atleast_2d( + *tensors: ArrayLike, constant: Optional[bool] = None +) -> Union[Tensor, List[Tensor]]: + """ + Convert inputs to tensors with at least one dimension. + + Scalar inputs are converted to 2-dimensional tensors, whilst + higher-dimensional inputs are preserved. + + This docstring was adapted from ``numpy.atleast_2d``. + + Parameters + ---------- + tens1, tens2, ... : ArrayLike + One or more input tensors. + + Returns + ------- + ret : Tensor | List[Tensor] + A tensor, or list of tensors, each with ``a.ndim >= 2``. + Copies are made only if necessary. + + See Also + -------- + atleast_1d, atleast_3d + + Examples + -------- + >>> import mygrad as mg + >>> mg.atleast_2d(3.0) + Tensor([[3.]]) + + >>> x = mg.arange(3.0) + >>> mg.atleast_2d(x) + array([[0., 1., 2.]]) + >>> mg.atleast_2d(x).base is x + True + + >>> mg.atleast_2d(1, [1, 2], [[1, 2]]) + [Tensor([[1]]), Tensor([[1, 2]]), Tensor([[1, 2]])] + + ``numpy.atleast_2d`` will dispatch appropriately on tensors. + + >>> x = mg.tensor(2.) + >>> np.atleast_2d(x) + Tensor([[2.]]) + + >>> np.atleast_2d(x).backward() + >>> x.grad + array(1.) + + If any argument to ``numpy.atleast_2d`` is a Tensor, ``mygrad.atleast_2d`` + will be dispatched on all of the arguments. + + >>> np.atleast_2d(x, 1.) + [Tensor([[2.]]), Tensor([[1.]])] + """ + return _dispatch_atleast_kd(atleast_2d, AtLeast2D, *tensors, k=2, constant=constant) + + +@overload +def atleast_3d( + tensors: ArrayLike, *, constant: Optional[bool] = None +) -> Tensor: # pragma: no cover + ... + + +@overload +def atleast_3d( + *tensors: ArrayLike, constant: Optional[bool] = None +) -> List[Tensor]: # pragma: no cover + ... + + +@implements_numpy_override() +def atleast_3d( + *tensors: ArrayLike, constant: Optional[bool] = None +) -> Union[Tensor, List[Tensor]]: + """ + Convert inputs to tensors with at least one dimension. + + Scalar inputs are converted to 3-dimensional tensors, whilst + higher-dimensional inputs are preserved. + + This docstring was adapted from ``numpy.atleast_3d``. + + Parameters + ---------- + tens1, tens2, ... : ArrayLike + One or more input tensors. 
+ + Returns + ------- + ret : Tensor | List[Tensor] + A tensor, or list of tensors, each with ``a.ndim >= 3``. + Copies are made only if necessary. For example, a 1-D tensor of shape ``(N,)`` + becomes a view of shape ``(1, N, 1)``, and a 2-D tensor of shape ``(M, N)`` + becomes a view of shape ``(M, N, 1)``. + + See Also + -------- + atleast_1d, atleast_3d + + Examples + -------- + >>> import mygrad as mg + >>> mg.atleast_3d(3.0) + Tensor([[[3.]]]) + + >>> x = mg.arange(3.0) + >>> mg.atleast_3d(x).shape + (1, 3, 1) + >>> mg.atleast_3d(x).base is x + True + + >>> x = mg.arange(12.0).reshape(4,3) + >>> mg.atleast_3d(x).shape + (4, 3, 1) + + >>> mg.atleast_3d(1, [[1, 2]], [[[[1, 2]]]]) + [Tensor([[[1]]]), Tensor([[[1, 2]]]), Tensor([[[[1, 2]]]])] + + ``numpy.atleast_3d`` will dispatch appropriately on tensors. + + >>> x = mg.tensor(2.) + >>> np.atleast_3d(x) + Tensor([[[2.]]]) + + >>> np.atleast_3d(x).backward() + >>> x.grad + array(1.) + + If any argument to ``numpy.atleast_3d`` is a Tensor, ``mygrad.atleast_3d`` + will be dispatched on all of the arguments. + + >>> np.atleast_3d(x, 1.) + [Tensor([[[2.]]]), Tensor([[[1.]]])] + """ + return _dispatch_atleast_kd(atleast_3d, AtLeast3D, *tensors, k=3, constant=constant) diff --git a/src/mygrad/tensor_manip/array_shape/ops.py b/src/mygrad/tensor_manip/array_shape/ops.py index ecc43379..b6115889 100644 --- a/src/mygrad/tensor_manip/array_shape/ops.py +++ b/src/mygrad/tensor_manip/array_shape/ops.py @@ -1,11 +1,52 @@ +from typing import Callable + import numpy as np from mygrad.operation_base import Operation -__all__ = ["Reshape", "Flatten", "Squeeze", "Ravel", "ExpandDims", "BroadcastTo"] +__all__ = [ + "Reshape", + "Flatten", + "Squeeze", + "Ravel", + "ExpandDims", + "BroadcastTo", + "AtLeast1D", + "AtLeast2D", + "AtLeast3D", +] + + +class _PreservesOrder(Operation): + """Base class for operations that preserve an array's size + and flat-iteration element ordering""" + def backward_var(self, grad: np.ndarray, index: int, **kwargs) -> np.ndarray: + (a,) = self.variables + return np.reshape(grad, a.shape) -class Reshape(Operation): +class _AtLeastKD(_PreservesOrder): + can_return_view = True + numpy_func: Callable[[np.ndarray], np.ndarray] + + def __call__(self, a): + self.variables = (a,) + return self.numpy_func(a.data) + + +class AtLeast1D(_AtLeastKD): + numpy_func = staticmethod(np.atleast_1d) + + +class AtLeast2D(_AtLeastKD): + numpy_func = staticmethod(np.atleast_2d) + + +class AtLeast3D(_AtLeastKD): + numpy_func = staticmethod(np.atleast_3d) + + +class Reshape(_PreservesOrder): can_return_view = True def __call__(self, a, newshape): @@ -23,12 +64,8 @@ def __call__(self, a, newshape): self.variables = (a,) return np.reshape(a.data, newshape) - def backward_var(self, grad, index, **kwargs): - a = self.variables[index] - return np.reshape(grad, a.shape) - -class Squeeze(Operation): +class Squeeze(_PreservesOrder): can_return_view = True def __call__(self, a, axis): @@ -38,12 +75,8 @@ def __call__(self, a, axis): self.variables = (a,) return np.squeeze(a.data, axis=axis) - def backward_var(self, grad, index, **kwargs): - a = self.variables[index] - return grad.reshape(a.shape) - -class Flatten(Operation): +class Flatten(_PreservesOrder): def __call__(self, a): """Parameters ---------- @@ -51,12 +84,8 @@ def __call__(self, a): self.variables = (a,) return a.data.flatten(order="C") - def backward_var(self, grad, index, **kwargs): - a = self.variables[index] - return grad.reshape(a.shape) - -class Ravel(Operation): +class Ravel(_PreservesOrder): 
can_return_view = True def __call__(self, a): @@ -66,12 +95,8 @@ def __call__(self, a): self.variables = (a,) return np.ravel(a.data, order="C") - def backward_var(self, grad, index, **kwargs): - a = self.variables[index] - return grad.reshape(a.shape) - -class ExpandDims(Operation): +class ExpandDims(_PreservesOrder): can_return_view = True def __call__(self, a, axis): @@ -82,10 +107,6 @@ def __call__(self, a, axis): self.variables = (a,) return np.expand_dims(a.data, axis=axis) - def backward_var(self, grad, index, **kwargs): - a = self.variables[index] - return grad.reshape(a.shape) - class BroadcastTo(Operation): can_return_view = True diff --git a/src/mygrad/ufuncs/_ufunc_creators.py b/src/mygrad/ufuncs/_ufunc_creators.py index ac585bd6..4f4cc44b 100644 --- a/src/mygrad/ufuncs/_ufunc_creators.py +++ b/src/mygrad/ufuncs/_ufunc_creators.py @@ -183,6 +183,7 @@ def __call__( where: Mask = True, dtype: DTypeLikeReals = None, constant: Optional[bool] = None, + **kwargs, ) -> Tensor: # it is fastest to check if out is None, which is likely the # most common scenario, and this is a very "hot path" in the @@ -191,7 +192,7 @@ def __call__( out._in_place_op( cls._wrapped_op, x, - op_kwargs={"where": where, "dtype": dtype}, + op_kwargs={"where": where, "dtype": dtype, **kwargs}, constant=constant, ) return out @@ -199,7 +200,7 @@ def __call__( return Tensor._op( cls._wrapped_op, x, - op_kwargs={"where": where, "dtype": dtype}, + op_kwargs={"where": where, "dtype": dtype, **kwargs}, constant=constant, out=out, ) @@ -391,27 +392,27 @@ def reduceat( decorated_func.__name__, (object,), ( - dict( - _wrapped_op=op, - at=at, - accumulate=accumulate, - reduce=reduce, - reduceat=reduceat, - outer=outer, - signature=op.numpy_ufunc.signature, - identity=op.numpy_ufunc.identity, - nargs=op.numpy_ufunc.nargs, - nin=op.numpy_ufunc.nin, - nout=op.numpy_ufunc.nout, - ntypes=len(types), - types=types, - _decorated_func=decorated_func, - __name__=decorated_func.__name__, - __qualname__=decorated_func.__name__, - __signature__=signature(decorated_func), - __annotations__=get_type_hints(decorated_func), - __doc__=decorated_func.__doc__, - ) + { + "_wrapped_op": op, + "at": at, + "accumulate": accumulate, + "reduce": reduce, + "reduceat": reduceat, + "outer": outer, + "signature": op.numpy_ufunc.signature, + "identity": op.numpy_ufunc.identity, + "nargs": op.numpy_ufunc.nargs, + "nin": op.numpy_ufunc.nin, + "nout": op.numpy_ufunc.nout, + "ntypes": len(types), + "types": types, + "_decorated_func": decorated_func, + "__name__": decorated_func.__name__, + "__qualname__": decorated_func.__name__, + "__signature__": signature(decorated_func), + "__annotations__": get_type_hints(decorated_func), + "__doc__": decorated_func.__doc__, + } ), ) ufunc.register(out) diff --git a/tests/conftest.py b/tests/conftest.py index ef0cfced..abaa03a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import os +import tempfile import warnings import pytest @@ -70,3 +71,24 @@ def raise_on_mem_locking_state_leakage() -> bool: assert True clear_all_mem_locking_state() + + +@pytest.fixture() +def cleandir() -> str: + """This fixture will use the stdlib `tempfile` module to + move the current working directory to a tmp-dir for the + duration of the test. + + Afterwards, the session returns to its previous working + directory, and the temporary directory and its contents + are removed. 
+ + Yields + ------ + str + The name of the temporary directory.""" + with tempfile.TemporaryDirectory() as tmpdirname: + old_dir = os.getcwd() + os.chdir(tmpdirname) + yield tmpdirname + os.chdir(old_dir) diff --git a/tests/linalg/test_norm.py b/tests/linalg/test_norm.py index 27cce5e6..5eb1ec59 100644 --- a/tests/linalg/test_norm.py +++ b/tests/linalg/test_norm.py @@ -161,3 +161,17 @@ def test_norm_backward_1d(x, data, ord): assert_allclose(o1, o2) assert_allclose(t1.grad, t2.grad, atol=1e-7, rtol=1e-7) + + +def test_nan_to_num_behavior(): + x = mg.tensor([[1.0, 2.0, 3.0], [1.0, 0.0, 0.0]]) + y = x.copy() + z = x.copy() + + mg.linalg.norm(x, axis=1, nan_to_num=False).backward() + mg.linalg.norm(y, axis=1, nan_to_num=True).backward() + mg.linalg.norm(z, axis=1).backward() # default behavior should be `nan_to_num=True` + + assert np.isnan(x.grad).sum() == 2 + assert_allclose(np.nan_to_num(x.grad), y.grad) + assert_allclose(z.grad, y.grad) diff --git a/tests/math/binary/test_binary_funcs.py b/tests/math/binary/test_binary_funcs.py index 076dd9c0..84aa886b 100644 --- a/tests/math/binary/test_binary_funcs.py +++ b/tests/math/binary/test_binary_funcs.py @@ -7,7 +7,7 @@ import hypothesis.strategies as st import numpy as np import pytest -from hypothesis import given +from hypothesis import given, assume from numpy.testing import assert_allclose from tests.custom_strategies import tensors @@ -20,17 +20,17 @@ def inplace_op(inplace_target, other, constant=False, *, op_name: str): op_name = "__" + op_name + "__" - # hack to make broadcastable shapes work for inplace op: - # 1. Ensure that inplace op has at least as many items as `other`. - # 2. `other` can't have excess leading dims - inplace_target, other = ( - (inplace_target, other) - if inplace_target.size >= other.size - else (other, inplace_target) - ) - if other.ndim > inplace_target.ndim: - other = other[(0,) * (other.ndim - inplace_target.ndim)] - + # hack to make broadcastable shapes work for inplace op: + x = inplace_target.copy() + check = False + + if np.broadcast(inplace_target, other).shape != inplace_target.shape: + inplace_target, other = other, inplace_target + check = True + + if check and np.broadcast(inplace_target, other).shape != inplace_target.shape: + assume(False) + # touch so that it doesn't look like the input # was mutated inplace_target = +inplace_target diff --git a/tests/math/test_misc.py b/tests/math/test_misc.py index 41739c20..c1ca075c 100644 --- a/tests/math/test_misc.py +++ b/tests/math/test_misc.py @@ -106,13 +106,6 @@ def amax_clip_only(clip_func, a, b, constant=False): ) -skip_if_lower_than_numpy_1p17 = pytest.mark.skipif( - np.__version__ < "1.17", - reason="numpy.clip behavior was made consistent in numpy-1.17; " - "test must by run on numpy 1.17 or later", -) - - @pytest.mark.parametrize( ("mygrad_clip", "numpy_clip", "num_arrays"), [ @@ -129,7 +122,6 @@ def amax_clip_only(clip_func, a, b, constant=False): (clip, np.clip, 3), ], ) -@skip_if_lower_than_numpy_1p17 def test_clip_fwd(mygrad_clip: Callable, numpy_clip: Callable, num_arrays: int): @fwdprop_test_factory( num_arrays=num_arrays, mygrad_func=mygrad_clip, true_func=numpy_clip @@ -162,7 +154,6 @@ def is_not_close_clip(a: Tensor, a_min=None, a_max=None) -> bool: (clip, np.clip, 3), ], ) -@skip_if_lower_than_numpy_1p17 def test_clip_bkwd(mygrad_clip: Callable, numpy_clip: Callable, num_arrays: int): @backprop_test_factory( num_arrays=num_arrays, @@ -193,7 +184,6 @@ def wrapped_test(): dtype=float, ), ) -@skip_if_lower_than_numpy_1p17 
@pytest.mark.filterwarnings("ignore: invalid value") def test_clip_input_validation(a, a_min, a_max): try: @@ -218,3 +208,18 @@ def test_clip_method_bkwd(): x = mg.tensor([1.0, 5.0, 10.0]) x.clip(2, 7).backward() assert_allclose(x.grad, [0.0, 1.0, 0.0]) + + +@pytest.mark.parametrize("as_method", [False, True]) +def test_inplace_clip(as_method: bool): + x = mg.arange(4.0) + y = mg.tensor([-1.0, 2.0, 3.0]) + + clipper = y.clip if as_method else partial(mg.clip, y) + out = clipper(0, 2.1, out=x[1:]) + out.backward() + + assert_allclose(out, mg.tensor([0.0, 2.0, 2.1])) + assert_allclose(x, mg.tensor([0.0, 0.0, 2.0, 2.1])) + assert_allclose(x.grad, np.array([0.0, 1.0, 1.0, 1.0])) + assert_allclose(y.grad, np.array([0.0, 1.0, 0.0])) diff --git a/tests/nnet/layers/test_batchnorm.py b/tests/nnet/layers/test_batchnorm.py index bf6ef62e..5953acaf 100644 --- a/tests/nnet/layers/test_batchnorm.py +++ b/tests/nnet/layers/test_batchnorm.py @@ -163,6 +163,8 @@ def test_batchnorm_fwd(): index_to_arr_shapes={0: hnp.array_shapes(min_dims=2, max_dims=4)}, kwargs=lambda x: st.fixed_dictionaries(dict(eps=st.floats(1e-20, 1e0))), vary_each_element=True, + atol=1e-5, + rtol=1e-5, ) def test_batchnorm_bkwd(): pass diff --git a/tests/tensor_base/test_tensor.py b/tests/tensor_base/test_tensor.py index fc7e678f..95f5875c 100644 --- a/tests/tensor_base/test_tensor.py +++ b/tests/tensor_base/test_tensor.py @@ -11,8 +11,8 @@ import mygrad as mg from mygrad import Tensor from mygrad.errors import InvalidBackprop -from mygrad.math.misc.ops import MatMul from mygrad.math.arithmetic.ops import Add, Divide, Multiply, Negative, Power, Subtract +from mygrad.math.misc.ops import MatMul from mygrad.operation_base import Operation from tests.custom_strategies import tensors, valid_constant_arg from tests.utils.errors import does_not_raise @@ -255,7 +255,7 @@ def test_init_data_rand(x: np.ndarray): | st.integers(-100, 100) ) def test_items(x): - """ verify that tensor.item() mirrors array.item()""" + """verify that tensor.item() mirrors array.item()""" tensor = Tensor(x) try: value = np.asarray(x).item() @@ -291,7 +291,7 @@ def test_items(x): dtype=dtype_strat, numpy_dtype=dtype_strat_numpy, ndmin=st.integers(0, 10), - copy=st.none() | st.booleans(), + copy=st.booleans(), ) def test_init_params( data, @@ -299,7 +299,7 @@ def test_init_params( dtype, numpy_dtype, ndmin: int, - copy: Optional[bool], + copy: bool, ): """Check for bad combinations of init parameters leading to unexpected behavior""" elements = ( @@ -552,3 +552,23 @@ def test_no_hash(): {Tensor(3): "this should not work"} except TypeError as e: assert str(e) == "unhashable type: 'Tensor'" + + +@given( + hnp.arrays( + shape=hnp.array_shapes(min_dims=0, max_dims=2, min_side=0, max_side=2), + dtype=hnp.integer_dtypes() | hnp.floating_dtypes(), + ) +) +def test_index(arr): + tens = mg.tensor(arr) + + try: + arr_index = arr.__index__() + except TypeError as e: + with pytest.raises(type(e)): + _ = tens.__index__() + return + + tens_index = tens.__index__() + assert arr_index == tens_index diff --git a/tests/test_duplicating_graph.py b/tests/test_duplicating_graph.py index 1459fe2d..bbeb7f7b 100644 --- a/tests/test_duplicating_graph.py +++ b/tests/test_duplicating_graph.py @@ -151,11 +151,11 @@ def create_view_of_node(self, parent: Tensor, view_op: Callable[[Tensor], Tensor @rule() def perform_non_view_op_of_base(self): - return 2 * self.base # this shouldn't affect the view graph + 2 * self.base # this shouldn't affect the view graph @rule(parent=nodes) def 
perform_non_view_op_of_node(self, parent: Tensor): - return 2 * parent # this shouldn't affect the view graph + 2 * parent # this shouldn't affect the view graph def teardown(self): graph = DuplicatingGraph(self.base) diff --git a/tests/test_io.py b/tests/test_io.py new file mode 100644 index 00000000..c5f54852 --- /dev/null +++ b/tests/test_io.py @@ -0,0 +1,50 @@ +from pathlib import Path +from string import ascii_lowercase +from typing import Any + +import hypothesis.strategies as st +import pytest +from hypothesis import given, settings +from numpy.testing import assert_array_equal + +from mygrad import Tensor, load, save +from tests.custom_strategies import everything_except, tensors + +filenames = st.text(ascii_lowercase, min_size=1).map(lambda x: x + ".npz") + + +@given( + fname=filenames, + as_path=st.booleans(), + tensor=tensors( + include_grad=st.booleans(), dtype=st.sampled_from(["float32", "float64"]) + ), +) +@pytest.mark.usefixtures("cleandir") +def test_save_load_roundtrip(fname: str, as_path: bool, tensor: Tensor): + if as_path: + fname = Path(fname) + + save(fname, tensor) + loaded = load(fname) + assert_array_equal(tensor, loaded) + + if tensor.grad is None: + assert loaded.grad is None + else: + assert_array_equal(tensor.grad, loaded.grad) + + +@settings(max_examples=10) +@given( + fname=filenames, + as_path=st.booleans(), + not_tensor=everything_except(Tensor), +) +@pytest.mark.usefixtures("cleandir") +def test_validation(fname: str, as_path: bool, not_tensor: Any): + if as_path: + fname = Path(fname) + + with pytest.raises(TypeError): + save(fname, not_tensor) diff --git a/tests/test_tensor_manip.py b/tests/test_tensor_manip.py index 98f60703..1a2397be 100644 --- a/tests/test_tensor_manip.py +++ b/tests/test_tensor_manip.py @@ -1,23 +1,30 @@ from itertools import permutations +from typing import List import hypothesis.extra.numpy as hnp import hypothesis.strategies as st import numpy as np -from hypothesis import given, settings +import pytest +from hypothesis import given, infer, settings from numpy.testing import assert_allclose from pytest import raises from mygrad import ( Tensor, + atleast_1d, + atleast_2d, + atleast_3d, broadcast_to, expand_dims, moveaxis, + no_autodiff, ravel, repeat, roll, swapaxes, transpose, ) +from mygrad.typing import ArrayLike from tests.utils.functools import add_constant_passthrough from tests.utils.wrappers import adds_constant_arg @@ -479,3 +486,74 @@ def test_repeat_tuple_repeats_only_fwd(): ) def test_repeat_tuple_repeats_only_bkwd(): pass + + +def _wrap_list(x): + return x if isinstance(x, list) else [x] + + +@pytest.mark.parametrize("func", [atleast_1d, atleast_2d, atleast_3d]) +@given(x=infer, constant=st.none() | st.booleans()) +def test_atleast_kd_fixed_point(func, x: List[ArrayLike], constant): + with no_autodiff: + out1 = _wrap_list(func(*x, constant=constant)) + out2 = _wrap_list(func(*out1, constant=constant)) + assert len(out1) == len(out2) + assert all(x is y for x, y in zip(out1, out2)) + + +@fwdprop_test_factory( + mygrad_func=atleast_1d, + true_func=np.atleast_1d, + num_arrays=1, +) +def test_atleast_1d_fwd(): + pass + + +@backprop_test_factory( + mygrad_func=add_constant_passthrough(np.atleast_1d), # exercises __array_function__ + true_func=np.atleast_1d, + num_arrays=1, + vary_each_element=True, +) +def test_atleast_1d_only_bkwd(): + pass + + +@fwdprop_test_factory( + mygrad_func=atleast_2d, + true_func=np.atleast_2d, + num_arrays=1, +) +def test_atleast_2d_fwd(): + pass + + +@backprop_test_factory( + 
mygrad_func=add_constant_passthrough(np.atleast_2d), # exercises __array_function__ + true_func=np.atleast_2d, + num_arrays=1, + vary_each_element=True, +) +def test_atleast_2d_only_bkwd(): + pass + + +@fwdprop_test_factory( + mygrad_func=atleast_3d, + true_func=np.atleast_3d, + num_arrays=1, +) +def test_atleast_3d_fwd(): + pass + + +@backprop_test_factory( + mygrad_func=add_constant_passthrough(np.atleast_3d), # exercises __array_function__ + true_func=np.atleast_3d, + num_arrays=1, + vary_each_element=True, +) +def test_atleast_3d_only_bkwd(): + pass diff --git a/tests/ufuncs/test_fwd_prop_and_backprop.py b/tests/ufuncs/test_fwd_prop_and_backprop.py index 49dc505b..9b28a624 100644 --- a/tests/ufuncs/test_fwd_prop_and_backprop.py +++ b/tests/ufuncs/test_fwd_prop_and_backprop.py @@ -331,3 +331,17 @@ def test_arctan2_bkwd_pos_x(): ) def test_arctan2_bkwd_neg_x(): pass + + +def test_abs_nan_to_num(): + x = mg.arange(-2.0, 3.0) + y = x.copy() + z = x.copy() + + mg.abs(x, nan_to_num=False).backward() + mg.abs(y, nan_to_num=True).backward() + mg.abs(z).backward() + + assert np.all(np.isnan(x.grad) == np.array([False, False, True, False, False])) + assert_allclose(np.nan_to_num(x.grad), y.grad) + assert_allclose(y.grad, z.grad) diff --git a/tests/wrappers/uber.py b/tests/wrappers/uber.py index 050cb8f6..d29943d2 100644 --- a/tests/wrappers/uber.py +++ b/tests/wrappers/uber.py @@ -8,6 +8,7 @@ import hypothesis.strategies as st import numpy as np from hypothesis import assume, given, note +from hypothesis.extra._array_helpers import MutuallyBroadcastableShapesStrategy from hypothesis.strategies import SearchStrategy from hypothesis.strategies._internal.lazy import LazyStrategy from numpy.testing import assert_allclose, assert_array_equal @@ -15,8 +16,8 @@ import mygrad._utils.lock_management as mem from mygrad import Tensor from mygrad.operation_base import Operation - from tests.utils.checkers import expected_constant as _expected_constant + from ..utils.numerical_gradient import ( finite_difference, numerical_gradient, @@ -103,8 +104,8 @@ def __init__( mygrad_func: Callable[[Tensor], Tensor], true_func: Callable[[np.ndarray], np.ndarray], num_arrays: Optional[int] = None, - shapes: Optional[hnp.MutuallyBroadcastableShapesStrategy] = None, - index_to_bnds: Dict[int, Tuple[int, int]] = None, + shapes: Optional[MutuallyBroadcastableShapesStrategy] = None, + index_to_bnds: Dict[int, Tuple[float, float]] = None, default_bnds: Tuple[float, float] = (-1e6, 1e6), index_to_no_go: Dict[int, Sequence[int]] = None, kwargs: Union[ @@ -129,7 +130,7 @@ def __init__( num_arrays : Optional[int] The number of arrays to be fed to the function - shapes : Optional[hnp.MutuallyBroadcastableShapesStrategy] + shapes : Optional[MutuallyBroadcastableShapesStrategy] A strategy that generates all of the input shapes to feed to the function. 
index_to_bnds : Dict[int, Tuple[int, int]] @@ -191,7 +192,7 @@ def __init__( if not isinstance(shapes, st.SearchStrategy): raise TypeError( f"`shapes` should be " - f"Optional[hnp.MutuallyBroadcastableShapesStrategy]" + f"Optional[MutuallyBroadcastableShapesStrategy]" f", got {shapes}" ) @@ -199,10 +200,10 @@ def __init__( shapes.wrapped_strategy if isinstance(shapes, LazyStrategy) else shapes ) - if not isinstance(shapes_type, hnp.MutuallyBroadcastableShapesStrategy): + if not isinstance(shapes_type, MutuallyBroadcastableShapesStrategy): raise TypeError( f"`shapes` should be " - f"Optional[hnp.MutuallyBroadcastableShapesStrategy]" + f"Optional[MutuallyBroadcastableShapesStrategy]" f", got {shapes}" ) num_arrays = shapes_type.num_shapes @@ -429,7 +430,7 @@ def __init__( mygrad_func: Callable[[Tensor], Tensor], true_func: Callable[[np.ndarray], np.ndarray], num_arrays: Optional[int] = None, - shapes: Optional[hnp.MutuallyBroadcastableShapesStrategy] = None, + shapes: Optional[MutuallyBroadcastableShapesStrategy] = None, index_to_bnds: Optional[Dict[int, Tuple[int, int]]] = None, default_bnds: Tuple[float, float] = (-1e6, 1e6), index_to_no_go: Optional[Dict[int, Sequence[int]]] = None, @@ -462,7 +463,7 @@ def __init__( num_arrays : Optional[int] The number of arrays that must be passed to ``mygrad_func`` - shapes : Optional[hnp.MutuallyBroadcastableShapesStrategy] + shapes : Optional[MutuallyBroadcastableShapesStrategy] A strategy that generates all of the input shapes to feed to the function. index_to_bnds : Optional[Dict[int, Tuple[int, int]]] @@ -568,7 +569,7 @@ def __init__( if not isinstance(shapes, st.SearchStrategy): raise TypeError( f"`shapes` should be " - f"Optional[hnp.MutuallyBroadcastableShapesStrategy]" + f"Optional[MutuallyBroadcastableShapesStrategy]" f", got {shapes}" ) @@ -576,10 +577,10 @@ def __init__( shapes.wrapped_strategy if isinstance(shapes, LazyStrategy) else shapes ) - if not isinstance(shapes_type, hnp.MutuallyBroadcastableShapesStrategy): + if not isinstance(shapes_type, MutuallyBroadcastableShapesStrategy): raise TypeError( f"`shapes` should be " - f"Optional[hnp.MutuallyBroadcastableShapesStrategy]" + f"Optional[MutuallyBroadcastableShapesStrategy]" f", got {shapes}" ) num_arrays = shapes_type.num_shapes @@ -754,6 +755,10 @@ def wrapper(shapes: hnp.BroadcastableShapes, data: st.DataObject): look_to = out.base if out.base is not None else out output_was_writeable = id(look_to.data) in mem._array_counter + if len(arrs) == 1 and out is arrs[0]: + # op returns reference of input + return + assert all( a.data.flags.writeable is False for a in arrs ), "input array memory is not locked by op"
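
A minimal, concrete version of the save/load round trip that `tests/test_io.py` exercises above, assuming only the `save(file, tensor)` / `load(file)` signatures and the `cleandir` fixture shown in this diff:

```python
import pytest
from numpy.testing import assert_array_equal

import mygrad as mg
from mygrad import load, save


@pytest.mark.usefixtures("cleandir")  # run inside a throwaway working directory
def test_save_load_manual_roundtrip():
    x = mg.tensor([1.0, 2.0, 3.0])
    (x ** 2).backward()  # populate x.grad so that it round-trips as well

    save("x.npz", x)
    loaded = load("x.npz")

    assert_array_equal(loaded, x)
    assert_array_equal(loaded.grad, x.grad)  # the gradient is restored too
```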
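The in-place `clip` behavior covered by `test_inplace_clip` above, restated as a plain script; the values mirror the assertions in that test:

```python
import mygrad as mg
from numpy.testing import assert_allclose

x = mg.arange(4.0)                   # Tensor([0., 1., 2., 3.])
y = mg.tensor([-1.0, 2.0, 3.0])

out = mg.clip(y, 0, 2.1, out=x[1:])  # write the clipped result into a view of `x`
out.backward()

assert_allclose(out, [0.0, 2.0, 2.1])
assert_allclose(x, [0.0, 0.0, 2.0, 2.1])       # the view's base reflects the write
assert_allclose(y.grad, [0.0, 1.0, 0.0])       # only the un-clipped entry receives gradient
assert_allclose(x.grad, [0.0, 1.0, 1.0, 1.0])
```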
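The `nan_to_num` flag exercised by `test_abs_nan_to_num` and `test_nan_to_num_behavior` above trades rigor for convenience: by default the derivative of `abs` at 0 is reported as 0, while `nan_to_num=False` leaves it as `nan`. A small sketch of that difference, using only the calls that appear in those tests:

```python
import numpy as np
import mygrad as mg

x = mg.arange(-2.0, 3.0)                # [-2., -1., 0., 1., 2.]
mg.abs(x).backward()                    # default behavior (nan_to_num=True)
print(x.grad)                           # [-1. -1.  0.  1.  1.]

y = mg.arange(-2.0, 3.0)
mg.abs(y, nan_to_num=False).backward()  # strict behavior: d|y|/dy is undefined at 0
print(np.isnan(y.grad))                 # [False False  True False False]
```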