From d91fd1c70d550876ad30331f450cd274bc8ad853 Mon Sep 17 00:00:00 2001
From: "quant-ranger[bot]"
 <132915763+quant-ranger[bot]@users.noreply.github.com>
Date: Mon, 26 Jun 2023 08:10:06 +0100
Subject: [PATCH 01/32] Pre-commit autoupdate (#274)

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2fa1dbd2..94355447 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
     - id: isort-conda
       additional_dependencies: [toml]
  - repo: https://github.com/Quantco/pre-commit-mirrors-mypy
-   rev: "1.3.0"
+   rev: "1.4.0"
    hooks:
     - id: mypy-conda
       additional_dependencies:

From b788ddd44c22805ed81a9b92bd88e939c26fe556 Mon Sep 17 00:00:00 2001
From: "quant-ranger[bot]"
 <132915763+quant-ranger[bot]@users.noreply.github.com>
Date: Mon, 3 Jul 2023 08:39:23 +0100
Subject: [PATCH 02/32] Pre-commit autoupdate (#276)

Co-authored-by: quant-ranger[bot] <132915763+quant-ranger[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 94355447..5a37c63e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
     - id: isort-conda
       additional_dependencies: [toml]
  - repo: https://github.com/Quantco/pre-commit-mirrors-mypy
-   rev: "1.4.0"
+   rev: "1.4.1"
    hooks:
     - id: mypy-conda
       additional_dependencies:

From 212a1c6858f2d7b1e7e20772b513d4c2767f374c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 3 Jul 2023 08:40:15 +0100
Subject: [PATCH 03/32] Bump pypa/gh-action-pypi-publish from 1.8.6 to 1.8.7
 (#277)

Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.6 to 1.8.7.
- [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases)
- [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.6...v1.8.7)

---
updated-dependencies:
- dependency-name: pypa/gh-action-pypi-publish
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_wheels_release.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_wheels_release.yml b/.github/workflows/build_wheels_release.yml
index 0edbde90..3f61d37c 100644
--- a/.github/workflows/build_wheels_release.yml
+++ b/.github/workflows/build_wheels_release.yml
@@ -60,7 +60,7 @@ jobs:
           name: artifact
           path: dist
 
-      - uses: pypa/gh-action-pypi-publish@v1.8.6
+      - uses: pypa/gh-action-pypi-publish@v1.8.7
         with:
           user: __token__
           password: ${{ secrets.GH_TESTPYPI_UPLOAD }}
@@ -75,7 +75,7 @@ jobs:
           name: artifact
           path: dist
 
-      - uses: pypa/gh-action-pypi-publish@v1.8.6
+      - uses: pypa/gh-action-pypi-publish@v1.8.7
         with:
           user: __token__
           password: ${{ secrets.GH_PYPI_UPLOAD }}

From 2391ada89e76dd19dc3549ee5bea17b04433d8b3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 17 Jul 2023 08:29:56 -0400
Subject: [PATCH 04/32] Bump pypa/gh-action-pypi-publish from 1.8.7 to 1.8.8
 (#279)

Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.7 to 1.8.8.
- [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases)
- [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.7...v1.8.8)

---
updated-dependencies:
- dependency-name: pypa/gh-action-pypi-publish
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_wheels_release.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_wheels_release.yml b/.github/workflows/build_wheels_release.yml
index 3f61d37c..b43f3018 100644
--- a/.github/workflows/build_wheels_release.yml
+++ b/.github/workflows/build_wheels_release.yml
@@ -60,7 +60,7 @@ jobs:
           name: artifact
           path: dist
 
-      - uses: pypa/gh-action-pypi-publish@v1.8.7
+      - uses: pypa/gh-action-pypi-publish@v1.8.8
         with:
           user: __token__
           password: ${{ secrets.GH_TESTPYPI_UPLOAD }}
@@ -75,7 +75,7 @@ jobs:
           name: artifact
           path: dist
 
-      - uses: pypa/gh-action-pypi-publish@v1.8.7
+      - uses: pypa/gh-action-pypi-publish@v1.8.8
         with:
           user: __token__
           password: ${{ secrets.GH_PYPI_UPLOAD }}

From 6e756dd55523fe9d8e338d95c13feba80a17f896 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 17 Jul 2023 08:30:20 -0400
Subject: [PATCH 05/32] Bump pypa/cibuildwheel from 2.13.1 to 2.14.1 (#280)

Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.13.1 to 2.14.1.
- [Release notes](https://github.com/pypa/cibuildwheel/releases)
- [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md)
- [Commits](https://github.com/pypa/cibuildwheel/compare/v2.13.1...v2.14.1)

---
updated-dependencies:
- dependency-name: pypa/cibuildwheel
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_wheels.yml         | 2 +-
 .github/workflows/build_wheels_release.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index b531b3bc..e147b2bf 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -20,7 +20,7 @@ jobs:
         with:
           platforms: all
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.13.1
+        uses: pypa/cibuildwheel@v2.14.1
         env:
           CIBW_ARCHS_LINUX: auto
           CIBW_ARCHS_MACOS: x86_64 arm64
diff --git a/.github/workflows/build_wheels_release.yml b/.github/workflows/build_wheels_release.yml
index b43f3018..9f6cd123 100644
--- a/.github/workflows/build_wheels_release.yml
+++ b/.github/workflows/build_wheels_release.yml
@@ -21,7 +21,7 @@ jobs:
         with:
           platforms: all
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.13.1
+        uses: pypa/cibuildwheel@v2.14.1
         env:
           CIBW_ARCHS_LINUX: auto aarch64
           CIBW_ARCHS_MACOS: x86_64 arm64

From 31ca04674e94d69f67b8f9fe844269405f31dad6 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Tue, 18 Jul 2023 17:17:08 +0200
Subject: [PATCH 06/32] Minimal DenseMatrix ndarray wrapper (tests green)

---
 src/tabmat/categorical_matrix.py |  6 ++-
 src/tabmat/dense_matrix.py       | 84 +++++++++++++++++++++++---------
 src/tabmat/sparse_matrix.py      |  9 ++--
 tests/test_matrices.py           |  4 +-
 4 files changed, 71 insertions(+), 32 deletions(-)

diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 44c47efd..7a751fdb 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -452,8 +452,10 @@ def _cross_sandwich(
         R_cols: Optional[np.ndarray] = None,
     ) -> np.ndarray:
         """Perform a sandwich product: X.T @ diag(d) @ Y."""
-        if isinstance(other, np.ndarray):
-            return self._cross_dense(other, d, rows, L_cols, R_cols)
+        from .dense_matrix import DenseMatrix
+
+        if isinstance(other, (np.ndarray, DenseMatrix)):
+            return self._cross_dense(np.asarray(other), d, rows, L_cols, R_cols)
         if isinstance(other, sps.csc_matrix):
             return self._cross_sparse(other, d, rows, L_cols, R_cols)
         if isinstance(other, CategoricalMatrix):
diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index 84ef1f1d..c854041f 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -17,7 +17,7 @@
 )
 
 
-class DenseMatrix(np.ndarray, MatrixBase):
+class DenseMatrix(np.lib.mixins.NDArrayOperatorsMixin, MatrixBase):
     """
     A ``numpy.ndarray`` subclass with several additional functions that allow
     it to share the MatrixBase API with SparseMatrix and CategoricalMatrix.
@@ -32,29 +32,65 @@ class DenseMatrix(np.ndarray, MatrixBase):
 
     """
 
-    def __new__(cls, input_array):  # noqa
-        """
-        Details of how to subclass np.ndarray are explained here:
+    def __init__(self, input_array):
+        self._array = np.asarray(input_array)
 
-        https://docs.scipy.org/doc/numpy/user/basics.subclassing.html\
-            #slightly-more-realistic-example-attribute-added-to-existing-array
-        """
-        obj = np.asarray(input_array).view(cls)
-        if not np.issubdtype(obj.dtype, np.floating):
-            raise NotImplementedError("DenseMatrix is only implemented for float data")
-        return obj
+    def __getitem__(self, key):
+        return type(self)(self._array.__getitem__(key))
+
+    def __array__(self, dtype=None):
+        return self._array.astype(dtype, copy=False)
+
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        inputs = (x._array if isinstance(x, DenseMatrix) else x for x in inputs)
+        result = getattr(ufunc, method)(*inputs, **kwargs)
+        if method in ["__call__", "accumulate"]:
+            return type(self)(result)
+        else:
+            return result
+
+    def __matmul__(self, other):
+        return self._array.__matmul__(other)
+
+    def __rmatmul__(self, other):
+        return self._array.__rmatmul__(other)
+
+    @property
+    def shape(self):
+        """Tuple of array dimensions."""
+        return self._array.shape
+
+    @property
+    def ndim(self):
+        """Number of array dimensions."""  # noqa: D401
+        return self._array.ndim
+
+    @property
+    def dtype(self):
+        """Data-type of the array’s elements."""  # noqa: D401
+        return self._array.dtype
+
+    def transpose(self):
+        """Returns a view of the array with axes transposed."""  # noqa: D401
+        return type(self)(self._array.T)
+
+    T = property(transpose)
+
+    def astype(self, dtype, order="K", casting="unsafe", copy=True):
+        """Copy of the array, cast to a specified type."""
+        return type(self)(self._array.astype(dtype, order, casting, copy))
 
-    def __array_finalize__(self, obj):
-        if obj is None:
-            return
+    def sum(self, *args, **kwargs):
+        """Return the sum of the array elements over the given axis."""
+        return self._array.sum(*args, **kwargs)
 
     def getcol(self, i):
         """Return matrix column at specified index."""
-        return self[:, [i]]
+        return type(self)(self._array[:, [i]])
 
     def toarray(self):
         """Return array representation of matrix."""
-        return np.asarray(self)
+        return self._array
 
     def sandwich(
         self, d: np.ndarray, rows: np.ndarray = None, cols: np.ndarray = None
@@ -62,7 +98,7 @@ def sandwich(
         """Perform a sandwich product: X.T @ diag(d) @ X."""
         d = np.asarray(d)
         rows, cols = setup_restrictions(self.shape, rows, cols)
-        return dense_sandwich(self, d, rows, cols)
+        return dense_sandwich(self._array, d, rows, cols)
 
     def _cross_sandwich(
         self,
@@ -81,7 +117,7 @@ def _cross_sandwich(
 
     def _get_col_stds(self, weights: np.ndarray, col_means: np.ndarray) -> np.ndarray:
         """Get standard deviations of columns."""
-        sqrt_arg = transpose_square_dot_weights(self, weights) - col_means**2
+        sqrt_arg = transpose_square_dot_weights(self._array, weights) - col_means**2
         # Minor floating point errors above can result in a very slightly
         # negative sqrt_arg (e.g. -5e-16). We just set those values equal to
         # zero.
@@ -105,7 +141,7 @@ def _matvec_helper(
         # this without an explosion of code?
         vec = np.asarray(vec)
         check_matvec_dimensions(self, vec, transpose=transpose)
-        X = self.T if transpose else self
+        X = self._array.T if transpose else self._array
 
         # NOTE: We assume that rows and cols are unique
         unrestricted_rows = rows is None or len(rows) == self.shape[0]
@@ -122,11 +158,11 @@ def _matvec_helper(
             # TODO: should take 'out' parameter
             fast_fnc = dense_rmatvec if transpose else dense_matvec
             if vec.ndim == 1:
-                res = fast_fnc(self, vec, rows, cols)
+                res = fast_fnc(self._array, vec, rows, cols)
             elif vec.ndim == 2 and vec.shape[1] == 1:
-                res = fast_fnc(self, vec[:, 0], rows, cols)[:, None]
+                res = fast_fnc(self._array, vec[:, 0], rows, cols)[:, None]
             else:
-                subset = self[np.ix_(rows, cols)]
+                subset = self._array[np.ix_(rows, cols)]
                 res = subset.T.dot(vec[rows]) if transpose else subset.dot(vec[cols])
             if out is None:
                 return res
@@ -164,5 +200,5 @@ def multiply(self, other):
         This assumes that ``other`` is a vector of size ``self.shape[0]``.
         """
         if np.asanyarray(other).ndim == 1:
-            return super().__mul__(other[:, np.newaxis])
-        return super().__mul__(other)
+            return type(self)(self._array.__mul__(other[:, np.newaxis]))
+        return type(self)(self._array.__mul__(other))
diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 7f1b44ad..3befbad9 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -59,8 +59,7 @@ def sandwich(
         self, d: np.ndarray, rows: np.ndarray = None, cols: np.ndarray = None
     ) -> np.ndarray:
         """Perform a sandwich product: X.T @ diag(d) @ X."""
-        if not hasattr(d, "dtype"):
-            d = np.asarray(d)
+        d = np.asarray(d)
         if not self.dtype == d.dtype:
             raise TypeError(
                 f"""self and d need to be of same dtype, either np.float64
@@ -80,9 +79,11 @@ def _cross_sandwich(
         R_cols: Optional[np.ndarray] = None,
     ):
         """Perform a sandwich product: X.T @ diag(d) @ Y."""
-        if isinstance(other, np.ndarray):
-            return self.sandwich_dense(other, d, rows, L_cols, R_cols)
         from .categorical_matrix import CategoricalMatrix
+        from .dense_matrix import DenseMatrix
+
+        if isinstance(other, (np.ndarray, DenseMatrix)):
+            return self.sandwich_dense(np.asarray(other), d, rows, L_cols, R_cols)
 
         if isinstance(other, CategoricalMatrix):
             return other._cross_sandwich(self, d, rows, R_cols, L_cols).T
diff --git a/tests/test_matrices.py b/tests/test_matrices.py
index 64317747..5d314c7e 100644
--- a/tests/test_matrices.py
+++ b/tests/test_matrices.py
@@ -24,7 +24,7 @@ def dense_matrix_C() -> tm.DenseMatrix:
 
 def dense_matrix_not_writeable() -> tm.DenseMatrix:
     mat = dense_matrix_F()
-    mat.setflags(write=False)
+    mat._array.setflags(write=False)
     return mat
 
 
@@ -440,7 +440,7 @@ def test_rmatmul(mat: Union[tm.MatrixBase, tm.StandardizedMatrix], vec_type):
     expected = vec_as_list @ mat.A
     np.testing.assert_allclose(res, expected)
     np.testing.assert_allclose(res2, expected)
-    assert isinstance(res, np.ndarray)
+    assert isinstance(res, (np.ndarray, tm.DenseMatrix))
 
 
 @pytest.mark.parametrize("mat", get_matrices())

From cce460fadff5b95e95c07ff24a68dc0f647958b4 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Tue, 18 Jul 2023 20:29:18 +0200
Subject: [PATCH 07/32] Improve the performance of `from_pandas` in the case of
 low-cardinality categoricals (#275)

* Improve the performance of `from_pandas`

* Update changelog according to review
---
 CHANGELOG.rst             |  7 +++++++
 src/tabmat/constructor.py | 12 ++----------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 83c8aceb..331695b1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,13 @@
 Changelog
 =========
 
+Unreleased
+----------
+
+**Other changes:**
+
+- Improve the performance of ``from_pandas`` in the case of low-cardinality categorical variables.
+
 3.1.10 - 2023-06-23
 -------------------
 
diff --git a/src/tabmat/constructor.py b/src/tabmat/constructor.py
index b782f2da..f8e23c31 100644
--- a/src/tabmat/constructor.py
+++ b/src/tabmat/constructor.py
@@ -72,6 +72,7 @@ def from_pandas(
         if object_as_cat and coldata.dtype == object:
             coldata = coldata.astype("category")
         if isinstance(coldata.dtype, pd.CategoricalDtype):
+            cat = CategoricalMatrix(coldata, drop_first=drop_first, dtype=dtype)
             if len(coldata.cat.categories) < cat_threshold:
                 (
                     X_dense_F,
@@ -79,15 +80,7 @@ def from_pandas(
                     dense_indices,
                     sparse_indices,
                 ) = _split_sparse_and_dense_parts(
-                    pd.get_dummies(
-                        coldata,
-                        prefix=colname,
-                        sparse=True,
-                        drop_first=drop_first,
-                        dtype=np.float64,
-                    )
-                    .sparse.to_coo()
-                    .tocsc(),
+                    sps.csc_matrix(cat.tocsr(), dtype=dtype),
                     threshold=sparse_threshold,
                 )
                 matrices.append(X_dense_F)
@@ -103,7 +96,6 @@ def from_pandas(
                     indices.append(sparse_indices)
 
             else:
-                cat = CategoricalMatrix(coldata, drop_first=drop_first, dtype=dtype)
                 matrices.append(cat)
                 is_cat.append(True)
                 if cat_position == "expand":

From 24525c8fd5c82a705addd5f828909d859eebb1ad Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 19 Jul 2023 09:33:51 +0200
Subject: [PATCH 08/32] Remove sum method and rely on np.sum

---
 src/tabmat/dense_matrix.py     | 4 ----
 src/tabmat/standardized_mat.py | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index c854041f..ade5d355 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -80,10 +80,6 @@ def astype(self, dtype, order="K", casting="unsafe", copy=True):
         """Copy of the array, cast to a specified type."""
         return type(self)(self._array.astype(dtype, order, casting, copy))
 
-    def sum(self, *args, **kwargs):
-        """Return the sum of the array elements over the given axis."""
-        return self._array.sum(*args, **kwargs)
-
     def getcol(self, i):
         """Return matrix column at specified index."""
         return type(self)(self._array[:, [i]])
diff --git a/src/tabmat/standardized_mat.py b/src/tabmat/standardized_mat.py
index 0d8a0190..19b04f5a 100644
--- a/src/tabmat/standardized_mat.py
+++ b/src/tabmat/standardized_mat.py
@@ -147,7 +147,7 @@ def sandwich(
 
         limited_shift = self.shift[cols] if cols is not None else self.shift
         limited_d = d[rows] if rows is not None else d
-        term3_and_4 = np.outer(limited_shift, d_mat + limited_shift * limited_d.sum())
+        term3_and_4 = np.outer(limited_shift, d_mat + limited_shift * np.sum(limited_d))
         res = term2 + term3_and_4
         if isinstance(term1, sps.dia_matrix):
             idx = np.arange(res.shape[0])

From 1e31779bd9b98190d3f0037b1c41280c47c71f58 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 19 Jul 2023 16:00:07 +0200
Subject: [PATCH 09/32] Force DenseMatrix to always be 2-dimensional

---
 src/tabmat/dense_matrix.py | 16 +++++++++++++++-
 tests/test_matrices.py     | 11 ++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index ade5d355..dcad444a 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -33,9 +33,22 @@ class DenseMatrix(np.lib.mixins.NDArrayOperatorsMixin, MatrixBase):
     """
 
     def __init__(self, input_array):
+        input_array = np.asarray(input_array)
+
+        if input_array.ndim == 1:
+            input_array = input_array.reshape(-1, 1)
+        elif input_array.ndim > 2:
+            raise ValueError("Input array must be 1- or 2-dimensional")
+
         self._array = np.asarray(input_array)
 
     def __getitem__(self, key):
+        if not isinstance(key, tuple):
+            key = (key,)
+
+        # Always return a 2d array
+        key = tuple([key_i] if np.isscalar(key_i) else key_i for key_i in key)
+
         return type(self)(self._array.__getitem__(key))
 
     def __array__(self, dtype=None):
@@ -44,7 +57,8 @@ def __array__(self, dtype=None):
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         inputs = (x._array if isinstance(x, DenseMatrix) else x for x in inputs)
         result = getattr(ufunc, method)(*inputs, **kwargs)
-        if method in ["__call__", "accumulate"]:
+        if method in ("call", "accumulate") and ufunc.signature is None:
+            # Does not change shape
             return type(self)(result)
         else:
             return result
diff --git a/tests/test_matrices.py b/tests/test_matrices.py
index 5d314c7e..779b160c 100644
--- a/tests/test_matrices.py
+++ b/tests/test_matrices.py
@@ -233,7 +233,7 @@ def test_to_array_standardized_mat(mat: tm.StandardizedMatrix):
 @pytest.mark.parametrize("mat", get_matrices())
 @pytest.mark.parametrize(
     "other_type",
-    [lambda x: x, np.asarray, tm.DenseMatrix],
+    [lambda x: x, np.asarray],
 )
 @pytest.mark.parametrize("cols", [None, [], [1], np.array([1])])
 @pytest.mark.parametrize("other_shape", [[], [1], [2]])
@@ -243,7 +243,7 @@ def test_matvec(
     """
     Mat.
 
-    other_type: Function transforming list to list, array, or DenseMatrix
+    t: Function transforming list to list, array, or DenseMatrix
     cols: Argument 1 to matvec, specifying which columns of the matrix (and
         which elements of 'other') to use
     other_shape: Second dimension of 'other.shape', if any. If other_shape is [], then
@@ -303,7 +303,7 @@ def process_mat_vec_subsets(mat, vec, mat_rows, mat_cols, vec_idxs):
 @pytest.mark.parametrize("mat", get_matrices())
 @pytest.mark.parametrize(
     "other_type",
-    [lambda x: x, np.array, tm.DenseMatrix],
+    [lambda x: x, np.array],
 )
 @pytest.mark.parametrize("rows", [None, [], [2], np.arange(2)])
 @pytest.mark.parametrize("cols", [None, [], [1], np.arange(1)])
@@ -373,7 +373,7 @@ def test_cross_sandwich(
 @pytest.mark.parametrize("mat", get_matrices())
 @pytest.mark.parametrize(
     "vec_type",
-    [lambda x: x, np.array, tm.DenseMatrix],
+    [lambda x: x, np.array],
 )
 @pytest.mark.parametrize("rows", [None, [], [1], np.arange(2)])
 @pytest.mark.parametrize("cols", [None, [], [0], np.arange(1)])
@@ -430,7 +430,7 @@ def test_transpose(mat):
 @pytest.mark.parametrize("mat", get_matrices())
 @pytest.mark.parametrize(
     "vec_type",
-    [lambda x: x, np.array, tm.DenseMatrix],
+    [lambda x: x, np.array],
 )
 def test_rmatmul(mat: Union[tm.MatrixBase, tm.StandardizedMatrix], vec_type):
     vec_as_list = [3.0, -0.1, 0]
@@ -559,6 +559,7 @@ def test_indexing_int_row(mat: Union[tm.MatrixBase, tm.StandardizedMatrix]):
 @pytest.mark.parametrize("mat", get_matrices())
 def test_indexing_range_row(mat: Union[tm.MatrixBase, tm.StandardizedMatrix]):
     res = mat[0:2, :]
+    assert res.ndim == 2
     if not isinstance(res, np.ndarray):
         res = res.A
     expected = mat.A[0:2, :]

From 755e6341747fb807dbb3ef5dd87b91f857b88630 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 19 Jul 2023 16:40:17 +0200
Subject: [PATCH 10/32] Add __repr__ and __str__ methods

---
 src/tabmat/dense_matrix.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index dcad444a..842d4464 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -1,3 +1,4 @@
+import textwrap
 from typing import List, Optional, Union
 
 import numpy as np
@@ -69,6 +70,18 @@ def __matmul__(self, other):
     def __rmatmul__(self, other):
         return self._array.__rmatmul__(other)
 
+    def __str__(self):
+        return "{}x{} DenseMatrix:\n\n".format(*self.shape) + np.array_str(self._array)
+
+    def __repr__(self):
+        class_name = type(self).__name__
+        array_str = f"{class_name}({np.array2string(self._array, separator=', ')})"
+        return textwrap.indent(
+            array_str,
+            " " * (len(class_name) + 1),
+            predicate=lambda line: not line.startswith(class_name),
+        )
+
     @property
     def shape(self):
         """Tuple of array dimensions."""

From 8c89462f196e6e9ca7de6da3605913d7fd8a77eb Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 19 Jul 2023 20:02:42 +0200
Subject: [PATCH 11/32] Add benchmark data to .gitignore (#282)

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index c528d376..186ba948 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Project-specific
 benchmark/*.csv
+benchmark/data/*.pkl
 
 # Files created by templating
 dense.cpp

From 0560529f441775bcbd129dfaf26f206ce8be934e Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 20 Jul 2023 10:31:35 +0200
Subject: [PATCH 12/32] Fix as_mx to accept DenseMatrix inputs

---
 src/tabmat/split_matrix.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tabmat/split_matrix.py b/src/tabmat/split_matrix.py
index a7618912..aaf88414 100644
--- a/src/tabmat/split_matrix.py
+++ b/src/tabmat/split_matrix.py
@@ -29,7 +29,7 @@ def as_mx(a: Any):
         return a
     elif sps.issparse(a):
         return SparseMatrix(a)
-    elif isinstance(a, np.ndarray):
+    elif isinstance(a, (np.ndarray, DenseMatrix)):
         return DenseMatrix(a)
     else:
         raise ValueError(f"Cannot convert type {type(a)} to Matrix.")

From 80143ef6550d537e53defc73649a3f112aa1ec3e Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 20 Jul 2023 12:31:38 +0200
Subject: [PATCH 13/32] Fix ufunc return value

---
 src/tabmat/dense_matrix.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index 842d4464..953fe505 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -58,7 +58,7 @@ def __array__(self, dtype=None):
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         inputs = (x._array if isinstance(x, DenseMatrix) else x for x in inputs)
         result = getattr(ufunc, method)(*inputs, **kwargs)
-        if method in ("call", "accumulate") and ufunc.signature is None:
+        if method in ("__call__", "accumulate") and ufunc.signature is None:
             # Does not change shape
             return type(self)(result)
         else:

From 34d6f37cc6c442d877ec3cb2b191c6f519130228 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 20 Jul 2023 17:08:49 +0200
Subject: [PATCH 14/32] Wrap SparseMatrix, too

---
 src/tabmat/sparse_matrix.py | 96 ++++++++++++++++++++++++++-----------
 1 file changed, 69 insertions(+), 27 deletions(-)

diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 3befbad9..11043bb8 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -31,29 +31,65 @@ class SparseMatrix(sps.csc_matrix, MatrixBase):
     """
 
     def __init__(self, arg1, shape=None, dtype=None, copy=False):
-        super().__init__(arg1, shape, dtype, copy)
-        self.idx_dtype = max(self.indices.dtype, self.indptr.dtype)
-        if self.indices.dtype != self.idx_dtype:
-            self.indices = self.indices.astype(self.idx_dtype)
-        if self.indptr.dtype != self.idx_dtype:
-            self.indptr = self.indptr.astype(self.idx_dtype)
+        self._array = sps.csc_matrix(arg1, shape, dtype, copy)
+
+        self.idx_dtype = max(self._array.indices.dtype, self._array.indptr.dtype)
+        if self._array.indices.dtype != self.idx_dtype:
+            self._array.indices = self._array.indices.astype(self.idx_dtype)
+        if self._array.indptr.dtype != self.idx_dtype:
+            self._array.indptr = self._array.indptr.astype(self.idx_dtype)
         assert self.indices.dtype == self.idx_dtype
 
-        if not self.has_sorted_indices:
-            self.sort_indices()
-        self._x_csr = None
+        if not self._array.has_sorted_indices:
+            self._array.sort_indices()
+        self._array_csr = None
+
+    @property
+    def shape(self):
+        """Tuple of array dimensions."""
+        return self._array.shape
+
+    @property
+    def ndim(self):
+        """Number of array dimensions."""  # noqa: D401
+        return self._array.ndim
+
+    @property
+    def dtype(self):
+        """Data-type of the array’s elements."""  # noqa: D401
+        return self._array.dtype
+
+    @property
+    def indices(self):
+        """Indices of the matrix."""  # noqa: D401
+        return self._array.indices
+
+    @property
+    def indptr(self):
+        """Indptr of the matrix."""  # noqa: D401
+        return self._array.indptr
+
+    @property
+    def data(self):
+        """Data of the matrix."""  # noqa: D401
+        return self._array.data
+
+    @property
+    def array_csc(self):
+        """Return the CSC representation of the matrix."""
+        return self._array
 
     @property
-    def x_csr(self):
+    def array_csr(self):
         """Cache the CSR representation of the matrix."""
-        if self._x_csr is None:
-            self._x_csr = self.tocsr(copy=False)
-            if self._x_csr.indices.dtype != self.idx_dtype:
-                self._x_csr.indices = self._x_csr.indices.astype(self.idx_dtype)
-            if self._x_csr.indptr.dtype != self.idx_dtype:
-                self._x_csr.indptr = self._x_csr.indptr.astype(self.idx_dtype)
+        if self._array_csr is None:
+            self._array_csr = self._array.tocsr(copy=False)
+            if self._array_csr.indices.dtype != self.idx_dtype:
+                self._array_csr.indices = self._array_csr.indices.astype(self.idx_dtype)
+            if self._array_csr.indptr.dtype != self.idx_dtype:
+                self._array_csr.indptr = self._array_csr.indptr.astype(self.idx_dtype)
 
-        return self._x_csr
+        return self._array_csr
 
     def sandwich(
         self, d: np.ndarray, rows: np.ndarray = None, cols: np.ndarray = None
@@ -68,7 +104,7 @@ def sandwich(
             )
 
         rows, cols = setup_restrictions(self.shape, rows, cols, dtype=self.idx_dtype)
-        return sparse_sandwich(self, self.x_csr, d, rows, cols)
+        return sparse_sandwich(self, self.array_csr, d, rows, cols)
 
     def _cross_sandwich(
         self,
@@ -112,7 +148,7 @@ def sandwich_dense(
 
         rows, L_cols = setup_restrictions(self.shape, rows, L_cols)
         R_cols = set_up_rows_or_cols(R_cols, B.shape[1])
-        return csr_dense_sandwich(self.x_csr, B, d, rows, L_cols, R_cols)
+        return csr_dense_sandwich(self.array_csr, B, d, rows, L_cols, R_cols)
 
     def _matvec_helper(
         self,
@@ -129,9 +165,11 @@ def _matvec_helper(
         unrestricted_cols = cols is None or len(cols) == self.shape[1]
         if unrestricted_rows and unrestricted_cols and vec.ndim == 1:
             if transpose:
-                return csc_rmatvec_unrestricted(self, vec, out, self.indices)
+                return csc_rmatvec_unrestricted(self.array_csc, vec, out, self.indices)
             else:
-                return csr_matvec_unrestricted(self.x_csr, vec, out, self.x_csr.indices)
+                return csr_matvec_unrestricted(
+                    self.array_csr, vec, out, self.array_csr.indices
+                )
 
         matrix_matvec = lambda x, v: sps.csc_matrix.dot(x, v)
         if transpose:
@@ -139,9 +177,9 @@ def _matvec_helper(
 
         rows, cols = setup_restrictions(self.shape, rows, cols, dtype=self.idx_dtype)
         if transpose:
-            fast_fnc = lambda v: csc_rmatvec(self, v, rows, cols)
+            fast_fnc = lambda v: csc_rmatvec(self.array_csc, v, rows, cols)
         else:
-            fast_fnc = lambda v: csr_matvec(self.x_csr, v, rows, cols)
+            fast_fnc = lambda v: csr_matvec(self.array_csr, v, rows, cols)
         if vec.ndim == 1:
             res = fast_fnc(vec)
         elif vec.ndim == 2 and vec.shape[1] == 1:
@@ -180,7 +218,11 @@ def _get_col_stds(self, weights: np.ndarray, col_means: np.ndarray) -> np.ndarra
         """Get standard deviations of columns."""
         sqrt_arg = (
             transpose_square_dot_weights(
-                self.data, self.indices, self.indptr, weights, weights.dtype
+                self._array.data,
+                self._array.indices,
+                self._array.indptr,
+                weights,
+                weights.dtype,
             )
             - col_means**2
         )
@@ -192,7 +234,7 @@ def _get_col_stds(self, weights: np.ndarray, col_means: np.ndarray) -> np.ndarra
 
     def astype(self, dtype, order="K", casting="unsafe", copy=True):
         """Return SparseMatrix cast to new type."""
-        return super().astype(dtype, casting, copy)
+        return type(self)(self._array.astype(dtype, casting, copy))
 
     def multiply(self, other):
         """Element-wise multiplication.
@@ -202,5 +244,5 @@ def multiply(self, other):
         ``self.shape[0]``.
         """
         if other.ndim == 1:
-            return SparseMatrix(super().multiply(other[:, np.newaxis]))
-        return SparseMatrix(super().multiply(other))
+            return type(self)(self._array.multiply(other[:, np.newaxis]))
+        return type(self)(self._array.multiply(other))

From 97349f4a7d1a5bfd826d602af14b9b01046ef9e4 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 20 Jul 2023 17:31:25 +0200
Subject: [PATCH 15/32] Demo of how the ufunc interface can be implemented

---
 src/tabmat/sparse_matrix.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 11043bb8..8074a67c 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -44,6 +44,26 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False):
             self._array.sort_indices()
         self._array_csr = None
 
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        from .dense_matrix import DenseMatrix
+
+        if ufunc.nin == 1 and ufunc.nout == 1:
+            if getattr(ufunc, method)(0) == 0:
+                result_matrix = sps.csc_matrix(
+                    (
+                        getattr(ufunc, method)(self._array.data, **kwargs),
+                        self._array.indices,
+                        self._array.indptr,
+                    ),
+                    shape=self._array.shape,
+                )
+                return type(self)(result_matrix)
+            else:
+                result_matrix = getattr(ufunc, method)(self._array.todense(), **kwargs)
+                return DenseMatrix(result_matrix)
+        else:
+            return NotImplemented
+
     @property
     def shape(self):
         """Tuple of array dimensions."""
@@ -201,8 +221,6 @@ def matvec(self, vec, cols: np.ndarray = None, out: np.ndarray = None):
         check_matvec_out_shape(self, out)
         return self._matvec_helper(vec, None, cols, out, False)
 
-    __array_priority__ = 12
-
     def transpose_matvec(
         self,
         vec: Union[np.ndarray, List],

From e86c0058350f2b0bb9451d923feedfb56f0673a0 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 20 Jul 2023 19:12:12 +0200
Subject: [PATCH 16/32] Do not subclass csc_matrix

---
 src/tabmat/categorical_matrix.py |  2 ++
 src/tabmat/sparse_matrix.py      | 31 +++++++++++++++++++++++++++++--
 src/tabmat/split_matrix.py       | 10 ++++++++--
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 7a751fdb..6e781691 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -458,6 +458,8 @@ def _cross_sandwich(
             return self._cross_dense(np.asarray(other), d, rows, L_cols, R_cols)
         if isinstance(other, sps.csc_matrix):
             return self._cross_sparse(other, d, rows, L_cols, R_cols)
+        if isinstance(other, SparseMatrix):
+            return self._cross_sparse(other.array_csc, d, rows, L_cols, R_cols)
         if isinstance(other, CategoricalMatrix):
             return self._cross_categorical(other, d, rows, L_cols, R_cols)
         raise TypeError
diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 8074a67c..40644763 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -22,7 +22,7 @@
 )
 
 
-class SparseMatrix(sps.csc_matrix, MatrixBase):
+class SparseMatrix(MatrixBase):
     """
     A scipy.sparse csc matrix subclass that allows such objects to conform
     to the ``MatrixBase`` interface.
@@ -44,6 +44,15 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False):
             self._array.sort_indices()
         self._array_csr = None
 
+    def __getitem__(self, key):
+        if not isinstance(key, tuple):
+            key = (key,)
+
+        # Always return a 2d array
+        key = tuple([key_i] if np.isscalar(key_i) else key_i for key_i in key)
+
+        return type(self)(self._array.__getitem__(key))
+
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         from .dense_matrix import DenseMatrix
 
@@ -111,6 +120,24 @@ def array_csr(self):
 
         return self._array_csr
 
+    def transpose(self):
+        """Returns a view of the array with axes transposed."""  # noqa: D401
+        return type(self)(self._array.T)
+
+    T = property(transpose)
+
+    def getcol(self, i):
+        """Return matrix column at specified index."""
+        return type(self)(self._array.getcol(i))
+
+    def toarray(self):
+        """Return a dense ndarray representation of the matrix."""
+        return self._array.toarray()
+
+    def dot(self, other):
+        """Return the dot product as a scipy sparse matrix."""
+        return self._array.dot(other)
+
     def sandwich(
         self, d: np.ndarray, rows: np.ndarray = None, cols: np.ndarray = None
     ) -> np.ndarray:
@@ -206,7 +233,7 @@ def _matvec_helper(
             res = fast_fnc(vec[:, 0])[:, None]
         else:
             res = matrix_matvec(
-                self[np.ix_(rows, cols)], vec[rows] if transpose else vec[cols]
+                self[np.ix_(rows, cols)]._array, vec[rows] if transpose else vec[cols]
             )
         if out is None:
             return res
diff --git a/src/tabmat/split_matrix.py b/src/tabmat/split_matrix.py
index aaf88414..2f7438fa 100644
--- a/src/tabmat/split_matrix.py
+++ b/src/tabmat/split_matrix.py
@@ -75,8 +75,14 @@ def _combine_matrices(matrices, indices):
     n_row = matrices[0].shape[0]
 
     for mat_type_, stack_fn in [
-        (DenseMatrix, np.hstack),
-        (SparseMatrix, sps.hstack),
+        (
+            DenseMatrix,
+            lambda matrices: np.hstack([mat._array for mat in matrices]),
+        ),
+        (
+            SparseMatrix,
+            lambda matrices: sps.hstack([mat._array for mat in matrices]),
+        ),
     ]:
         this_type_matrices = [
             i for i, mat in enumerate(matrices) if isinstance(mat, mat_type_)

From 5a88fbc45a0eb80f7a8cb9907899e29eeac06bea Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Fri, 21 Jul 2023 09:03:01 +0200
Subject: [PATCH 17/32] Demonstrate binary ufuncs for sparse

---
 src/tabmat/categorical_matrix.py |  4 +---
 src/tabmat/dense_matrix.py       |  3 +++
 src/tabmat/sparse_matrix.py      | 11 +++++++++++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 6e781691..1a996cc0 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -454,10 +454,8 @@ def _cross_sandwich(
         """Perform a sandwich product: X.T @ diag(d) @ Y."""
         from .dense_matrix import DenseMatrix
 
-        if isinstance(other, (np.ndarray, DenseMatrix)):
+        if isinstance(other, DenseMatrix):
             return self._cross_dense(np.asarray(other), d, rows, L_cols, R_cols)
-        if isinstance(other, sps.csc_matrix):
-            return self._cross_sparse(other, d, rows, L_cols, R_cols)
         if isinstance(other, SparseMatrix):
             return self._cross_sparse(other.array_csc, d, rows, L_cols, R_cols)
         if isinstance(other, CategoricalMatrix):
diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index 953fe505..4bd76501 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -56,6 +56,9 @@ def __array__(self, dtype=None):
         return self._array.astype(dtype, copy=False)
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        if not all(isinstance(x, (np.ndarray, DenseMatrix)) for x in inputs):
+            return NotImplemented
+
         inputs = (x._array if isinstance(x, DenseMatrix) else x for x in inputs)
         result = getattr(ufunc, method)(*inputs, **kwargs)
         if method in ("__call__", "accumulate") and ufunc.signature is None:
diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 40644763..b144dcc1 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -70,6 +70,17 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
             else:
                 result_matrix = getattr(ufunc, method)(self._array.todense(), **kwargs)
                 return DenseMatrix(result_matrix)
+
+        elif ufunc == np.multiply:
+            if isinstance(inputs[0], SparseMatrix) and isinstance(
+                inputs[1], SparseMatrix
+            ):
+                return SparseMatrix(inputs[0].array_csc.multiply(inputs[1].array_csc))
+            elif isinstance(inputs[0], SparseMatrix):
+                return SparseMatrix(inputs[0].array_csc.multiply(inputs[1]))
+            else:
+                return SparseMatrix(inputs[1].array_csc.multiply(inputs[0]))
+
         else:
             return NotImplemented
 

From 44e1970422db6258d19b179219e7c81c13ddeac8 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Fri, 21 Jul 2023 09:07:14 +0200
Subject: [PATCH 18/32] Add tocsc method

---
 src/tabmat/sparse_matrix.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index b144dcc1..bda43efd 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -131,6 +131,10 @@ def array_csr(self):
 
         return self._array_csr
 
+    def tocsc(self, copy=False):
+        """Return the matrix in CSC format."""
+        return self._array.tocsc(copy=copy)
+
     def transpose(self):
         """Returns a view of the array with axes transposed."""  # noqa: D401
         return type(self)(self._array.T)

From ffe918e4378c8b1b5d0bfb9c7ade57bb77fccc5a Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Fri, 21 Jul 2023 09:39:56 +0200
Subject: [PATCH 19/32] Fix type checks

---
 src/tabmat/dense_matrix.py  |  4 ++--
 src/tabmat/sparse_matrix.py | 12 +++++-------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index 4bd76501..fced90c4 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -56,10 +56,10 @@ def __array__(self, dtype=None):
         return self._array.astype(dtype, copy=False)
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        if not all(isinstance(x, (np.ndarray, DenseMatrix)) for x in inputs):
+        if not all(isinstance(x, (np.ndarray, type(self))) for x in inputs):
             return NotImplemented
 
-        inputs = (x._array if isinstance(x, DenseMatrix) else x for x in inputs)
+        inputs = (x._array if isinstance(x, type(self)) else x for x in inputs)
         result = getattr(ufunc, method)(*inputs, **kwargs)
         if method in ("__call__", "accumulate") and ufunc.signature is None:
             # Does not change shape
diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index bda43efd..06f54505 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -72,14 +72,12 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
                 return DenseMatrix(result_matrix)
 
         elif ufunc == np.multiply:
-            if isinstance(inputs[0], SparseMatrix) and isinstance(
-                inputs[1], SparseMatrix
-            ):
-                return SparseMatrix(inputs[0].array_csc.multiply(inputs[1].array_csc))
-            elif isinstance(inputs[0], SparseMatrix):
-                return SparseMatrix(inputs[0].array_csc.multiply(inputs[1]))
+            if isinstance(inputs[0], type(self)) and isinstance(inputs[1], type(self)):
+                return type(self)(inputs[0].array_csc.multiply(inputs[1].array_csc))
+            elif isinstance(inputs[0], type(self)):
+                return type(self)(inputs[0].array_csc.multiply(inputs[1]))
             else:
-                return SparseMatrix(inputs[1].array_csc.multiply(inputs[0]))
+                return type(self)(inputs[1].array_csc.multiply(inputs[0]))
 
         else:
             return NotImplemented

From 3f94e4d4f4626d4249be8e5dc0cf42ed5a8c875c Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Fri, 21 Jul 2023 11:18:07 +0200
Subject: [PATCH 20/32] Minor improvements

---
 src/tabmat/dense_matrix.py  | 5 ++++-
 src/tabmat/sparse_matrix.py | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index fced90c4..a520389d 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -1,3 +1,4 @@
+import numbers
 import textwrap
 from typing import List, Optional, Union
 
@@ -56,7 +57,9 @@ def __array__(self, dtype=None):
         return self._array.astype(dtype, copy=False)
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        if not all(isinstance(x, (np.ndarray, type(self))) for x in inputs):
+        if not all(
+            isinstance(x, (np.ndarray, type(self), numbers.Number)) for x in inputs
+        ):
             return NotImplemented
 
         inputs = (x._array if isinstance(x, type(self)) else x for x in inputs)
diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 06f54505..7d52773a 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -56,6 +56,9 @@ def __getitem__(self, key):
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         from .dense_matrix import DenseMatrix
 
+        if "out" in kwargs:
+            raise NotImplementedError("out argument is not supported")
+
         if ufunc.nin == 1 and ufunc.nout == 1:
             if getattr(ufunc, method)(0) == 0:
                 result_matrix = sps.csc_matrix(

From 9f943d83c7b96d3a804c7081fe6b6ee57f7bfe79 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Fri, 21 Jul 2023 11:43:19 +0200
Subject: [PATCH 21/32] ufunc support for categoricals

---
 src/tabmat/categorical_matrix.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 1a996cc0..2c688053 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -265,6 +265,12 @@ def __init__(
         self.x_csc: Optional[Tuple[Optional[np.ndarray], np.ndarray, np.ndarray]] = None
         self.dtype = np.dtype(dtype)
 
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        inputs = (
+            x.to_sparse_matrix() if isinstance(x, type(self)) else x for x in inputs
+        )
+        return getattr(ufunc, method)(*inputs, **kwargs)
+
     def recover_orig(self) -> np.ndarray:
         """
         Return 1d numpy array with same data as what was initially fed to __init__.
@@ -491,6 +497,12 @@ def tocsr(self) -> sps.csr_matrix:
             shape=self.shape,
         )
 
+    def to_sparse_matrix(self):
+        """Return a tabmat.SparseMatrix representation."""
+        from .sparse_matrix import SparseMatrix
+
+        return SparseMatrix(self.tocsr())
+
     def toarray(self) -> np.ndarray:
         """Return array representation of matrix."""
         return self.tocsr().A

From 006497092238a7df46fd76a3a84bb39ec77b619d Mon Sep 17 00:00:00 2001
From: "quant-ranger[bot]"
 <132915763+quant-ranger[bot]@users.noreply.github.com>
Date: Mon, 24 Jul 2023 06:44:08 +0100
Subject: [PATCH 22/32] Pre-commit autoupdate (#283)

Co-authored-by: quant-ranger[bot] <132915763+quant-ranger[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5a37c63e..150cd678 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
  - repo: https://github.com/Quantco/pre-commit-mirrors-black
-   rev: 23.3.0
+   rev: 23.7.0
    hooks:
      - id: black-conda
        additional_dependencies: [flake8-docstrings, flake8-rst-docstrings]
@@ -33,7 +33,7 @@ repos:
       additional_dependencies:
        - python=3.8
  - repo: https://github.com/Quantco/pre-commit-mirrors-pyupgrade
-   rev: 3.7.0
+   rev: 3.9.0
    hooks:
     - id: pyupgrade-conda
  - repo: https://github.com/Quantco/pre-commit-mirrors-cython-lint

From 34cc13c51e2846ecae88e0feb7994141e761e5c1 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Tue, 25 Jul 2023 09:15:43 +0200
Subject: [PATCH 23/32] Remove __array_ufunc__ interface

---
 src/tabmat/categorical_matrix.py |  8 ++-----
 src/tabmat/dense_matrix.py       | 18 +---------------
 src/tabmat/sparse_matrix.py      | 36 +++-----------------------------
 3 files changed, 6 insertions(+), 56 deletions(-)

diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 2c688053..f6e84c1d 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -265,11 +265,7 @@ def __init__(
         self.x_csc: Optional[Tuple[Optional[np.ndarray], np.ndarray, np.ndarray]] = None
         self.dtype = np.dtype(dtype)
 
-    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        inputs = (
-            x.to_sparse_matrix() if isinstance(x, type(self)) else x for x in inputs
-        )
-        return getattr(ufunc, method)(*inputs, **kwargs)
+    __array_ufunc__ = None
 
     def recover_orig(self) -> np.ndarray:
         """
@@ -461,7 +457,7 @@ def _cross_sandwich(
         from .dense_matrix import DenseMatrix
 
         if isinstance(other, DenseMatrix):
-            return self._cross_dense(np.asarray(other), d, rows, L_cols, R_cols)
+            return self._cross_dense(other._array, d, rows, L_cols, R_cols)
         if isinstance(other, SparseMatrix):
             return self._cross_sparse(other.array_csc, d, rows, L_cols, R_cols)
         if isinstance(other, CategoricalMatrix):
diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index a520389d..464d1f70 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -1,4 +1,3 @@
-import numbers
 import textwrap
 from typing import List, Optional, Union
 
@@ -53,22 +52,7 @@ def __getitem__(self, key):
 
         return type(self)(self._array.__getitem__(key))
 
-    def __array__(self, dtype=None):
-        return self._array.astype(dtype, copy=False)
-
-    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        if not all(
-            isinstance(x, (np.ndarray, type(self), numbers.Number)) for x in inputs
-        ):
-            return NotImplemented
-
-        inputs = (x._array if isinstance(x, type(self)) else x for x in inputs)
-        result = getattr(ufunc, method)(*inputs, **kwargs)
-        if method in ("__call__", "accumulate") and ufunc.signature is None:
-            # Does not change shape
-            return type(self)(result)
-        else:
-            return result
+    __array_ufunc__ = None
 
     def __matmul__(self, other):
         return self._array.__matmul__(other)
diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 7d52773a..5eba5adc 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -53,37 +53,7 @@ def __getitem__(self, key):
 
         return type(self)(self._array.__getitem__(key))
 
-    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        from .dense_matrix import DenseMatrix
-
-        if "out" in kwargs:
-            raise NotImplementedError("out argument is not supported")
-
-        if ufunc.nin == 1 and ufunc.nout == 1:
-            if getattr(ufunc, method)(0) == 0:
-                result_matrix = sps.csc_matrix(
-                    (
-                        getattr(ufunc, method)(self._array.data, **kwargs),
-                        self._array.indices,
-                        self._array.indptr,
-                    ),
-                    shape=self._array.shape,
-                )
-                return type(self)(result_matrix)
-            else:
-                result_matrix = getattr(ufunc, method)(self._array.todense(), **kwargs)
-                return DenseMatrix(result_matrix)
-
-        elif ufunc == np.multiply:
-            if isinstance(inputs[0], type(self)) and isinstance(inputs[1], type(self)):
-                return type(self)(inputs[0].array_csc.multiply(inputs[1].array_csc))
-            elif isinstance(inputs[0], type(self)):
-                return type(self)(inputs[0].array_csc.multiply(inputs[1]))
-            else:
-                return type(self)(inputs[1].array_csc.multiply(inputs[0]))
-
-        else:
-            return NotImplemented
+    __array_ufunc__ = None
 
     @property
     def shape(self):
@@ -181,8 +151,8 @@ def _cross_sandwich(
         from .categorical_matrix import CategoricalMatrix
         from .dense_matrix import DenseMatrix
 
-        if isinstance(other, (np.ndarray, DenseMatrix)):
-            return self.sandwich_dense(np.asarray(other), d, rows, L_cols, R_cols)
+        if isinstance(other, DenseMatrix):
+            return self.sandwich_dense(other._array, d, rows, L_cols, R_cols)
 
         if isinstance(other, CategoricalMatrix):
             return other._cross_sandwich(self, d, rows, R_cols, L_cols).T

From a396a09ff49c47d21c538cdf2c342cc5e1076614 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Tue, 25 Jul 2023 11:33:00 +0200
Subject: [PATCH 24/32] Remove numpy operator mixin

---
 src/tabmat/categorical_matrix.py | 4 ++--
 src/tabmat/dense_matrix.py       | 2 +-
 src/tabmat/split_matrix.py       | 9 ++++-----
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index f6e84c1d..7783d5fd 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -466,7 +466,7 @@ def _cross_sandwich(
 
     # TODO: best way to return this depends on the use case. See what that is
     # See how csr getcol works
-    def getcol(self, i: int) -> sps.csc_matrix:
+    def getcol(self, i: int) -> SparseMatrix:
         """Return matrix column at specified index."""
         i %= self.shape[1]  # wrap-around indexing
 
@@ -474,7 +474,7 @@ def getcol(self, i: int) -> sps.csc_matrix:
             i += 1
 
         col_i = sps.csc_matrix((self.indices == i).astype(int)[:, None])
-        return col_i
+        return SparseMatrix(col_i)
 
     def tocsr(self) -> sps.csr_matrix:
         """Return scipy csr representation of matrix."""
diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index 464d1f70..587d244b 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -18,7 +18,7 @@
 )
 
 
-class DenseMatrix(np.lib.mixins.NDArrayOperatorsMixin, MatrixBase):
+class DenseMatrix(MatrixBase):
     """
     A ``numpy.ndarray`` subclass with several additional functions that allow
     it to share the MatrixBase API with SparseMatrix and CategoricalMatrix.
diff --git a/src/tabmat/split_matrix.py b/src/tabmat/split_matrix.py
index 2f7438fa..f936bfb1 100644
--- a/src/tabmat/split_matrix.py
+++ b/src/tabmat/split_matrix.py
@@ -1,10 +1,9 @@
 import warnings
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 from scipy import sparse as sps
 
-from .categorical_matrix import CategoricalMatrix
 from .dense_matrix import DenseMatrix
 from .ext.split import is_sorted, split_col_subsets
 from .matrix_base import MatrixBase
@@ -29,7 +28,7 @@ def as_mx(a: Any):
         return a
     elif sps.issparse(a):
         return SparseMatrix(a)
-    elif isinstance(a, (np.ndarray, DenseMatrix)):
+    elif isinstance(a, np.ndarray):
         return DenseMatrix(a)
     else:
         raise ValueError(f"Cannot convert type {type(a)} to Matrix.")
@@ -135,7 +134,7 @@ class SplitMatrix(MatrixBase):
 
     def __init__(
         self,
-        matrices: List[Union[DenseMatrix, SparseMatrix, CategoricalMatrix]],
+        matrices: Sequence[MatrixBase],
         indices: Optional[List[np.ndarray]] = None,
     ):
         flatten_matrices = []
@@ -149,7 +148,7 @@ def __init__(
             if isinstance(mat, SplitMatrix):
                 # Flatten out the SplitMatrix
                 current_idx = 0
-                for iind, imat in zip(mat.indices, mat.matrices):
+                for iind, imat in zip(mat.indices, mat.matrices):  # type: ignore
                     flatten_matrices.append(imat)
                     index_corrections.append(
                         iind - np.arange(len(iind), dtype=np.int64) - current_idx

From e046dcdb81c25467b6c446342e6c131bfd72fec1 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 26 Jul 2023 12:18:14 +0200
Subject: [PATCH 25/32] Add hstack function

---
 src/tabmat/__init__.py     |  4 +++-
 src/tabmat/split_matrix.py | 30 +++++++++++++++++++++++++++---
 tests/test_matrices.py     | 29 +++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/src/tabmat/__init__.py b/src/tabmat/__init__.py
index 9f4a8889..53fb1c55 100644
--- a/src/tabmat/__init__.py
+++ b/src/tabmat/__init__.py
@@ -3,7 +3,7 @@
 from .dense_matrix import DenseMatrix
 from .matrix_base import MatrixBase
 from .sparse_matrix import SparseMatrix
-from .split_matrix import SplitMatrix
+from .split_matrix import SplitMatrix, as_tabmat, hstack
 from .standardized_mat import StandardizedMatrix
 
 __all__ = [
@@ -15,4 +15,6 @@
     "CategoricalMatrix",
     "from_csc",
     "from_pandas",
+    "as_tabmat",
+    "hstack",
 ]
diff --git a/src/tabmat/split_matrix.py b/src/tabmat/split_matrix.py
index f936bfb1..a091949f 100644
--- a/src/tabmat/split_matrix.py
+++ b/src/tabmat/split_matrix.py
@@ -1,5 +1,5 @@
 import warnings
-from typing import Any, List, Optional, Sequence, Tuple, Union
+from typing import List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 from scipy import sparse as sps
@@ -16,7 +16,7 @@
 )
 
 
-def as_mx(a: Any):
+def as_tabmat(a: Union[MatrixBase, StandardizedMatrix, np.ndarray, sps.spmatrix]):
     """Convert an array to a corresponding MatrixBase type.
 
     If the input is already a MatrixBase, return untouched.
@@ -27,13 +27,37 @@ def as_mx(a: Any):
     if isinstance(a, (MatrixBase, StandardizedMatrix)):
         return a
     elif sps.issparse(a):
-        return SparseMatrix(a)
+        return SparseMatrix(a.tocsc(copy=False))
     elif isinstance(a, np.ndarray):
         return DenseMatrix(a)
     else:
         raise ValueError(f"Cannot convert type {type(a)} to Matrix.")
 
 
+def hstack(tup: Sequence[Union[MatrixBase, np.ndarray, sps.spmatrix]]) -> MatrixBase:
+    """Stack arrays in sequence horizontally (column wise).
+
+    This is equivalent to concatenation along the second axis,
+    except for 1-D arrays where it concatenates along the first axis.
+
+    Parameters
+    ----------
+    tup: sequence of arrays
+        The arrays must have the same shape along all but the second axis.
+    """
+    matrices = [as_tabmat(a) for a in tup]
+
+    if len(matrices) == 0:
+        raise ValueError("Need at least one array to concatenate.")
+
+    if all(isinstance(mat, SparseMatrix) for mat in matrices):
+        return SparseMatrix(sps.hstack([mat._array for mat in matrices]))
+    elif all(isinstance(mat, DenseMatrix) for mat in matrices):
+        return DenseMatrix(np.hstack([mat._array for mat in matrices]))
+    else:
+        return SplitMatrix(matrices)
+
+
 def _prepare_out_array(out: Optional[np.ndarray], out_shape, out_dtype):
     if out is None:
         out = np.zeros(out_shape, out_dtype)
diff --git a/tests/test_matrices.py b/tests/test_matrices.py
index 779b160c..34f6a5bb 100644
--- a/tests/test_matrices.py
+++ b/tests/test_matrices.py
@@ -632,3 +632,32 @@ def test_multiply(mat):
     for act in actual:
         assert isinstance(act, MatrixBase)
         np.testing.assert_allclose(act.A, expected)
+
+
+@pytest.mark.parametrize(
+    "mat_1",
+    get_all_matrix_base_subclass_mats()
+    + [base_array()]
+    + [sps.csc_matrix(base_array())],
+)
+@pytest.mark.parametrize(
+    "mat_2",
+    get_all_matrix_base_subclass_mats()
+    + [base_array()]
+    + [sps.csc_matrix(base_array())],
+)
+def test_hstack(mat_1, mat_2):
+    mats = [mat_1, mat_2]
+    stacked = tm.hstack(mats)
+
+    if all(isinstance(mat, (np.ndarray, tm.DenseMatrix)) for mat in mats):
+        assert isinstance(stacked, tm.DenseMatrix)
+    elif all(isinstance(mat, (sps.csc_matrix, tm.SparseMatrix)) for mat in mats):
+        assert isinstance(stacked, tm.SparseMatrix)
+    else:
+        assert isinstance(stacked, tm.SplitMatrix)
+
+    np.testing.assert_array_equal(
+        stacked.A,
+        np.hstack([mat.A if not isinstance(mat, np.ndarray) else mat for mat in mats]),
+    )

From e7f216ca391dff72e7ddca795f728fb9da686906 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 26 Jul 2023 14:31:13 +0200
Subject: [PATCH 26/32] Add method for unpacking underlying array

---
 src/tabmat/categorical_matrix.py | 4 ++++
 src/tabmat/dense_matrix.py       | 4 ++++
 src/tabmat/sparse_matrix.py      | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 7783d5fd..4968c628 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -503,6 +503,10 @@ def toarray(self) -> np.ndarray:
         """Return array representation of matrix."""
         return self.tocsr().A
 
+    def unpack(self):
+        """Return the underlying pandas.Categorical."""
+        return self.cat
+
     def astype(self, dtype, order="K", casting="unsafe", copy=True):
         """Return CategoricalMatrix cast to new type."""
         self.dtype = dtype
diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index 587d244b..1a70457f 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -105,6 +105,10 @@ def toarray(self):
         """Return array representation of matrix."""
         return self._array
 
+    def unpack(self):
+        """Return the underlying numpy.ndarray."""
+        return self._array
+
     def sandwich(
         self, d: np.ndarray, rows: np.ndarray = None, cols: np.ndarray = None
     ) -> np.ndarray:
diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 5eba5adc..8d7a30bc 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -116,6 +116,10 @@ def getcol(self, i):
         """Return matrix column at specified index."""
         return type(self)(self._array.getcol(i))
 
+    def unpack(self):
+        """Return the underlying scipy.sparse.csc_matrix."""
+        return self._array
+
     def toarray(self):
         """Return a dense ndarray representation of the matrix."""
         return self._array.toarray()

From c66e026e30cc2d02ab5e1baf81a3df2762011846 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 26 Jul 2023 14:38:32 +0200
Subject: [PATCH 27/32] Add __matmul__ methods to SparseMatrix

---
 src/tabmat/sparse_matrix.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 8d7a30bc..188f6862 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -53,6 +53,12 @@ def __getitem__(self, key):
 
         return type(self)(self._array.__getitem__(key))
 
+    def __matmul__(self, other):
+        return self._array.__matmul__(other)
+
+    def __rmatmul__(self, other):
+        return self._array.__rmatmul__(other)
+
     __array_ufunc__ = None
 
     @property

From 38813e7482a8b92d83d91cfd17230b1a12c63982 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 27 Jul 2023 12:13:54 +0200
Subject: [PATCH 28/32] Stricter and more consistent indexing

---
 src/tabmat/categorical_matrix.py | 48 ++++++++++-----------------
 src/tabmat/dense_matrix.py       |  9 ++---
 src/tabmat/sparse_matrix.py      |  9 ++---
 src/tabmat/util.py               | 48 +++++++++++++++++++++++++++
 tests/test_matrices.py           | 57 ++++++++++++++++++++++++++++++--
 5 files changed, 124 insertions(+), 47 deletions(-)

diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 4968c628..68161445 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -161,7 +161,7 @@ def matvec(mat, vec):
 
 """
 
-from typing import Any, List, Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -181,6 +181,7 @@ def matvec(mat, vec):
 from .matrix_base import MatrixBase
 from .sparse_matrix import SparseMatrix
 from .util import (
+    _check_indexer,
     check_matvec_dimensions,
     check_matvec_out_shape,
     check_transpose_matvec_out_shape,
@@ -189,21 +190,15 @@ def matvec(mat, vec):
 )
 
 
-def _is_indexer_full_length(full_length: int, indexer: Any):
-    if isinstance(indexer, int):
-        return full_length == 1
-    elif isinstance(indexer, list):
-        if (np.asarray(indexer) > full_length - 1).any():
-            raise IndexError("Index out-of-range.")
-        return len(set(indexer)) == full_length
-    elif isinstance(indexer, np.ndarray):
+def _is_indexer_full_length(full_length: int, indexer: Union[slice, np.ndarray]):
+    if isinstance(indexer, np.ndarray):
         if (indexer > full_length - 1).any():
             raise IndexError("Index out-of-range.")
-        return len(np.unique(indexer)) == full_length
+        # Order is important in indexing. Could achieve similar results
+        # by rearranging categories.
+        return np.array_equal(indexer.ravel(), np.arange(full_length))
     elif isinstance(indexer, slice):
         return len(range(*indexer.indices(full_length))) == full_length
-    else:
-        raise ValueError(f"Indexing with {type(indexer)} is not allowed.")
 
 
 def _row_col_indexing(
@@ -522,25 +517,18 @@ def _get_col_stds(self, weights: np.ndarray, col_means: np.ndarray) -> np.ndarra
         return np.sqrt(mean - col_means**2)
 
     def __getitem__(self, item):
-        if isinstance(item, tuple):
-            row, col = item
-            if _is_indexer_full_length(self.shape[1], col):
-                if isinstance(row, int):
-                    row = [row]
-                return CategoricalMatrix(
-                    self.cat[row], drop_first=self.drop_first, dtype=self.dtype
-                )
-            else:
-                # return a SparseMatrix if we subset columns
-                # TODO: this is inefficient. See issue #101.
-                return SparseMatrix(self.tocsr()[row, col], dtype=self.dtype)
+        row, col = _check_indexer(item)
+
+        if _is_indexer_full_length(self.shape[1], col):
+            if isinstance(row, np.ndarray):
+                row = row.ravel()
+            return CategoricalMatrix(
+                self.cat[row], drop_first=self.drop_first, dtype=self.dtype
+            )
         else:
-            row = item
-        if isinstance(row, int):
-            row = [row]
-        return CategoricalMatrix(
-            self.cat[row], drop_first=self.drop_first, dtype=self.dtype
-        )
+            # return a SparseMatrix if we subset columns
+            # TODO: this is inefficient. See issue #101.
+            return self.to_sparse_matrix()[row, col]
 
     def _cross_dense(
         self,
diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index 1a70457f..55c9a088 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -11,6 +11,7 @@
 )
 from .matrix_base import MatrixBase
 from .util import (
+    _check_indexer,
     check_matvec_dimensions,
     check_matvec_out_shape,
     check_transpose_matvec_out_shape,
@@ -44,13 +45,7 @@ def __init__(self, input_array):
         self._array = np.asarray(input_array)
 
     def __getitem__(self, key):
-        if not isinstance(key, tuple):
-            key = (key,)
-
-        # Always return a 2d array
-        key = tuple([key_i] if np.isscalar(key_i) else key_i for key_i in key)
-
-        return type(self)(self._array.__getitem__(key))
+        return type(self)(self._array.__getitem__(_check_indexer(key)))
 
     __array_ufunc__ = None
 
diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 188f6862..8c2a3b2b 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -14,6 +14,7 @@
 )
 from .matrix_base import MatrixBase
 from .util import (
+    _check_indexer,
     check_matvec_dimensions,
     check_matvec_out_shape,
     check_transpose_matvec_out_shape,
@@ -45,13 +46,7 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False):
         self._array_csr = None
 
     def __getitem__(self, key):
-        if not isinstance(key, tuple):
-            key = (key,)
-
-        # Always return a 2d array
-        key = tuple([key_i] if np.isscalar(key_i) else key_i for key_i in key)
-
-        return type(self)(self._array.__getitem__(key))
+        return type(self)(self._array.__getitem__(_check_indexer(key)))
 
     def __matmul__(self, other):
         return self._array.__matmul__(other)
diff --git a/src/tabmat/util.py b/src/tabmat/util.py
index 2dd570ec..24cfbe30 100644
--- a/src/tabmat/util.py
+++ b/src/tabmat/util.py
@@ -50,3 +50,51 @@ def check_matvec_dimensions(mat, vec: np.ndarray, transpose: bool) -> None:
             f"shapes {mat.shape} and {vec.shape} not aligned: "
             f"{mat.shape[match_dim]} (dim {match_dim}) != {vec.shape[0]} (dim 0)"
         )
+
+
+def _check_indexer(indexer):
+    """Check that the indexer is valid, and transform it to a canonical format."""
+    if not isinstance(indexer, tuple):
+        indexer = (indexer, slice(None, None, None))
+
+    if len(indexer) > 2:
+        raise ValueError("More than two indexers are not supported.")
+
+    row_indexer, col_indexer = indexer
+
+    if isinstance(row_indexer, slice):
+        if isinstance(col_indexer, slice):
+            return row_indexer, col_indexer
+        else:
+            col_indexer = np.asarray(col_indexer)
+            if col_indexer.ndim > 1:
+                raise ValueError(
+                    "Indexing would result in a matrix with more than 2 dimensions."
+                )
+            else:
+                return row_indexer, col_indexer.reshape(-1)
+
+    elif isinstance(col_indexer, slice):
+        row_indexer = np.asarray(row_indexer)
+        if row_indexer.ndim > 1:
+            raise ValueError(
+                "Indexing would result in a matrix with more than 2 dimensions."
+            )
+        else:
+            return row_indexer.reshape(-1), col_indexer
+
+    else:
+        row_indexer = np.asarray(row_indexer)
+        col_indexer = np.asarray(col_indexer)
+        if row_indexer.ndim <= 1 and col_indexer.ndim <= 1:
+            return np.ix_(row_indexer.reshape(-1), col_indexer.reshape(-1))
+        elif (
+            row_indexer.ndim == 2
+            and row_indexer.shape[1] == 1
+            and col_indexer.ndim == 2
+            and col_indexer.shape[0] == 1
+        ):
+            # support for np.ix_-ed indices
+            return row_indexer, col_indexer
+        else:
+            raise ValueError("This type of indexing is not supported.")
diff --git a/tests/test_matrices.py b/tests/test_matrices.py
index 34f6a5bb..815c48e3 100644
--- a/tests/test_matrices.py
+++ b/tests/test_matrices.py
@@ -552,8 +552,8 @@ def test_indexing_int_row(mat: Union[tm.MatrixBase, tm.StandardizedMatrix]):
     res = mat[0, :]
     if not isinstance(res, np.ndarray):
         res = res.A
-    expected = mat.A[0, :]
-    np.testing.assert_allclose(np.squeeze(res), expected)
+    expected = mat.A[[0], :]
+    np.testing.assert_allclose(res, expected)
 
 
 @pytest.mark.parametrize("mat", get_matrices())
@@ -563,7 +563,58 @@ def test_indexing_range_row(mat: Union[tm.MatrixBase, tm.StandardizedMatrix]):
     if not isinstance(res, np.ndarray):
         res = res.A
     expected = mat.A[0:2, :]
-    np.testing.assert_allclose(np.squeeze(res), expected)
+    np.testing.assert_array_equal(res, expected)
+
+
+@pytest.mark.parametrize("mat", get_unscaled_matrices())
+def test_indexing_int_col(mat):
+    res = mat[:, 0]
+    if not isinstance(res, np.ndarray):
+        res = res.A
+    assert res.shape == (mat.shape[0], 1)
+    expected = mat.A[:, [0]]
+    np.testing.assert_array_equal(res, expected)
+
+
+@pytest.mark.parametrize("mat", get_unscaled_matrices())
+def test_indexing_range_col(mat):
+    res = mat[:, 0:2]
+    if not isinstance(res, np.ndarray):
+        res = res.A
+    assert res.shape == (mat.shape[0], 2)
+    expected = mat.A[:, 0:2]
+    np.testing.assert_array_equal(res, expected)
+
+
+@pytest.mark.parametrize("mat", get_unscaled_matrices())
+def test_indexing_int_both(mat):
+    res = mat[0, 0]
+    if not isinstance(res, np.ndarray):
+        res = res.A
+    assert res.shape == (1, 1)
+    expected = mat.A[0, 0]
+    np.testing.assert_array_equal(res, expected)
+
+
+@pytest.mark.parametrize("mat", get_unscaled_matrices())
+def test_indexing_seq_both(mat):
+    res = mat[[0, 1], [0, 1]]
+    if not isinstance(res, np.ndarray):
+        res = res.A
+    assert res.shape == (2, 2)
+    expected = mat.A[np.ix_([0, 1], [0, 1])]
+    np.testing.assert_array_equal(res, expected)
+
+
+@pytest.mark.parametrize("mat", get_unscaled_matrices())
+def test_indexing_ix_both(mat):
+    indexer = np.ix_([0, 1], [0, 1])
+    res = mat[indexer]
+    if not isinstance(res, np.ndarray):
+        res = res.A
+    assert res.shape == (2, 2)
+    expected = mat.A[indexer]
+    np.testing.assert_array_equal(res, expected)
 
 
 def test_pandas_to_matrix():

From 1ba081d8d9fcc367ac54fa12d543a41d8bf2e66a Mon Sep 17 00:00:00 2001
From: "quant-ranger[bot]"
 <132915763+quant-ranger[bot]@users.noreply.github.com>
Date: Mon, 7 Aug 2023 06:56:45 +0100
Subject: [PATCH 29/32] Pre-commit autoupdate (#284)

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 150cd678..d66edb54 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,7 +8,7 @@ repos:
          - --safe
          - --target-version=py36
  - repo: https://github.com/Quantco/pre-commit-mirrors-flake8
-   rev: 6.0.0
+   rev: 6.1.0
    hooks:
     - id: flake8-conda
       additional_dependencies: [

From 693b1058633765e467ee61975584ccbb56903e89 Mon Sep 17 00:00:00 2001
From: Jan Tilly <jan.tilly@quantco.com>
Date: Mon, 7 Aug 2023 18:22:51 +0200
Subject: [PATCH 30/32] Use boa to build conda packages. (#285)

---
 .github/workflows/conda-build-win.yml  | 4 ++--
 .github/workflows/macos-conda-build.sh | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/conda-build-win.yml b/.github/workflows/conda-build-win.yml
index 8528298e..85d01459 100644
--- a/.github/workflows/conda-build-win.yml
+++ b/.github/workflows/conda-build-win.yml
@@ -30,5 +30,5 @@ jobs:
       - name: Build conda package
         shell: pwsh
         run: |
-          mamba install -n base -y conda-build
-          conda build -m .ci_support/${{ matrix.CONDA_BUILD_YML }}.yaml conda.recipe
+          mamba install -n base -y conda-build boa
+          conda mambabuild -m .ci_support/${{ matrix.CONDA_BUILD_YML }}.yaml conda.recipe
diff --git a/.github/workflows/macos-conda-build.sh b/.github/workflows/macos-conda-build.sh
index 1901f4c7..8c9880b9 100755
--- a/.github/workflows/macos-conda-build.sh
+++ b/.github/workflows/macos-conda-build.sh
@@ -2,7 +2,7 @@
 
 set -exo pipefail
 
-mamba install -y conda-build
+mamba install -y conda-build boa
 
 # Don't test cross-compiled result (there is no emulation) and use the latest MacOS SDK.
 if grep -q "osx-arm64" .ci_support/${CONDA_BUILD_YML}.yaml; then
@@ -13,4 +13,4 @@ CONDA_BUILD_SYSROOT:
  - "${CONDA_BUILD_SYSROOT}"
 EOF
 fi
-conda build -m .ci_support/${CONDA_BUILD_YML}.yaml conda.recipe ${CONDA_BUILD_ARGS:-}
+conda mambabuild -m .ci_support/${CONDA_BUILD_YML}.yaml conda.recipe ${CONDA_BUILD_ARGS:-}

From 78d0278755555e934d68024b3fbf4a15f65443ad Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 9 Aug 2023 15:09:39 +0200
Subject: [PATCH 31/32] Be consistent when instantiating from 1d arrays

---
 src/tabmat/sparse_matrix.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index 8c2a3b2b..d98f180f 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -31,8 +31,14 @@ class SparseMatrix(MatrixBase):
     SparseMatrix is instantiated in the same way as scipy.sparse.csc_matrix.
     """
 
-    def __init__(self, arg1, shape=None, dtype=None, copy=False):
-        self._array = sps.csc_matrix(arg1, shape, dtype, copy)
+    def __init__(self, input_array, shape=None, dtype=None, copy=False):
+        if isinstance(input_array, np.ndarray):
+            if input_array.ndim == 1:
+                input_array = input_array.reshape(-1, 1)
+            elif input_array.ndim > 2:
+                raise ValueError("Input array must be 1- or 2-dimensional")
+
+        self._array = sps.csc_matrix(input_array, shape, dtype, copy)
 
         self.idx_dtype = max(self._array.indices.dtype, self._array.indptr.dtype)
         if self._array.indices.dtype != self.idx_dtype:

From e042ce3e2ec5b5aeaaa8405b81b0b172b7ec958f Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Tue, 15 Aug 2023 09:07:53 +0200
Subject: [PATCH 32/32] Add column name metadata to `tabmat` matrices (#278)

* Add column name getters

* Matrix names are also combined

* Add names to constructors

* Add indexing support for column names

* Remove unnecessary code

* Better default column names

* Reduce code duplication

* Saner defaults

* Add convenient getters and setters

* Fix indexing

* Smarter setter for categorical matrices

* Add tests

* Fix subsetting with np.newaxis

* Remove the walrus :(

* Fix test

* Fix indexing with np.ix_

* Propagate column names where it makes sense

* Fix merge mistake

* Add changelog entry
---
 CHANGELOG.rst                    |   4 +
 src/tabmat/categorical_matrix.py | 140 ++++++++++++++++++++++-
 src/tabmat/constructor.py        |  55 ++++++++--
 src/tabmat/dense_matrix.py       | 124 ++++++++++++++++++++-
 src/tabmat/matrix_base.py        |  70 ++++++++++++
 src/tabmat/sparse_matrix.py      | 129 +++++++++++++++++++++-
 src/tabmat/split_matrix.py       |  65 +++++++++++
 src/tabmat/standardized_mat.py   |  71 +++++++++++-
 tests/test_matrices.py           | 183 +++++++++++++++++++++++++++++++
 9 files changed, 814 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 331695b1..9de08d2f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -10,6 +10,10 @@ Changelog
 Unreleased
 ----------
 
+**New features:**
+
+- Add column name and term name metadata to ``MatrixBase`` objects. These are automatically populated when initializing a ``MatrixBase`` from a ``pandas.DataFrame``. In addition, they can be accessed and modified via the ``column_names`` and ``term_names`` properties.
+
 **Other changes:**
 
 - Improve the performance of ``from_pandas`` in the case of low-cardinality categorical variables.
diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 68161445..1180646c 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -161,6 +161,7 @@ def matvec(mat, vec):
 
 """
 
+import re
 from typing import List, Optional, Tuple, Union
 
 import numpy as np
@@ -245,6 +246,9 @@ def __init__(
         cat_vec: Union[List, np.ndarray, pd.Categorical],
         drop_first: bool = False,
         dtype: np.dtype = np.float64,
+        column_name: Optional[str] = None,
+        term_name: Optional[str] = None,
+        column_name_format: str = "{name}[{category}]",
     ):
         if pd.isnull(cat_vec).any():
             raise ValueError("Categorical data can't have missing values.")
@@ -260,6 +264,13 @@ def __init__(
         self.x_csc: Optional[Tuple[Optional[np.ndarray], np.ndarray, np.ndarray]] = None
         self.dtype = np.dtype(dtype)
 
+        self._colname = column_name
+        if term_name is None:
+            self._term = self._colname
+        else:
+            self._term = term_name
+        self._colname_format = column_name_format
+
     __array_ufunc__ = None
 
     def recover_orig(self) -> np.ndarray:
@@ -466,10 +477,16 @@ def getcol(self, i: int) -> SparseMatrix:
         i %= self.shape[1]  # wrap-around indexing
 
         if self.drop_first:
-            i += 1
+            i_corr = i + 1
+        else:
+            i_corr = i
 
-        col_i = sps.csc_matrix((self.indices == i).astype(int)[:, None])
-        return SparseMatrix(col_i)
+        col_i = sps.csc_matrix((self.indices == i_corr).astype(int)[:, None])
+        return SparseMatrix(
+            col_i,
+            column_names=[self.column_names[i]],
+            term_names=[self.term_names[i]],
+        )
 
     def tocsr(self) -> sps.csr_matrix:
         """Return scipy csr representation of matrix."""
@@ -492,7 +509,11 @@ def to_sparse_matrix(self):
         """Return a tabmat.SparseMatrix representation."""
         from .sparse_matrix import SparseMatrix
 
-        return SparseMatrix(self.tocsr())
+        return SparseMatrix(
+            self.tocsr(),
+            column_names=self.column_names,
+            term_names=self.term_names,
+        )
 
     def toarray(self) -> np.ndarray:
         """Return array representation of matrix."""
@@ -523,7 +544,11 @@ def __getitem__(self, item):
             if isinstance(row, np.ndarray):
                 row = row.ravel()
             return CategoricalMatrix(
-                self.cat[row], drop_first=self.drop_first, dtype=self.dtype
+                self.cat[row],
+                drop_first=self.drop_first,
+                dtype=self.dtype,
+                column_name=self._colname,
+                column_name_format=self._colname_format,
             )
         else:
             # return a SparseMatrix if we subset columns
@@ -638,8 +663,111 @@ def multiply(self, other) -> SparseMatrix:
                     np.arange(self.shape[0] + 1, dtype=int),
                 ),
                 shape=self.shape,
-            )
+            ),
+            column_names=self.column_names,
+            term_names=self.term_names,
         )
 
     def __repr__(self):
         return str(self.cat)
+
+    def get_names(
+        self,
+        type: str = "column",
+        missing_prefix: Optional[str] = None,
+        indices: Optional[List[int]] = None,
+    ) -> List[Optional[str]]:
+        """Get column names.
+
+        If the matrix does not have a name, a default name is created using the
+        following pattern: ``"{missing_prefix}{indices[0]}-{indices[-1]}"``
+        (see ``indices`` below).
+
+        Parameters
+        ----------
+        type: str {'column'|'term'}
+            Whether to get column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        missing_prefix: Optional[str], default None
+            Prefix to use for columns that do not have a name. If None, then no
+            default name is created.
+        indices
+            The indices used for columns that do not have a name. If ``None``,
+            then the indices are ``list(range(self.shape[1]))``.
+
+        Returns
+        -------
+        List[Optional[str]]
+            Column names.
+        """
+        if type == "column":
+            name = self._colname
+        elif type == "term":
+            name = self._term
+        else:
+            raise ValueError(f"Type must be 'column' or 'term', got {type}")
+
+        if indices is None:
+            indices = list(range(len(self.cat.categories) - self.drop_first))
+        if name is None and missing_prefix is None:
+            return [None] * (len(self.cat.categories) - self.drop_first)
+        elif name is None:
+            name = f"{missing_prefix}{indices[0]}-{indices[-1]}"
+
+        if type == "column":
+            return [
+                self._colname_format.format(name=name, category=cat)
+                for cat in self.cat.categories[self.drop_first :]
+            ]
+        else:
+            return [name] * (len(self.cat.categories) - self.drop_first)
+
+    def set_names(self, names: Union[str, List[Optional[str]]], type: str = "column"):
+        """Set column names.
+
+        Parameters
+        ----------
+        names: List[Optional[str]]
+            Names to set.
+        type: str {'column'|'term'}
+            Whether to set column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        """
+        if isinstance(names, str):
+            names = [names]
+
+        if len(names) != 1:
+            if type == "column":
+                # Try finding the column name
+                base_names = []
+                for name, cat in zip(names, self.cat.categories[self.drop_first :]):
+                    partial_name = self._colname_format.format(
+                        name="__CAPTURE__", category=cat
+                    )
+                    pattern = re.escape(partial_name).replace("__CAPTURE__", "(.*)")
+                    if name is not None:
+                        match = re.search(pattern, name)
+                    else:
+                        match = None
+                    if match is not None:
+                        base_names.append(match.group(1))
+                    else:
+                        base_names.append(name)
+                names = base_names
+
+            if len(names) == self.shape[1] and all(name == names[0] for name in names):
+                names = [names[0]]
+
+        if len(names) != 1:
+            raise ValueError("A categorical matrix has only one name")
+
+        if type == "column":
+            self._colname = names[0]
+        elif type == "term":
+            self._term = names[0]
+        else:
+            raise ValueError(f"Type must be 'column' or 'term', got {type}")
diff --git a/src/tabmat/constructor.py b/src/tabmat/constructor.py
index f8e23c31..d280140a 100644
--- a/src/tabmat/constructor.py
+++ b/src/tabmat/constructor.py
@@ -1,5 +1,5 @@
 import warnings
-from typing import List, Tuple, Union
+from typing import List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -21,6 +21,7 @@ def from_pandas(
     object_as_cat: bool = False,
     cat_position: str = "expand",
     drop_first: bool = False,
+    categorical_format: str = "{name}[{category}]",
 ) -> MatrixBase:
     """
     Transform a pandas.DataFrame into an efficient SplitMatrix. For most users, this
@@ -72,7 +73,14 @@ def from_pandas(
         if object_as_cat and coldata.dtype == object:
             coldata = coldata.astype("category")
         if isinstance(coldata.dtype, pd.CategoricalDtype):
-            cat = CategoricalMatrix(coldata, drop_first=drop_first, dtype=dtype)
+            cat = CategoricalMatrix(
+                coldata,
+                drop_first=drop_first,
+                dtype=dtype,
+                column_name=colname,
+                term_name=colname,
+                column_name_format=categorical_format,
+            )
             if len(coldata.cat.categories) < cat_threshold:
                 (
                     X_dense_F,
@@ -82,6 +90,8 @@ def from_pandas(
                 ) = _split_sparse_and_dense_parts(
                     sps.csc_matrix(cat.tocsr(), dtype=dtype),
                     threshold=sparse_threshold,
+                    column_names=cat.get_names("column"),
+                    term_names=cat.get_names("term"),
                 )
                 matrices.append(X_dense_F)
                 is_cat.append(True)
@@ -128,13 +138,26 @@ def from_pandas(
             f"Columns {ignored_cols} were ignored. Make sure they have a valid dtype."
         )
     if len(dense_dfidx) > 0:
-        matrices.append(DenseMatrix(df.iloc[:, dense_dfidx].astype(dtype)))
+        matrices.append(
+            DenseMatrix(
+                df.iloc[:, dense_dfidx].astype(dtype),
+                column_names=df.columns[dense_dfidx],
+                term_names=df.columns[dense_dfidx],
+            )
+        )
         indices.append(dense_mxidx)
         is_cat.append(False)
     if len(sparse_dfcols) > 0:
         sparse_dict = {i: v for i, v in enumerate(sparse_dfcols)}
         full_sparse = pd.DataFrame(sparse_dict).sparse.to_coo()
-        matrices.append(SparseMatrix(full_sparse, dtype=dtype))
+        matrices.append(
+            SparseMatrix(
+                full_sparse,
+                dtype=dtype,
+                column_names=[col.name for col in sparse_dfcols],
+                term_names=[col.name for col in sparse_dfcols],
+            )
+        )
         indices.append(sparse_mxidx)
         is_cat.append(False)
 
@@ -157,7 +180,10 @@ def from_pandas(
 
 
 def _split_sparse_and_dense_parts(
-    arg1: sps.csc_matrix, threshold: float = 0.1
+    arg1: sps.csc_matrix,
+    threshold: float = 0.1,
+    column_names: Optional[Sequence[Optional[str]]] = None,
+    term_names: Optional[Sequence[Optional[str]]] = None,
 ) -> Tuple[DenseMatrix, SparseMatrix, np.ndarray, np.ndarray]:
     """
     Split matrix.
@@ -176,12 +202,25 @@ def _split_sparse_and_dense_parts(
     dense_indices = np.where(densities > threshold)[0]
     sparse_indices = np.setdiff1d(np.arange(densities.shape[0]), dense_indices)
 
-    X_dense_F = DenseMatrix(np.asfortranarray(arg1[:, dense_indices].toarray()))
-    X_sparse = SparseMatrix(arg1[:, sparse_indices])
+    if column_names is None:
+        column_names = [None] * arg1.shape[1]
+    if term_names is None:
+        term_names = column_names
+
+    X_dense_F = DenseMatrix(
+        np.asfortranarray(arg1[:, dense_indices].toarray()),
+        column_names=[column_names[i] for i in dense_indices],
+        term_names=[term_names[i] for i in dense_indices],
+    )
+    X_sparse = SparseMatrix(
+        arg1[:, sparse_indices],
+        column_names=[column_names[i] for i in sparse_indices],
+        term_names=[term_names[i] for i in sparse_indices],
+    )
     return X_dense_F, X_sparse, dense_indices, sparse_indices
 
 
-def from_csc(mat: sps.csc_matrix, threshold=0.1):
+def from_csc(mat: sps.csc_matrix, threshold=0.1, column_names=None, term_names=None):
     """
     Convert a CSC-format sparse matrix into a ``SplitMatrix``.
 
diff --git a/src/tabmat/dense_matrix.py b/src/tabmat/dense_matrix.py
index 55c9a088..5de2c91f 100644
--- a/src/tabmat/dense_matrix.py
+++ b/src/tabmat/dense_matrix.py
@@ -34,7 +34,7 @@ class DenseMatrix(MatrixBase):
 
     """
 
-    def __init__(self, input_array):
+    def __init__(self, input_array, column_names=None, term_names=None):
         input_array = np.asarray(input_array)
 
         if input_array.ndim == 1:
@@ -43,9 +43,32 @@ def __init__(self, input_array):
             raise ValueError("Input array must be 1- or 2-dimensional")
 
         self._array = np.asarray(input_array)
+        width = self._array.shape[1]
+
+        if column_names is not None:
+            if len(column_names) != width:
+                raise ValueError(
+                    f"Expected {width} column names, got {len(column_names)}"
+                )
+            self._colnames = column_names
+        else:
+            self._colnames = [None] * width
+
+        if term_names is not None:
+            if len(term_names) != width:
+                raise ValueError(f"Expected {width} term names, got {len(term_names)}")
+            self._terms = term_names
+        else:
+            self._terms = self._colnames
 
     def __getitem__(self, key):
-        return type(self)(self._array.__getitem__(_check_indexer(key)))
+        row, col = _check_indexer(key)
+        colnames = list(np.array(self.column_names)[col].ravel())
+        terms = list(np.array(self.term_names)[col].ravel())
+
+        return type(self)(
+            self._array.__getitem__((row, col)), column_names=colnames, term_names=terms
+        )
 
     __array_ufunc__ = None
 
@@ -90,11 +113,19 @@ def transpose(self):
 
     def astype(self, dtype, order="K", casting="unsafe", copy=True):
         """Copy of the array, cast to a specified type."""
-        return type(self)(self._array.astype(dtype, order, casting, copy))
+        return type(self)(
+            self._array.astype(dtype, order, casting, copy),
+            column_names=self.column_names,
+            term_names=self.term_names,
+        )
 
     def getcol(self, i):
         """Return matrix column at specified index."""
-        return type(self)(self._array[:, [i]])
+        return type(self)(
+            self._array[:, [i]],
+            column_names=[self.column_names[i]],
+            term_names=[self.term_names[i]],
+        )
 
     def toarray(self):
         """Return array representation of matrix."""
@@ -212,5 +243,86 @@ def multiply(self, other):
         This assumes that ``other`` is a vector of size ``self.shape[0]``.
         """
         if np.asanyarray(other).ndim == 1:
-            return type(self)(self._array.__mul__(other[:, np.newaxis]))
-        return type(self)(self._array.__mul__(other))
+            return type(self)(
+                self._array.__mul__(other[:, np.newaxis]),
+                column_names=self.column_names,
+                term_names=self.term_names,
+            )
+        return type(self)(
+            self._array.__mul__(other),
+            column_names=self.column_names,
+            term_names=self.term_names,
+        )
+
+    def get_names(
+        self,
+        type: str = "column",
+        missing_prefix: Optional[str] = None,
+        indices: Optional[List[int]] = None,
+    ) -> List[Optional[str]]:
+        """Get column names.
+
+        For columns that do not have a name, a default name is created using the
+        following pattern: ``"{missing_prefix}{indices[i]}"`` where ``i`` is
+        the position of the column.
+
+        Parameters
+        ----------
+        type: str {'column'|'term'}
+            Whether to get column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        missing_prefix: Optional[str], default None
+            Prefix to use for columns that do not have a name. If None, then no
+            default name is created.
+        indices
+            The indices used for columns that do not have a name. If ``None``,
+            then the indices are ``list(range(self.shape[1]))``.
+
+        Returns
+        -------
+        List[Optional[str]]
+            Column names.
+        """
+        if type == "column":
+            names = np.array(self._colnames)
+        elif type == "term":
+            names = np.array(self._terms)
+        else:
+            raise ValueError(f"Type must be 'column' or 'term', got {type}")
+
+        if indices is None:
+            indices = list(range(len(self._colnames)))
+
+        if missing_prefix is not None:
+            default_names = np.array([f"{missing_prefix}{i}" for i in indices])
+            names[names == None] = default_names[names == None]  # noqa: E711
+
+        return list(names)
+
+    def set_names(self, names: Union[str, List[Optional[str]]], type: str = "column"):
+        """Set column names.
+
+        Parameters
+        ----------
+        names: List[Optional[str]]
+            Names to set.
+        type: str {'column'|'term'}
+            Whether to set column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        """
+        if isinstance(names, str):
+            names = [names]
+
+        if len(names) != self.shape[1]:
+            raise ValueError(f"Length of names must be {self.shape[1]}")
+
+        if type == "column":
+            self._colnames = names
+        elif type == "term":
+            self._terms = names
+        else:
+            raise ValueError(f"Type must be 'column' or 'term', got {type}")
diff --git a/src/tabmat/matrix_base.py b/src/tabmat/matrix_base.py
index 88091834..ac17d717 100644
--- a/src/tabmat/matrix_base.py
+++ b/src/tabmat/matrix_base.py
@@ -164,6 +164,76 @@ def standardize(
     def __getitem__(self, item):
         pass
 
+    @abstractmethod
+    def get_names(
+        self,
+        type: str = "column",
+        missing_prefix: Optional[str] = None,
+        indices: Optional[List[int]] = None,
+    ) -> List[Optional[str]]:
+        """Get column names.
+
+        For columns that do not have a name, a default name is created using the
+        following pattern: ``"{missing_prefix}{start_index + i}"`` where ``i`` is
+        the index of the column.
+
+        Parameters
+        ----------
+        type: str {'column'|'term'}
+            Whether to get column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        missing_prefix: Optional[str], default None
+            Prefix to use for columns that do not have a name. If None, then no
+            default name is created.
+        indices
+            The indices used for columns that do not have a name. If ``None``,
+            then the indices are ``list(range(self.shape[1]))``.
+
+        Returns
+        -------
+        List[Optional[str]]
+            Column names.
+        """
+        pass
+
+    def set_names(self, names: Union[str, List[Optional[str]]], type: str = "column"):
+        """Set column names.
+
+        Parameters
+        ----------
+        names: List[Optional[str]]
+            Names to set.
+        type: str {'column'|'term'}
+            Whether to set column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        """
+        pass
+
+    @property
+    def column_names(self):
+        """Column names of the matrix."""
+        return self.get_names(type="column")
+
+    @column_names.setter
+    def column_names(self, names: List[Optional[str]]):
+        self.set_names(names, type="column")
+
+    @property
+    def term_names(self):
+        """Term names of the matrix.
+
+        For differences between column names and term names, see ``get_names``.
+        """
+        return self.get_names(type="term")
+
+    @term_names.setter
+    def term_names(self, names: List[Optional[str]]):
+        self.set_names(names, type="term")
+
     # Higher priority than numpy arrays, so behavior for funcs like "@" defaults to the
     # behavior of this class
     __array_priority__ = 11
diff --git a/src/tabmat/sparse_matrix.py b/src/tabmat/sparse_matrix.py
index d98f180f..1c568757 100644
--- a/src/tabmat/sparse_matrix.py
+++ b/src/tabmat/sparse_matrix.py
@@ -31,7 +31,15 @@ class SparseMatrix(MatrixBase):
     SparseMatrix is instantiated in the same way as scipy.sparse.csc_matrix.
     """
 
-    def __init__(self, input_array, shape=None, dtype=None, copy=False):
+    def __init__(
+        self,
+        input_array,
+        shape=None,
+        dtype=None,
+        copy=False,
+        column_names=None,
+        term_names=None,
+    ):
         if isinstance(input_array, np.ndarray):
             if input_array.ndim == 1:
                 input_array = input_array.reshape(-1, 1)
@@ -51,8 +59,32 @@ def __init__(self, input_array, shape=None, dtype=None, copy=False):
             self._array.sort_indices()
         self._array_csr = None
 
+        if column_names is not None:
+            if len(column_names) != self.shape[1]:
+                raise ValueError(
+                    f"Expected {self.shape[1]} column names, got {len(column_names)}"
+                )
+            self._colnames = column_names
+        else:
+            self._colnames = [None] * self.shape[1]
+
+        if term_names is not None:
+            if len(term_names) != self.shape[1]:
+                raise ValueError(
+                    f"Expected {self.shape[1]} term names, got {len(term_names)}"
+                )
+            self._terms = term_names
+        else:
+            self._terms = self._colnames
+
     def __getitem__(self, key):
-        return type(self)(self._array.__getitem__(_check_indexer(key)))
+        row, col = _check_indexer(key)
+        colnames = list(np.array(self.column_names)[col].ravel())
+        terms = list(np.array(self.term_names)[col].ravel())
+
+        return type(self)(
+            self._array.__getitem__((row, col)), column_names=colnames, term_names=terms
+        )
 
     def __matmul__(self, other):
         return self._array.__matmul__(other)
@@ -121,7 +153,11 @@ def transpose(self):
 
     def getcol(self, i):
         """Return matrix column at specified index."""
-        return type(self)(self._array.getcol(i))
+        return type(self)(
+            self._array.getcol(i),
+            column_names=[self.column_names[i]],
+            term_names=[self.term_names[i]],
+        )
 
     def unpack(self):
         """Return the underlying scipy.sparse.csc_matrix."""
@@ -285,6 +321,87 @@ def multiply(self, other):
         from the parent class except that ``other`` is assumed to be a vector of size
         ``self.shape[0]``.
         """
-        if other.ndim == 1:
-            return type(self)(self._array.multiply(other[:, np.newaxis]))
-        return type(self)(self._array.multiply(other))
+        if np.asanyarray(other).ndim == 1:
+            return type(self)(
+                self._array.multiply(other[:, np.newaxis]),
+                column_names=self.column_names,
+                term_names=self.term_names,
+            )
+        return type(self)(
+            self._array.multiply(other),
+            column_names=self.column_names,
+            term_names=self.term_names,
+        )
+
+    def get_names(
+        self,
+        type: str = "column",
+        missing_prefix: Optional[str] = None,
+        indices: Optional[List[int]] = None,
+    ) -> List[Optional[str]]:
+        """Get column names.
+
+        For columns that do not have a name, a default name is created using the
+        following pattern: ``"{missing_prefix}{indices[i]}"`` where ``i`` is
+        the position of the column.
+
+        Parameters
+        ----------
+        type: str {'column'|'term'}
+            Whether to get column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        missing_prefix: Optional[str], default None
+            Prefix to use for columns that do not have a name. If None, then no
+            default name is created.
+        indices
+            The indices used for columns that do not have a name. If ``None``,
+            then the indices are ``list(range(self.shape[1]))``.
+
+        Returns
+        -------
+        List[Optional[str]]
+            Column names.
+        """
+        if type == "column":
+            names = np.array(self._colnames)
+        elif type == "term":
+            names = np.array(self._terms)
+        else:
+            raise ValueError(f"Type must be 'column' or 'term', got {type}")
+
+        if indices is None:
+            indices = list(range(len(self._colnames)))
+
+        if missing_prefix is not None:
+            default_names = np.array([f"{missing_prefix}{i}" for i in indices])
+            names[names == None] = default_names[names == None]  # noqa: E711
+
+        return list(names)
+
+    def set_names(self, names: Union[str, List[Optional[str]]], type: str = "column"):
+        """Set column names.
+
+        Parameters
+        ----------
+        names: List[Optional[str]]
+            Names to set.
+        type: str {'column'|'term'}
+            Whether to set column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        """
+        if isinstance(names, str):
+            names = [names]
+
+        if len(names) != self.shape[1]:
+            raise ValueError(f"Length of names must be {self.shape[1]}")
+
+        if type == "column":
+            self._colnames = names
+        elif type == "term":
+            self._terms = names
+        else:
+            raise ValueError(f"Type must be 'column' or 'term', got {type}")
diff --git a/src/tabmat/split_matrix.py b/src/tabmat/split_matrix.py
index a091949f..a60b1fb3 100644
--- a/src/tabmat/split_matrix.py
+++ b/src/tabmat/split_matrix.py
@@ -113,8 +113,16 @@ def _combine_matrices(matrices, indices):
         if len(this_type_matrices) > 1:
             new_matrix = mat_type_(stack_fn([matrices[i] for i in this_type_matrices]))
             new_indices = np.concatenate([indices[i] for i in this_type_matrices])
+            new_colnames = np.concatenate(
+                [np.array(matrices[i]._colnames) for i in this_type_matrices]
+            )
+            new_terms = np.concatenate(
+                [np.array(matrices[i]._terms) for i in this_type_matrices]
+            )
             sorter = np.argsort(new_indices)
             sorted_matrix = new_matrix[:, sorter]
+            sorted_matrix._colnames = list(new_colnames[sorter])
+            sorted_matrix._terms = list(new_terms[sorter])
             sorted_indices = new_indices[sorter]
 
             assert sorted_matrix.shape[0] == n_row
@@ -477,3 +485,60 @@ def __repr__(self):
         return out
 
     __array_priority__ = 13
+
+    def get_names(
+        self,
+        type: str = "column",
+        missing_prefix: Optional[str] = None,
+        indices: Optional[List[int]] = None,
+    ) -> List[Optional[str]]:
+        """Get column names.
+
+        For columns that do not have a name, a default name is created using the
+        following pattern: ``"{missing_prefix}{start_index + i}"`` where ``i`` is
+        the index of the column.
+
+        Parameters
+        ----------
+        type: str {'column'|'term'}
+            Whether to get column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        missing_prefix: Optional[str], default None
+            Prefix to use for columns that do not have a name. If None, then no
+            default name is created.
+        indices
+            The indices used for columns that do not have a name. If ``None``,
+            then the indices are ``list(range(self.shape[1]))``.
+
+        Returns
+        -------
+        List[Optional[str]]
+            Column names.
+        """
+        names = np.empty(self.shape[1], dtype=object)
+        for idx, mat in zip(self.indices, self.matrices):
+            names[idx] = mat.get_names(type, missing_prefix, idx)
+        return list(names)
+
+    def set_names(self, names: Union[str, List[Optional[str]]], type: str = "column"):
+        """Set column names.
+
+        Parameters
+        ----------
+        names: List[Optional[str]]
+            Names to set.
+        type: str {'column'|'term'}
+            Whether to set column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        """
+        names_array = np.array(names)
+
+        if len(names) != self.shape[1]:
+            raise ValueError(f"Length of names must be {self.shape[1]}")
+
+        for idx, mat in zip(self.indices, self.matrices):
+            mat.set_names(list(names_array[idx]), type)
diff --git a/src/tabmat/standardized_mat.py b/src/tabmat/standardized_mat.py
index 19b04f5a..2e88dbb0 100644
--- a/src/tabmat/standardized_mat.py
+++ b/src/tabmat/standardized_mat.py
@@ -1,4 +1,4 @@
-from typing import List, Union
+from typing import List, Optional, Union
 
 import numpy as np
 from scipy import sparse as sps
@@ -298,3 +298,72 @@ def __repr__(self):
         Mult: {self.mult}
         """
         return out
+
+    def get_names(
+        self,
+        type: str = "column",
+        missing_prefix: Optional[str] = None,
+        indices: Optional[List[int]] = None,
+    ) -> List[Optional[str]]:
+        """Get column names.
+
+        For columns that do not have a name, a default name is created using the
+        following pattern: ``"{missing_prefix}{start_index + i}"`` where ``i`` is
+        the index of the column.
+
+        Parameters
+        ----------
+        type: str {'column'|'term'}
+            Whether to get column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        missing_prefix: Optional[str], default None
+            Prefix to use for columns that do not have a name. If None, then no
+            default name is created.
+        indices
+            The indices used for columns that do not have a name. If ``None``,
+            then the indices are ``list(range(self.shape[1]))``.
+
+        Returns
+        -------
+        List[Optional[str]]
+            Column names.
+        """
+        return self.mat.get_names(type, missing_prefix, indices)
+
+    def set_names(self, names: Union[str, List[Optional[str]]], type: str = "column"):
+        """Set column names.
+
+        Parameters
+        ----------
+        names: List[Optional[str]]
+            Names to set.
+        type: str {'column'|'term'}
+            Whether to set column names or term names. The main difference is that
+            a categorical submatrix is counted as a single term, whereas it is
+            counted as multiple columns. Furthermore, matrices created from formulas
+            have a difference between a column and term (c.f. ``formulaic`` docs).
+        """
+        self.mat.set_names(names, type)
+
+    @property
+    def column_names(self):
+        """Column names of the matrix."""
+        return self.get_names(type="column")
+
+    @column_names.setter
+    def column_names(self, names: List[Optional[str]]):
+        self.set_names(names, type="column")
+
+    @property
+    def term_names(self):
+        """Term names of the matrix.
+
+        For differences between column names and term names, see ``get_names``.
+        """
+        return self.get_names(type="term")
+
+    @term_names.setter
+    def term_names(self, names: List[Optional[str]]):
+        self.set_names(names, type="term")
diff --git a/tests/test_matrices.py b/tests/test_matrices.py
index 815c48e3..85b2cd68 100644
--- a/tests/test_matrices.py
+++ b/tests/test_matrices.py
@@ -712,3 +712,186 @@ def test_hstack(mat_1, mat_2):
         stacked.A,
         np.hstack([mat.A if not isinstance(mat, np.ndarray) else mat for mat in mats]),
     )
+
+
+def test_names_against_expectation():
+    X = tm.DenseMatrix(
+        np.ones((5, 2)), column_names=["a", None], term_names=["a", None]
+    )
+    Xc = tm.CategoricalMatrix(
+        pd.Categorical(["a", "b", "c", "b", "a"]), column_name="c", term_name="c"
+    )
+    Xc2 = tm.CategoricalMatrix(pd.Categorical(["a", "b", "c", "b", "a"]))
+    Xs = tm.SparseMatrix(
+        sps.csc_matrix(np.ones((5, 2))),
+        column_names=["s1", "s2"],
+        term_names=["s", "s"],
+    )
+
+    mat = tm.SplitMatrix(matrices=[X, Xc, Xc2, Xs])
+
+    assert mat.get_names(type="column") == [
+        "a",
+        None,
+        "c[a]",
+        "c[b]",
+        "c[c]",
+        None,
+        None,
+        None,
+        "s1",
+        "s2",
+    ]
+
+    assert mat.get_names(type="term") == [
+        "a",
+        None,
+        "c",
+        "c",
+        "c",
+        None,
+        None,
+        None,
+        "s",
+        "s",
+    ]
+
+    assert mat.get_names(type="column", missing_prefix="_col_") == [
+        "a",
+        "_col_1",
+        "c[a]",
+        "c[b]",
+        "c[c]",
+        "_col_5-7[a]",
+        "_col_5-7[b]",
+        "_col_5-7[c]",
+        "s1",
+        "s2",
+    ]
+
+    assert mat.get_names(type="term", missing_prefix="_col_") == [
+        "a",
+        "_col_1",
+        "c",
+        "c",
+        "c",
+        "_col_5-7",
+        "_col_5-7",
+        "_col_5-7",
+        "s",
+        "s",
+    ]
+
+
+@pytest.mark.parametrize("mat", get_matrices())
+@pytest.mark.parametrize("missing_prefix", ["_col_", "X"])
+def test_names_getter_setter(mat, missing_prefix):
+    names = mat.get_names(missing_prefix=missing_prefix, type="column")
+    mat.column_names = names
+    assert mat.column_names == names
+
+
+@pytest.mark.parametrize("mat", get_matrices())
+@pytest.mark.parametrize("missing_prefix", ["_col_", "X"])
+def test_terms_getter_setter(mat, missing_prefix):
+    names = mat.get_names(missing_prefix=missing_prefix, type="term")
+    mat.term_names = names
+    assert mat.term_names == names
+
+
+@pytest.mark.parametrize("indexer_1", [slice(None, None), 0, slice(2, 8)])
+@pytest.mark.parametrize("indexer_2", [[0], slice(1, 4), [0, 2, 3], [4, 3, 2, 1, 0]])
+@pytest.mark.parametrize("sparse", [True, False])
+def test_names_indexing(indexer_1, indexer_2, sparse):
+    X = np.ones((10, 5), dtype=np.float64)
+    colnames = ["a", "b", None, "d", "e"]
+    termnames = ["t1", "t1", None, "t4", "t5"]
+
+    colnames_array = np.array(colnames)
+    termnames_array = np.array(termnames)
+
+    if sparse:
+        X = tm.SparseMatrix(
+            sps.csc_matrix(X), column_names=colnames, term_names=termnames
+        )
+    else:
+        X = tm.DenseMatrix(X, column_names=colnames, term_names=termnames)
+
+    X_indexed = X[indexer_1, indexer_2]
+    if not isinstance(X_indexed, tm.MatrixBase):
+        pytest.skip("Does not return MatrixBase")
+    assert X_indexed.column_names == list(colnames_array[indexer_2])
+    assert X_indexed.term_names == list(termnames_array[indexer_2])
+
+
+@pytest.mark.parametrize("mat_1", get_all_matrix_base_subclass_mats())
+@pytest.mark.parametrize("mat_2", get_all_matrix_base_subclass_mats())
+def test_combine_names(mat_1, mat_2):
+    mat_1.column_names = mat_1.get_names(missing_prefix="m1_", type="column")
+    mat_2.column_names = mat_2.get_names(missing_prefix="m2_", type="column")
+
+    mat_1.term_names = mat_1.get_names(missing_prefix="m1_", type="term")
+    mat_2.term_names = mat_2.get_names(missing_prefix="m2_", type="term")
+
+    combined = tm.SplitMatrix(matrices=[mat_1, mat_2])
+
+    assert combined.column_names == mat_1.column_names + mat_2.column_names
+    assert combined.term_names == mat_1.term_names + mat_2.term_names
+
+
+@pytest.mark.parametrize("prefix_sep", ["_", ": "])
+@pytest.mark.parametrize("drop_first", [True, False])
+def test_names_pandas(prefix_sep, drop_first):
+    n_rows = 50
+    dense_column = np.linspace(-10, 10, num=n_rows, dtype=np.float64)
+    dense_column_with_lots_of_zeros = dense_column.copy()
+    dense_column_with_lots_of_zeros[:44] = 0.0
+    sparse_column = np.zeros(n_rows, dtype=np.float64)
+    sparse_column[0] = 1.0
+    cat_column_lowdim = np.tile(["a", "b"], n_rows // 2)
+    cat_column_highdim = np.arange(n_rows)
+
+    dense_ser = pd.Series(dense_column)
+    lowdense_ser = pd.Series(dense_column_with_lots_of_zeros)
+    sparse_ser = pd.Series(sparse_column, dtype=pd.SparseDtype("float", 0.0))
+    cat_ser_lowdim = pd.Categorical(cat_column_lowdim)
+    cat_ser_highdim = pd.Categorical(cat_column_highdim)
+
+    df = pd.DataFrame(
+        data={
+            "d": dense_ser,
+            "cl_obj": cat_ser_lowdim.astype(object),
+            "ch": cat_ser_highdim,
+            "ds": lowdense_ser,
+            "s": sparse_ser,
+        }
+    )
+
+    categorical_format = "{name}" + prefix_sep + "{category}"
+    mat_end = tm.from_pandas(
+        df,
+        dtype=np.float64,
+        sparse_threshold=0.3,
+        cat_threshold=4,
+        object_as_cat=True,
+        cat_position="end",
+        categorical_format=categorical_format,
+        drop_first=drop_first,
+    )
+
+    expanded_df = pd.get_dummies(df, prefix_sep=prefix_sep, drop_first=drop_first)
+    assert mat_end.column_names == expanded_df.columns.tolist()
+
+    mat_expand = tm.from_pandas(
+        df,
+        dtype=np.float64,
+        sparse_threshold=0.3,
+        cat_threshold=4,
+        object_as_cat=True,
+        cat_position="expand",
+        categorical_format=categorical_format,
+        drop_first=drop_first,
+    )
+
+    unique_terms = list(dict.fromkeys(mat_expand.term_names))
+    assert unique_terms == df.columns.tolist()