
feat(docs): Add MkDocs template site #126

Closed · wants to merge 5 commits
5 changes: 5 additions & 0 deletions docs/architecture/index.md
@@ -0,0 +1,5 @@
---
title: Tensorizer Architecture Overview
---

Hello World!
15 changes: 8 additions & 7 deletions docs/encryption.md
@@ -1,10 +1,11 @@
# Tensor Encryption

---
title: Tensor Encryption
---
`tensorizer` supports fast tensor weight encryption and decryption during
serialization and deserialization, respectively.

> [!NOTE]
>
> To use `tensorizer` encryption, a recent version of `libsodium` must be
> installed. Install `libsodium` with `apt-get install libsodium23`
> on Ubuntu or Debian, or follow
@@ -37,18 +38,18 @@ security beyond that is beyond the scope of `tensorizer`'s encryption as
currently available.

> [!WARNING]
>
> This does not include encryption for anything except for tensor weights.
> Metadata such as a tensor's name, dtype, shape, size, and non-keyed hashes
> are neither encrypted nor authenticated.

> [!WARNING]
>
> This level of encryption does not provide message authentication for metadata
> and does not protect against reordering or truncation of chunks.

> [!NOTE]
>
> Unencrypted/unauthenticated tensor data is rejected during deserialization
> if decryption is requested, and vice versa.

@@ -128,7 +129,7 @@ with open("tensor.key", "wb") as key_file:
# Load the randomly-generated key from where it was saved
with open("tensor.key", "rb") as key_file:
key: bytes = key_file.read()

decryption_params = DecryptionParams.from_key(key)

deserializer = TensorDeserializer("model.tensors", encryption=decryption_params)
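As a quick sketch of how these pieces fit together, here is a passphrase-based round trip. It is only an illustration, not part of this diff: the environment variable name and `model` (a `torch.nn.Module`) are placeholders, and it assumes the serializer side accepts the same `encryption=` keyword that the `TensorDeserializer` call above uses.

```python
import os

from tensorizer import (
    DecryptionParams,
    EncryptionParams,
    TensorDeserializer,
    TensorSerializer,
)

# Derive an encryption key from a passphrase held in an environment variable.
# "SUPER_SECRET_STRONG_PASSWORD" is a placeholder name.
source: str = os.environ["SUPER_SECRET_STRONG_PASSWORD"]
encryption_params = EncryptionParams.from_string(source)

# Serialize with encryption (`model` stands in for an existing torch.nn.Module).
serializer = TensorSerializer("model.tensors", encryption=encryption_params)
serializer.write_module(model)
serializer.close()

# Later, derive the same key from the same passphrase to decrypt.
decryption_params = DecryptionParams.from_string(source)
deserializer = TensorDeserializer("model.tensors", encryption=decryption_params)
```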
5 changes: 5 additions & 0 deletions docs/how-to/index-advanced-topics.md
@@ -0,0 +1,5 @@
---
title: Advanced How-to Guides
---

This section contains advanced guides on how to use Tensorizer for various tasks.
4 changes: 4 additions & 0 deletions docs/how-to/index.md
@@ -0,0 +1,4 @@
---
title: How-to Guides
---
This section contains guides on how to use Tensorizer for various tasks.
24 changes: 24 additions & 0 deletions docs/index.md
@@ -0,0 +1,24 @@
---
title: About Tensorizer
---




## MkDocs configuration

To install the required packages and serve the documentation locally, run the following commands:

```bash
$ pip install mkdocs \
mkdocs-material \
mkdocs-mermaid2-plugin \
mkdocstrings \
mkdocstrings[python] \
mkdocs-gen-files \
mkdocs-literate-nav \
mkdocs-section-index
$ mkdocs serve
```

**Optional:** `mkdocstrings[python]` can also be installed [as a project dependency](https://mkdocstrings.github.io/python/#installation).
4 changes: 4 additions & 0 deletions docs/reference/placeholder.md
@@ -0,0 +1,4 @@
---
title: Placeholder
---
Temporary Placeholder for `mkdocs_gen_ref.py`
39 changes: 39 additions & 0 deletions docs/scripts/mkdocs_gen_ref.py
@@ -0,0 +1,39 @@
"""Generate the code reference pages and navigation."""

# See:
# - https://mkdocstrings.github.io/recipes/#automatic-code-reference-pages
# - https://mkdocstrings.github.io/recipes/#generate-pages-on-the-fly

from pathlib import Path

import mkdocs_gen_files

nav = mkdocs_gen_files.Nav()

root = Path(__file__).parent.parent.parent
src = root / "tensorizer"

for path in sorted(src.rglob("*.py")):
    module_path = path.relative_to(root).with_suffix("")
    doc_path = path.relative_to(root).with_suffix(".md")
    full_doc_path = Path("reference", doc_path)

    parts = tuple(module_path.parts)

    # Document packages via their __init__ module as an index page,
    # and skip private modules and packages (any component with a
    # leading underscore).
    if parts[-1] == "__init__":
        parts = parts[:-1]
        doc_path = doc_path.with_name("index.md")
        full_doc_path = full_doc_path.with_name("index.md")
    elif any(p.startswith("_") for p in parts):
        continue

    nav[parts] = doc_path.as_posix()

    # Write a stub page containing a mkdocstrings identifier directive.
    with mkdocs_gen_files.open(full_doc_path, "w") as fd:
        ident = ".".join(parts)
        fd.write(f"::: {ident}")

    mkdocs_gen_files.set_edit_path(full_doc_path, path.relative_to(root))

# Emit the navigation summary consumed by the literate-nav plugin.
with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file:
    nav_file.writelines(nav.build_literate_nav())
2 changes: 2 additions & 0 deletions examples/hf_serialization.py
@@ -84,6 +84,7 @@ def setup_logger():
def set_logger_verbosity(verbosity: int = 2):
"""
Configure the verbosity of the global logger.

Args:
verbosity: Verbosity level. Clamped to [0, 3].
"""
@@ -113,6 +114,7 @@ def assert_module_equal(
) -> None:
"""
Check that the state dicts of two modules are equal.

Args:
before: The original module to compare against.
after: The secondary module to compare.
72 changes: 72 additions & 0 deletions mkdocs.yml
@@ -0,0 +1,72 @@
site_name: Tensorizer
docs_dir: docs
theme:
  name: material # pip install mkdocs-material
  palette:
    primary: blue
    accent: blue
  logo: https://github.com/coreweave/tensorizer/assets/24918963/82f6afff-c31f-4969-9da4-c4b36368df35
  favicon: https://github.com/coreweave/tensorizer/assets/24918963/26efe75f-1282-4bf6-86f8-acfa6a4fbfcb
  font:
    text: Roboto
    code: Roboto Mono
  features:
    - navigation.tracking
    - navigation.tabs
    - navigation.tabs.sticky
    - navigation.sections
    - navigation.expand
    - navigation.top
    - navigation.footer
    - toc.follow
    - toc.integrate

markdown_extensions:
  - footnotes
  - admonition
  - pymdownx.details
  - pymdownx.superfences:
      # Required exception to allow Mermaid to function:
      custom_fences:
        - name: mermaid
          class: mermaid
          format: !!python/name:mermaid2.fence_mermaid_custom

plugins:
  - search # must declare search, because defining any plugins overrides the default list
  - mermaid2 # pip install mkdocs-mermaid2-plugin
  - literate-nav: # pip install mkdocs-literate-nav
      nav_file: SUMMARY.md
  - section-index: # pip install mkdocs-section-index
  - gen-files:
      scripts:
        - docs/scripts/mkdocs_gen_ref.py
  - mkdocstrings: # pip install mkdocstrings
      handlers:
        python:
          options: # These need review by ML team: (https://mkdocstrings.github.io/python/usage/configuration/general/)
            show_bases: true
            show_source: false
            load_external_modules: true
            docstring_options:
              returns_multiple_items: false
            docstring_section_style: list
            merge_init_into_class: true
            separate_signature: true
            show_root_toc_entry: false
            signature_crossrefs: true
            unwrap_annotated: true
            docstring_style: google

repo_url: https://github.com/coreweave/tensorizer

nav:
  - index.md
  - encryption.md
  - Architecture:
      - architecture/index.md
      - Code Reference: reference/
  - How To:
      - how-to/index.md
      - Advanced Topics:
          - how-to/index-advanced-topics.md
1 change: 1 addition & 0 deletions tensorizer/_linear_partition.py
@@ -14,6 +14,7 @@ def partition(
Partitions a sequence of weights into slices with balanced sums,
without changing the ordering of elements.
Balancing minimizes the largest sum of any resulting slice.

Args:
weights: Element weights to balance.
partitions: The maximum number of slices to return.
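For intuition, here is a standalone sketch of the balanced contiguous partitioning that the `partition()` docstring describes. It is not `tensorizer`'s implementation; it just illustrates the same idea by binary-searching the smallest feasible maximum slice sum and then cutting greedy contiguous slices under that limit.

```python
from typing import List, Sequence


def balanced_partition(weights: Sequence[int], partitions: int) -> List[List[int]]:
    """Split `weights` into at most `partitions` contiguous slices,
    minimizing the largest slice sum."""
    if not weights:
        return []
    partitions = max(1, min(partitions, len(weights)))

    def fits(limit: int) -> bool:
        # How many greedy slices does this per-slice limit require?
        slices, current = 1, 0
        for w in weights:
            if current + w > limit:
                slices += 1
                current = 0
            current += w
        return slices <= partitions

    # Binary-search the smallest feasible maximum slice sum.
    lo, hi = max(weights), sum(weights)
    while lo < hi:
        mid = (lo + hi) // 2
        if fits(mid):
            hi = mid
        else:
            lo = mid + 1

    # Rebuild the slices using the optimal limit, preserving element order.
    result: List[List[int]] = []
    current, total = [], 0
    for w in weights:
        if current and total + w > lo:
            result.append(current)
            current, total = [], 0
        current.append(w)
        total += w
    result.append(current)
    return result


print(balanced_partition([9, 5, 6, 2, 8], 3))  # [[9], [5, 6], [2, 8]] -> max slice sum 11
```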
2 changes: 2 additions & 0 deletions tensorizer/_syscalls.py
@@ -63,6 +63,7 @@ def _get_fallocate():
def has_fallocate() -> bool:
"""
Checks if the Linux ``fallocate(2)`` syscall is available.

Returns: ``True`` if ``fallocate(2)`` is available, ``False`` otherwise.
"""
return _fallocate is not None
@@ -83,6 +84,7 @@ def try_fallocate(
Otherwise, only ignores ``EOPNOTSUPP``.

Returns: ``True`` if fallocate ran successfully, ``False`` otherwise.

Raises:
OSError: If `suppress_all_errors` is ``False`` and the call failed
due to an error other than ``EOPNOTSUPP``.
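As an illustrative sketch only: the full `try_fallocate` signature is not shown in this excerpt, so the `(fd, offset, length)` argument order below is an assumption. Preallocating space might look like this, falling back to `os.ftruncate` where `fallocate(2)` is unavailable or unsupported.

```python
import os
import tempfile

from tensorizer import _syscalls  # private module; API may change

size = 1 << 20  # 1 MiB to preallocate

with tempfile.NamedTemporaryFile() as f:
    if _syscalls.has_fallocate():
        # Assumed signature: try_fallocate(fd, offset, length, suppress_all_errors=False)
        if not _syscalls.try_fallocate(f.fileno(), 0, size):
            # fallocate(2) exists but this filesystem returned EOPNOTSUPP
            os.ftruncate(f.fileno(), size)
    else:
        # fallocate(2) is not available at all (e.g. non-Linux)
        os.ftruncate(f.fileno(), size)
```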
56 changes: 29 additions & 27 deletions tensorizer/serialization.py
@@ -886,7 +886,6 @@ class EncryptionParams:
Do not use this with an insecure key.

Examples:

Using `EncryptionParams.from_string()` with
an environment variable::

@@ -1079,6 +1078,7 @@ def salt(self) -> bytes:

Returns:
The cryptographic salt used for key derivation.

Raises:
ValueError: If no salt is being used for key derivation.
"""
@@ -1258,7 +1258,6 @@ class DecryptionParams:
`EncryptionParams.from_string()` or `EncryptionParams.random()`.

Examples:

Using `DecryptionParams.from_string()` with
an environment variable::

@@ -1330,9 +1329,6 @@ def from_string(
source: Source string to use for decryption.
encoding: The encoding to use to convert `source` to ``bytes``
if provided as a ``str``. Defaults to UTF-8.

Returns:

"""
if not source:
raise ValueError("Source cannot be empty")
@@ -2470,8 +2466,10 @@ def read_tensors(
A `HashMismatchError` will be raised if any of the hashes do
not match. If ``None``, the value of the `verify_hash` argument
passed to the `TensorDeserializer` constructor will be used.

Yields:
(Tuple[int, int, str, torch.Tensor]):
Tuples of the form ``(module_idx, tensor_type, name, tensor)``.

Raises:
HashMismatchError: If `verify_hash` resolves to True and
@@ -2544,20 +2542,23 @@ def read_numpy_arrays(
passed to the `TensorDeserializer` constructor will be used.

Yields:
(Tuple[int, int, str, numpy.ndarray, bool, Optional[str]]):
Tuples of the form:

(
module_idx,
tensor_type,
name,
arr,
is_opaque,
torch_dtype
)

If the `allow_raw_data` parameter is ``False`` (the default),
the final two elements are always ``False`` and ``None``,
respectively. Otherwise, ``is_opaque`` may be ``True``, and
``torch_dtype`` will then be a string representing the actual
non-numpy datatype represented by the data in `arr`.

Raises:
ValueError: If an opaque datatype is encountered in the file
@@ -3104,13 +3105,13 @@ def verify_module(
verification failed.

Returns:
A 2-tuple ``(passed, results)`` where ``passed`` is a boolean
reporting if all checks passed, i.e. the overall result
of the verification, and ``results`` is a list of
``(tensor_name, bool)`` tuples listing each tensor
that was checked and its individual verification result.
``results`` can be used to tell which tensor
failed verification when ``passed`` is False.

Raises:
RuntimeError: If this function is called before tensor data and
@@ -3497,6 +3498,7 @@ def _pwrite(

Returns:
The number of bytes written.

Raises:
OSError: ``verify=True`` and the number of bytes written
did not match the length of `data`.
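Pulling the docstring changes above together, here is a hedged usage sketch. `"model.tensors"` is a placeholder path and `MyModel` a stand-in for whatever `torch.nn.Module` the file was serialized from.

```python
from tensorizer import TensorDeserializer

# Stream tensors one at a time; read_tensors() yields
# (module_idx, tensor_type, name, tensor) tuples as documented above.
deserializer = TensorDeserializer("model.tensors", device="cpu", verify_hash=True)
for module_idx, tensor_type, name, tensor in deserializer.read_tensors():
    print(module_idx, tensor_type, name, tuple(tensor.shape), tensor.dtype)
deserializer.close()

# Load into an existing module, then verify it; verify_module() returns
# (passed, results), where results pinpoints any tensor that failed.
model = MyModel()  # hypothetical module matching the serialized weights
deserializer = TensorDeserializer("model.tensors", device="cpu")
deserializer.load_into_module(model)
passed, results = deserializer.verify_module(model)
if not passed:
    print("Failed tensors:", [name for name, ok in results if not ok])
deserializer.close()
```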