From e6d02d7d5a08fdcaff50f14290bac10f844d4f92 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Wed, 6 Mar 2024 19:23:31 +0400 Subject: [PATCH 1/4] docs: Improve Python API docs --- py-polars/docs/requirements-docs.txt | 1 + py-polars/docs/source/_static/css/custom.css | 16 ++- py-polars/docs/source/conf.py | 8 ++ py-polars/docs/source/reference/api.rst | 4 +- py-polars/docs/source/reference/config.rst | 8 +- py-polars/docs/source/reference/datatypes.rst | 2 +- .../reference/expressions/functions.rst | 2 +- .../source/reference/expressions/index.rst | 2 +- py-polars/docs/source/reference/index.rst | 126 +++++++++++++++--- py-polars/docs/source/reference/sql.rst | 6 +- 10 files changed, 139 insertions(+), 36 deletions(-) diff --git a/py-polars/docs/requirements-docs.txt b/py-polars/docs/requirements-docs.txt index 75e19f79fcc3..7da7161639ff 100644 --- a/py-polars/docs/requirements-docs.txt +++ b/py-polars/docs/requirements-docs.txt @@ -14,6 +14,7 @@ sphinx-autosummary-accessors==2023.4.0 sphinx-copybutton==0.5.2 sphinx-design==0.5.0 sphinx-favicon==1.0.1 +sphinx_reredirects==0.1.3 sphinx-toolbox==3.5.0 livereload==2.6.3 diff --git a/py-polars/docs/source/_static/css/custom.css b/py-polars/docs/source/_static/css/custom.css index 9cdd3b3591d8..7732a4a2a2d2 100644 --- a/py-polars/docs/source/_static/css/custom.css +++ b/py-polars/docs/source/_static/css/custom.css @@ -24,26 +24,36 @@ html[data-theme="dark"] { --pst-color-border: #444444; } +/* add subtle gradients to sidebar and card elements */ div.bd-sidebar-primary { background-image: linear-gradient(90deg, var(--pst-gradient-sidebar-left) 0%, var(--pst-gradient-sidebar-right) 100%); } +div.sd-card { + background-image: linear-gradient(0deg, var(--pst-gradient-sidebar-left) 0%, var(--pst-gradient-sidebar-right) 100%); +} +/* match docs footer colour to the header */ footer.bd-footer { background-color: var(--pst-color-on-background); } /* - We're not currently doing anything meaningful with the right - ToC, so 
hide until there's actually something to put there... + we're not currently doing anything meaningful with the + right toc, so hide until there's something to put there */ div.bd-sidebar-secondary { display: none; } - label.sidebar-toggle.secondary-toggle { display: none !important; } +/* fix visited link colour */ a:visited { color: var(--pst-color-link); } + +/* fix ugly navbar scrollbar display */ +.sidebar-primary-items__end { + margin: 0 !important; +} diff --git a/py-polars/docs/source/conf.py b/py-polars/docs/source/conf.py index fe70f5c13641..99ecc8229574 100644 --- a/py-polars/docs/source/conf.py +++ b/py-polars/docs/source/conf.py @@ -21,12 +21,14 @@ # Add py-polars directory sys.path.insert(0, str(Path("../..").resolve())) + # -- Project information ----------------------------------------------------- project = "Polars" author = "Ritchie Vink" copyright = f"2020, {author}" + # -- General configuration --------------------------------------------------- extensions = [ @@ -44,6 +46,7 @@ "sphinx_copybutton", "sphinx_design", "sphinx_favicon", + "sphinx_reredirects", "sphinx_toolbox.more_autodoc.overloads", ] @@ -67,6 +70,7 @@ # https://sphinx-toolbox.readthedocs.io/en/latest/ overloads_location = ["bottom"] + # -- Extension settings ----------------------------------------------------- # sphinx.ext.intersphinx - link to other projects' documentation @@ -89,6 +93,10 @@ copybutton_prompt_text = r">>> |\.\.\. " copybutton_prompt_is_regexp = True +# redirect empty root to the actual landing page +redirects = {"index": "reference/index.html"} + + # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. 
diff --git a/py-polars/docs/source/reference/api.rst b/py-polars/docs/source/reference/api.rst index 54e8ed02b4b5..8cd9dc77475b 100644 --- a/py-polars/docs/source/reference/api.rst +++ b/py-polars/docs/source/reference/api.rst @@ -7,7 +7,7 @@ Providing new functionality --------------------------- These functions allow you to register custom functionality in a dedicated -namespace on the underlying polars classes without requiring subclassing +namespace on the underlying Polars classes without requiring subclassing or mixins. Expr, DataFrame, LazyFrame, and Series are all supported targets. This feature is primarily intended for use by library authors providing @@ -29,7 +29,7 @@ Available registrations .. note:: - You cannot override existing polars namespaces (such as ``.str`` or ``.dt``), and attempting to do so + You cannot override existing Polars namespaces (such as ``.str`` or ``.dt``), and attempting to do so will raise an `AttributeError <https://docs.python.org/3/library/exceptions.html#AttributeError>`_. However, you *can* override other custom namespaces (which will only generate a `UserWarning <https://docs.python.org/3/library/exceptions.html#UserWarning>`_). diff --git a/py-polars/docs/source/reference/config.rst b/py-polars/docs/source/reference/config.rst index 452ecd98c25c..a8137cdff7a0 100644 --- a/py-polars/docs/source/reference/config.rst +++ b/py-polars/docs/source/reference/config.rst @@ -34,8 +34,8 @@ Config options Config.set_trim_decimal_zeros Config.set_verbose -Config load, save, and current state ------------------------------------- +Config load, save, state +------------------------ .. autosummary:: :toctree: api/ @@ -81,8 +81,8 @@ explicitly calling one or more of the available "set\_" methods on it...
with pl.Config(verbose=True): do_various_things() -Use as a function decorator ---------------------------- +Use as a decorator +------------------ In the same vein, you can also use ``Config`` as a function decorator to temporarily set options for the duration of the function call: diff --git a/py-polars/docs/source/reference/datatypes.rst b/py-polars/docs/source/reference/datatypes.rst index 3e538998b002..695923e92885 100644 --- a/py-polars/docs/source/reference/datatypes.rst +++ b/py-polars/docs/source/reference/datatypes.rst @@ -53,8 +53,8 @@ Other :toctree: api/ :nosignatures: - Boolean Binary + Boolean Categorical Enum Null diff --git a/py-polars/docs/source/reference/expressions/functions.rst b/py-polars/docs/source/reference/expressions/functions.rst index 3fad1cb7f989..d240454e136b 100644 --- a/py-polars/docs/source/reference/expressions/functions.rst +++ b/py-polars/docs/source/reference/expressions/functions.rst @@ -2,7 +2,7 @@ Functions ========= -These functions are available from the polars module root and can be used as expressions, and sometimes also in eager contexts. +These functions are available from the Polars module root and can be used as expressions, and sometimes also in eager contexts. ---- diff --git a/py-polars/docs/source/reference/expressions/index.rst b/py-polars/docs/source/reference/expressions/index.rst index 6c87796c3081..5f9b8b541dad 100644 --- a/py-polars/docs/source/reference/expressions/index.rst +++ b/py-polars/docs/source/reference/expressions/index.rst @@ -2,7 +2,7 @@ Expressions =========== -This page gives an overview of all public polars expressions. +This page gives an overview of all public Polars expressions. .. 
toctree:: :maxdepth: 2 diff --git a/py-polars/docs/source/reference/index.rst b/py-polars/docs/source/reference/index.rst index d99d14bb5565..c45da2da4dce 100644 --- a/py-polars/docs/source/reference/index.rst +++ b/py-polars/docs/source/reference/index.rst @@ -1,24 +1,108 @@ -============= -API reference -============= +==================== +Python API reference +==================== -This page gives an overview of all public polars objects, functions and +This page gives a high-level overview of all public Polars objects, functions and methods. All classes and functions exposed in ``polars.*`` namespace are public. -.. toctree:: - :maxdepth: 2 - - io - series/index - dataframe/index - lazyframe/index - expressions/index - selectors - api - functions - datatypes - config - exceptions - testing - sql - metadata + +.. grid:: + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + dataframe/index + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + lazyframe/index + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + series/index + + +.. grid:: + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + expressions/index + selectors + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + functions + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + datatypes + + +.. grid:: + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + io + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + config + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + api + + +.. grid:: + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + sql + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 1 + + exceptions + + .. toctree:: + :maxdepth: 2 + + testing + + .. grid-item-card:: + + .. 
toctree:: + :maxdepth: 1 + + metadata diff --git a/py-polars/docs/source/reference/sql.rst b/py-polars/docs/source/reference/sql.rst index 2b1c323e7148..bf28f9cc6e20 100644 --- a/py-polars/docs/source/reference/sql.rst +++ b/py-polars/docs/source/reference/sql.rst @@ -1,6 +1,6 @@ -=== -SQL -=== +============= +SQL Interface +============= .. currentmodule:: polars .. py:class:: SQLContext From a5332bca6209921ec85bdcaae1241cd4403ad540 Mon Sep 17 00:00:00 2001 From: alexander-beedie Date: Wed, 6 Mar 2024 23:30:27 +0400 Subject: [PATCH 2/4] misc minor updates --- py-polars/docs/source/reference/index.rst | 2 +- py-polars/docs/source/reference/io.rst | 105 +++++++++++----------- 2 files changed, 52 insertions(+), 55 deletions(-) diff --git a/py-polars/docs/source/reference/index.rst b/py-polars/docs/source/reference/index.rst index c45da2da4dce..e5bce90e43bb 100644 --- a/py-polars/docs/source/reference/index.rst +++ b/py-polars/docs/source/reference/index.rst @@ -3,7 +3,7 @@ Python API reference ==================== This page gives a high-level overview of all public Polars objects, functions and -methods. All classes and functions exposed in ``polars.*`` namespace are public. +methods. All classes and functions exposed in the ``polars.*`` namespace are public. .. grid:: diff --git a/py-polars/docs/source/reference/io.rst b/py-polars/docs/source/reference/io.rst index efc9e96603a8..474e7576a652 100644 --- a/py-polars/docs/source/reference/io.rst +++ b/py-polars/docs/source/reference/io.rst @@ -3,6 +3,14 @@ Input/output ============ .. currentmodule:: polars +Avro +~~~~ +.. autosummary:: + :toctree: api/ + + read_avro + DataFrame.write_avro + CSV ~~~ .. autosummary:: @@ -14,29 +22,14 @@ CSV DataFrame.write_csv LazyFrame.sink_csv -Feather/ IPC -~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - read_ipc - read_ipc_stream - scan_ipc - read_ipc_schema - DataFrame.write_ipc - DataFrame.write_ipc_stream - LazyFrame.sink_ipc +.. 
currentmodule:: polars.io.csv.batched_reader -Parquet -~~~~~~~ .. autosummary:: :toctree: api/ - read_parquet - scan_parquet - read_parquet_schema - DataFrame.write_parquet - LazyFrame.sink_parquet + BatchedCsvReader.next_batches + +.. currentmodule:: polars Database ~~~~~~~~ @@ -47,27 +40,16 @@ Database read_database_uri DataFrame.write_database -JSON -~~~~ -.. autosummary:: - :toctree: api/ - - read_json - read_ndjson - scan_ndjson - DataFrame.write_json - DataFrame.write_ndjson - LazyFrame.sink_ndjson - -AVRO -~~~~ +Delta Lake +~~~~~~~~~~ .. autosummary:: :toctree: api/ - read_avro - DataFrame.write_avro + scan_delta + read_delta + DataFrame.write_delta -Spreadsheet +Excel / ODS ~~~~~~~~~~~ .. autosummary:: :toctree: api/ @@ -76,39 +58,54 @@ Spreadsheet read_ods DataFrame.write_excel -Apache Iceberg -~~~~~~~~~~~~~~ +Feather / IPC +~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ - scan_iceberg + read_ipc + read_ipc_stream + scan_ipc + read_ipc_schema + DataFrame.write_ipc + DataFrame.write_ipc_stream + LazyFrame.sink_ipc -Delta Lake -~~~~~~~~~~ +Iceberg +~~~~~~~ .. autosummary:: :toctree: api/ - scan_delta - read_delta - DataFrame.write_delta - -Datasets -~~~~~~~~ -Connect to pyarrow datasets. + scan_iceberg +JSON +~~~~ .. autosummary:: :toctree: api/ - scan_pyarrow_dataset + read_json + read_ndjson + scan_ndjson + DataFrame.write_json + DataFrame.write_ndjson + LazyFrame.sink_ndjson + +Parquet +~~~~~~~ +.. autosummary:: + :toctree: api/ + read_parquet + scan_parquet + read_parquet_schema + DataFrame.write_parquet + LazyFrame.sink_parquet -BatchedCsvReader +PyArrow Datasets ~~~~~~~~~~~~~~~~ -This reader comes available by calling `pl.read_csv_batched`. - -.. currentmodule:: polars.io.csv.batched_reader +Connect to pyarrow datasets. .. 
autosummary:: :toctree: api/ - BatchedCsvReader.next_batches + scan_pyarrow_dataset From cac54df948db32ed8af2d78256f074124f1e322f Mon Sep 17 00:00:00 2001 From: alexander-beedie Date: Thu, 7 Mar 2024 00:11:41 +0400 Subject: [PATCH 3/4] improve IO docs ordering --- py-polars/docs/source/reference/io.rst | 6 +++--- py-polars/polars/io/__init__.py | 2 +- py-polars/polars/io/csv/__init__.py | 2 ++ py-polars/polars/io/csv/batched_reader.py | 6 ++---- py-polars/tests/unit/io/test_csv.py | 6 +++++- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/py-polars/docs/source/reference/io.rst b/py-polars/docs/source/reference/io.rst index 474e7576a652..d3c45469f94a 100644 --- a/py-polars/docs/source/reference/io.rst +++ b/py-polars/docs/source/reference/io.rst @@ -45,8 +45,8 @@ Delta Lake .. autosummary:: :toctree: api/ - scan_delta read_delta + scan_delta DataFrame.write_delta Excel / ODS @@ -64,9 +64,9 @@ Feather / IPC :toctree: api/ read_ipc + read_ipc_schema read_ipc_stream scan_ipc - read_ipc_schema DataFrame.write_ipc DataFrame.write_ipc_stream LazyFrame.sink_ipc @@ -96,8 +96,8 @@ Parquet :toctree: api/ read_parquet - scan_parquet read_parquet_schema + scan_parquet DataFrame.write_parquet LazyFrame.sink_parquet diff --git a/py-polars/polars/io/__init__.py b/py-polars/polars/io/__init__.py index f4a39b5f778a..395f15bd4c94 100644 --- a/py-polars/polars/io/__init__.py +++ b/py-polars/polars/io/__init__.py @@ -21,8 +21,8 @@ "read_delta", "read_excel", "read_ipc", - "read_ipc_stream", "read_ipc_schema", + "read_ipc_stream", "read_json", "read_ndjson", "read_ods", diff --git a/py-polars/polars/io/csv/__init__.py b/py-polars/polars/io/csv/__init__.py index b18232f10346..cf5a2646240d 100644 --- a/py-polars/polars/io/csv/__init__.py +++ b/py-polars/polars/io/csv/__init__.py @@ -1,6 +1,8 @@ +from polars.io.csv.batched_reader import BatchedCsvReader from polars.io.csv.functions import read_csv, read_csv_batched, scan_csv __all__ = [ + "BatchedCsvReader", "read_csv", 
"read_csv_batched", "scan_csv", diff --git a/py-polars/polars/io/csv/batched_reader.py b/py-polars/polars/io/csv/batched_reader.py index 201b578be964..101672f7a5e9 100644 --- a/py-polars/polars/io/csv/batched_reader.py +++ b/py-polars/polars/io/csv/batched_reader.py @@ -110,14 +110,12 @@ def next_batches(self, n: int) -> list[DataFrame] | None: """ Read `n` batches from the reader. - The `n` chunks will be parallelized over the - available threads. + These batches will be parallelized over the available threads. Parameters ---------- n - Number of chunks to fetch. - This is ideally >= number of threads + Number of chunks to fetch; ideally this is >= number of threads. Examples -------- diff --git a/py-polars/tests/unit/io/test_csv.py b/py-polars/tests/unit/io/test_csv.py index a940e26f3697..6809f30e7978 100644 --- a/py-polars/tests/unit/io/test_csv.py +++ b/py-polars/tests/unit/io/test_csv.py @@ -16,6 +16,7 @@ import polars as pl from polars._utils.various import normalize_filepath from polars.exceptions import ComputeError, NoDataError +from polars.io.csv import BatchedCsvReader from polars.testing import assert_frame_equal, assert_series_equal if TYPE_CHECKING: @@ -1414,8 +1415,9 @@ def test_csv_categorical_categorical_merge() -> None: def test_batched_csv_reader(foods_file_path: Path) -> None: reader = pl.read_csv_batched(foods_file_path, batch_size=4) - batches = reader.next_batches(5) + assert isinstance(reader, BatchedCsvReader) + batches = reader.next_batches(5) assert batches is not None assert len(batches) == 5 assert batches[0].to_dict(as_series=False) == { @@ -1431,10 +1433,12 @@ def test_batched_csv_reader(foods_file_path: Path) -> None: "sugars_g": [25, 0, 5, 11], } assert_frame_equal(pl.concat(batches), pl.read_csv(foods_file_path)) + # the final batch of the low-memory variant is different reader = pl.read_csv_batched(foods_file_path, batch_size=4, low_memory=True) batches = reader.next_batches(5) assert len(batches) == 5 # type: ignore[arg-type] + 
batches += reader.next_batches(5) # type: ignore[operator] assert_frame_equal(pl.concat(batches), pl.read_csv(foods_file_path)) From 09a0a71d2e95a3902c865f615a485b6731be5c26 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 13 Mar 2024 08:24:02 +0100 Subject: [PATCH 4/4] Update a reference --- docs/development/versioning.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/development/versioning.md b/docs/development/versioning.md index 2d8009e8dbe5..727048b439d2 100644 --- a/docs/development/versioning.md +++ b/docs/development/versioning.md @@ -31,7 +31,7 @@ We know it takes time and energy for our users to keep up with new releases but, **A breaking change occurs when an existing component of the public API is changed or removed.** -A feature is part of the public API if it is documented in the [API reference](https://docs.pola.rs/py-polars/html/reference/). +A feature is part of the public API if it is documented in the [API reference](https://docs.pola.rs/py-polars/html/reference/index.html). Examples of breaking changes: