fix: Fix typos and remove trailing whitespace #18024

Merged 1 commit on Aug 5, 2024
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report_python.yml
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@ body:
label: Reproducible example
description: >
Please follow [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) on how to
provide a minimal, copy-pastable example. Include the (wrong) output if applicable.
provide a minimal, copy-pasteable example. Include the (wrong) output if applicable.
value: |
```python

2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report_rust.yml
@@ -19,7 +19,7 @@ body:
label: Reproducible example
description: >
Please follow [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) on how to
provide a minimal, copy-pastable example. Include the (wrong) output if applicable.
provide a minimal, copy-pasteable example. Include the (wrong) output if applicable.
value: |
```rust

2 changes: 1 addition & 1 deletion docs/releases/upgrade/1.md
@@ -1060,4 +1060,4 @@ However, for the LazyFrame properties, accessing these may have significant perf

To solve this, we added the `LazyFrame.collect_schema` method, which retrieves the schema and returns a `Schema` object.
The properties raise a `PerformanceWarning` and tell the user to use `collect_schema` instead.
We chose not to deprecate the properties for now to facilitatate writing code that is generic for both DataFrames and LazyFrames.
We chose not to deprecate the properties for now to facilitate writing code that is generic for both DataFrames and LazyFrames.
2 changes: 1 addition & 1 deletion py-polars/debug/launch.py
@@ -61,7 +61,7 @@ def launch_debugging() -> None:
print(f"pID = {pID}")

# Give the LLDB time to connect. Depending on how long it takes for your LLDB
# debugging session to initiatialize, you may have to adjust this setting.
# debugging session to initialize, you may have to adjust this setting.
time.sleep(LLDB_DEBUG_WAIT_TIME_SECONDS)

# Update sys.argv so that when exec() is called, the first argument is the script
2 changes: 1 addition & 1 deletion py-polars/docs/source/conf.py
@@ -71,7 +71,7 @@
overloads_location = ["bottom"]


# -- Extension settings -----------------------------------------------------
# -- Extension settings ------------------------------------------------------

# sphinx.ext.intersphinx - link to other projects' documentation
# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html
2 changes: 1 addition & 1 deletion py-polars/docs/source/reference/config.rst
@@ -61,7 +61,7 @@ Note that ``Config`` supports setting context-scoped options. These options
are valid *only* during scope lifetime, and are reset to their initial values
(whatever they were before entering the new context) on scope exit.

You can take advantage of this by initialising a``Config`` instance and then
You can take advantage of this by initialising a ``Config`` instance and then
explicitly calling one or more of the available "set\_" methods on it...

.. code-block:: python
2 changes: 1 addition & 1 deletion py-polars/docs/source/reference/sql/clauses.rst
@@ -122,7 +122,7 @@ Specifies the table(s) from which to retrieve or delete data.

JOIN
----
Combines rows from two or more tables based on a related column.
Combines rows from two or more tables based on a related column.

**Join Types**

2 changes: 1 addition & 1 deletion py-polars/docs/source/reference/sql/functions/array.rst
@@ -175,7 +175,7 @@ Returns the mean of all values in an array.
**Example:**

.. code-block:: python

df = pl.DataFrame({"foo": [[1, 2], [4, 3, -1]]})
df.sql("""
SELECT foo, ARRAY_MEAN(foo) AS foo_mean FROM self
@@ -63,7 +63,7 @@ Returns the greatest value in the list of expressions.

df = pl.DataFrame(
{
"foo": [100, 200, 300, 400],
"foo": [100, 200, 300, 400],
"bar": [20, 10, 30, 40]
}
)
2 changes: 1 addition & 1 deletion py-polars/docs/source/reference/sql/functions/string.rst
@@ -222,7 +222,7 @@ Returns the value with the first letter capitalized.
**Example:**

.. code-block:: python

df = pl.DataFrame({"bar": ["zz", "yy", "xx", "ww"]})
df.sql("""
SELECT bar, INITCAP(bar) AS baz FROM self
2 changes: 1 addition & 1 deletion py-polars/polars/dataframe/frame.py
@@ -2339,7 +2339,7 @@ def to_series(self, index: int = 0) -> Series:

def to_init_repr(self, n: int = 1000) -> str:
"""
Convert DataFrame to instantiatable string representation.
Convert DataFrame to instantiable string representation.

Parameters
----------
10 changes: 5 additions & 5 deletions py-polars/polars/expr/expr.py
@@ -302,12 +302,12 @@ def __array_ufunc__(
# We rename all but the first expression in case someone did e.g.
# np.divide(pl.col("a"), pl.col("a")); we'll be creating a struct
# below, and structs can't have duplicate names.
first_renamable_expr = True
first_renameable_expr = True
actual_exprs = []
for inp, is_actual_expr, index in exprs:
if is_actual_expr:
if first_renamable_expr:
first_renamable_expr = False
if first_renameable_expr:
first_renameable_expr = False
else:
inp = inp.alias(f"argument_{index}")
actual_exprs.append(inp)
@@ -8579,7 +8579,7 @@ def skew(self, *, bias: bool = True) -> Expr:

is the biased sample :math:`i\texttt{th}` central moment, and
:math:`\bar{x}` is
the sample mean. If `bias` is False, the calculations are
the sample mean. If `bias` is False, the calculations are
corrected for bias and the value computed is the adjusted
Fisher-Pearson standardized moment coefficient, i.e.

@@ -9663,7 +9663,7 @@ def extend_constant(self, value: IntoExpr, n: int | IntoExprColumn) -> Expr:
Parameters
----------
value
A constant literal value or a unit expressioin with which to extend the
A constant literal value or a unit expression with which to extend the
expression result Series; can pass None to extend with nulls.
n
The number of additional values that will be added.
6 changes: 3 additions & 3 deletions py-polars/polars/expr/string.py
@@ -1025,7 +1025,7 @@ def find(
--------
>>> df = pl.DataFrame(
... {
... "txt": ["Crab", "Lobster", None, "Crustaceon"],
... "txt": ["Crab", "Lobster", None, "Crustacean"],
... "pat": ["a[bc]", "b.t", "[aeiuo]", "(?i)A[BC]"],
... }
... )
@@ -1046,7 +1046,7 @@ def find(
│ Crab ┆ 2 ┆ null │
│ Lobster ┆ 5 ┆ 5 │
│ null ┆ null ┆ null │
│ Crustaceon ┆ 5 ┆ 7 │
│ Crustacean ┆ 5 ┆ 7 │
└────────────┴─────────────┴─────────┘

Match against a pattern found in another column or (expression):
@@ -1061,7 +1061,7 @@ def find(
│ Crab ┆ a[bc] ┆ 2 │
│ Lobster ┆ b.t ┆ 2 │
│ null ┆ [aeiuo] ┆ null │
│ Crustaceon ┆ (?i)A[BC] ┆ 5 │
│ Crustacean ┆ (?i)A[BC] ┆ 5 │
└────────────┴───────────┴──────────┘
"""
pattern = parse_into_expression(pattern, str_as_lit=True)
4 changes: 2 additions & 2 deletions py-polars/polars/functions/lazy.py
@@ -2108,7 +2108,7 @@ def rolling_cov(
The number of values in the window that should be non-null before computing
a result. If None, it will be set equal to window size.
ddof
Delta degrees of freedom. The divisor used in calculations
Delta degrees of freedom. The divisor used in calculations
is `N - ddof`, where `N` represents the number of elements.
"""
if min_periods is None:
@@ -2153,7 +2153,7 @@ def rolling_corr(
The number of values in the window that should be non-null before computing
a result. If None, it will be set equal to window size.
ddof
Delta degrees of freedom. The divisor used in calculations
Delta degrees of freedom. The divisor used in calculations
is `N - ddof`, where `N` represents the number of elements.
"""
if min_periods is None:
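The `ddof` parameter described in both docstrings is the usual delta-degrees-of-freedom convention; a plain-Python sketch of the `N - ddof` divisor (the helper function and sample data are invented for illustration):

```python
from statistics import fmean


def covariance(xs: list[float], ys: list[float], ddof: int = 1) -> float:
    """Covariance with the `N - ddof` divisor described in the docstrings.

    ddof=0 gives the biased (population) estimate; ddof=1 the unbiased
    (sample) estimate.
    """
    n = len(xs)
    mx, my = fmean(xs), fmean(ys)
    return sum((x - mx) * (y - my) for x, y in zip(xs, ys)) / (n - ddof)


xs = [1.0, 2.0, 3.0, 4.0]
ys = [2.0, 4.0, 6.0, 8.0]
print(covariance(xs, ys, ddof=1))  # 10/3 ≈ 3.333...
print(covariance(xs, ys, ddof=0))  # 2.5
```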
2 changes: 1 addition & 1 deletion py-polars/polars/io/iceberg.py
@@ -215,7 +215,7 @@ def _to_ast(expr: str) -> ast.expr:

@singledispatch
def _convert_predicate(a: Any) -> Any:
"""Walks the AST to convert the PyArrow expression to a PyIceberg expression."""
"""Walks the AST to convert the PyArrow expression to a PyIceberg expression."""
msg = f"Unexpected symbol: {a}"
raise ValueError(msg)

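`_convert_predicate` relies on `functools.singledispatch`: the undecorated base implementation rejects unknown AST nodes, and per-type handlers are registered on top of it. A minimal sketch of the same pattern (the `describe` function and its outputs are invented for illustration, not part of the polars codebase):

```python
import ast
from functools import singledispatch


@singledispatch
def describe(node: object) -> str:
    # Base case: any node type without a registered handler is rejected,
    # mirroring the error raised by _convert_predicate.
    msg = f"Unexpected symbol: {node}"
    raise ValueError(msg)


@describe.register
def _(node: ast.Constant) -> str:
    return f"constant {node.value!r}"


@describe.register
def _(node: ast.Name) -> str:
    return f"name {node.id!r}"


expr = ast.parse("x", mode="eval").body
print(describe(expr))  # name 'x'
```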
6 changes: 3 additions & 3 deletions py-polars/polars/lazyframe/frame.py
@@ -2299,7 +2299,7 @@ def sink_parquet(
If not set defaults to 1024 * 1024 bytes
maintain_order
Maintain the order in which data is processed.
Setting this to `False` will be slightly faster.
Setting this to `False` will be slightly faster.
type_coercion
Do type coercion optimization.
predicate_pushdown
@@ -2390,7 +2390,7 @@ def sink_ipc(
Choose "lz4" for fast compression/decompression.
maintain_order
Maintain the order in which data is processed.
Setting this to `False` will be slightly faster.
Setting this to `False` will be slightly faster.
type_coercion
Do type coercion optimization.
predicate_pushdown
@@ -2520,7 +2520,7 @@ def sink_csv(
necessary.
maintain_order
Maintain the order in which data is processed.
Setting this to `False` will be slightly faster.
Setting this to `False` will be slightly faster.
type_coercion
Do type coercion optimization.
predicate_pushdown
2 changes: 1 addition & 1 deletion py-polars/polars/meta/versions.py
@@ -88,7 +88,7 @@ def _get_dependency_info() -> dict[str, str]:


def _get_dependency_version(dep_name: str) -> str:
# note: we import 'importlib' here as a significiant optimisation for initial import
# note: we import 'importlib' here as a significant optimisation for initial import
import importlib
import importlib.metadata

8 changes: 4 additions & 4 deletions py-polars/polars/series/series.py
@@ -417,7 +417,7 @@ def _export_arrow_to_c(self, out_ptr: int, out_schema_ptr: int) -> None:

Leaking
If you don't pass the ArrowArray struct to a consumer,
array memory will leak. This is a low-level function intended for
array memory will leak. This is a low-level function intended for
expert users.
"""
self._s._export_arrow_to_c(out_ptr, out_schema_ptr)
@@ -4507,7 +4507,7 @@ def to_pandas(

def to_init_repr(self, n: int = 1000) -> str:
"""
Convert Series to instantiatable string representation.
Convert Series to instantiable string representation.

Parameters
----------
@@ -6373,7 +6373,7 @@ def skew(self, *, bias: bool = True) -> float | None:

is the biased sample :math:`i\texttt{th}` central moment, and
:math:`\bar{x}` is
the sample mean. If `bias` is False, the calculations are
the sample mean. If `bias` is False, the calculations are
corrected for bias and the value computed is the adjusted
Fisher-Pearson standardized moment coefficient, i.e.

@@ -7182,7 +7182,7 @@ def extend_constant(self, value: IntoExpr, n: int | IntoExprColumn) -> Series:
Parameters
----------
value
A constant literal value or a unit expressioin with which to extend the
A constant literal value or a unit expression with which to extend the
expression result Series; can pass None to extend with nulls.
n
The number of additional values that will be added.
2 changes: 1 addition & 1 deletion py-polars/polars/series/string.py
@@ -484,7 +484,7 @@ def find(

Examples
--------
>>> s = pl.Series("txt", ["Crab", "Lobster", None, "Crustaceon"])
>>> s = pl.Series("txt", ["Crab", "Lobster", None, "Crustacean"])

Find the index of the first substring matching a regex pattern:

2 changes: 1 addition & 1 deletion py-polars/src/conversion/any_value.rs
@@ -172,7 +172,7 @@ pub(crate) fn py_object_to_any_value<'py>(
// with abi3 for versions older than Python 3.10, the APIs that purport
// to return &str actually just encode to UTF-8 as a newly allocated
// PyBytes object, and then return reference to that. So what we're
// doing here isn't any different fundamantelly, and the APIs to for
// doing here isn't any different fundamentally, and the APIs to for
// converting to &str are deprecated in PyO3 0.21.
//
// Once Python 3.10 is the minimum supported version, converting to &str
2 changes: 1 addition & 1 deletion py-polars/src/lazyframe/visit.rs
@@ -51,7 +51,7 @@ pub(crate) struct NodeTraverser {

impl NodeTraverser {
// Versioning for IR, (major, minor)
// Incremement major on breaking changes to the IR (e.g. renaming
// Increment major on breaking changes to the IR (e.g. renaming
// fields, reordering tuples), minor on backwards compatible
// changes (e.g. exposing a new expression node).
const VERSION: Version = (1, 0);
26 changes: 13 additions & 13 deletions py-polars/tests/benchmark/data/tpch/dbgen/dists.dss
@@ -23,15 +23,15 @@
#
# <token> | <weight> # comment
#
# Distributions are used to bias the selection of a token
# based on its associated weight. The list of tokens and values
# Distributions are used to bias the selection of a token
# based on its associated weight. The list of tokens and values
# between the keywords BEGIN and END define the distribution named after
# the BEGIN. A uniformly random value from [0, sum(weights)]
# will be chosen and the first token whose cumulative weight is greater than
# or equal to the result will be returned. In essence, the weights for each
# token represent its relative weight within a distribution.
#
# one special token is defined: count (number of data points in the
# one special token is defined: count (number of data points in the
# distribution). It MUST be defined for each named distribution.
#-----------------------------------------------------------------------
# currently defined distributions and their use:
@@ -49,11 +49,11 @@
# rflag lineitems.returnflag
# types parts.type
# colors embedded string creation; CANNOT BE USED FOR pick_str(), agg_str() perturbs order
# articles comment generation
# nouns
# verbs
# adverbs
# auxillaries
# articles comment generation
# nouns
# verbs
# adverbs
# auxillaries
# prepositions
# terminators
# grammar sentence formation
@@ -693,7 +693,7 @@ near|1
of|1
on|1
outside|1
over|1
over|1
past|1
since|1
through|1
@@ -702,7 +702,7 @@ to|1
toward|1
under|1
until|1
up|1
up|1
upon|1
whithout|1
with|1
@@ -794,7 +794,7 @@ N P V P T|1
END grammar
###
# NP
# second level grammar. Noun phrases. N=noun, A=article,
# second level grammar. Noun phrases. N=noun, A=article,
# J=adjective, D=adverb
##
BEGIN np
@@ -806,7 +806,7 @@
END np
###
# VP
# second level grammar. Verb phrases. V=verb, X=auxiallary,
# second level grammar. Verb phrases. V=verb, X=auxiallary,
# D=adverb
##
BEGIN vp
@@ -818,7 +818,7 @@
END vp
###
# Q13
# Substitution parameters for Q13
# Substitution parameters for Q13
##
BEGIN Q13a
COUNT|4
2 changes: 1 addition & 1 deletion py-polars/tests/unit/interop/numpy/test_ufunc_series.py
@@ -148,7 +148,7 @@ def test_generalized_ufunc_missing_data() -> None:

While this particular example isn't necessarily a semantic issue, consider
a mean() function running on integers: it will give wrong results if the
input is missing data, since NumPy has no way to model missing slots. In
input is missing data, since NumPy has no way to model missing slots. In
the general case, we can't assume the function will handle missing data
correctly.
"""
2 changes: 1 addition & 1 deletion py-polars/tests/unit/lazyframe/cuda/test_node_visitor.py
@@ -14,7 +14,7 @@
def test_run_on_pandas() -> None:
# Simple join example, missing multiple columns, slices, etc.
def join(
inputs: list[Callable[[], pd.DataFrame]], obj: Any, _node_traverer: Any
inputs: list[Callable[[], pd.DataFrame]], obj: Any, _node_traverser: Any
) -> Callable[[], pd.DataFrame]:
assert len(obj.left_on) == 1
assert len(obj.right_on) == 1
@@ -508,7 +508,7 @@ def test_list_slice_5866() -> None:

def test_list_gather() -> None:
s = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8]])
# mypy: we make it work, but idomatic is `arr.get`.
# mypy: we make it work, but idiomatic is `arr.get`.
assert s.list.gather(0).to_list() == [[1], [4], [6]] # type: ignore[arg-type]
assert s.list.gather([0, 1]).to_list() == [[1, 2], [4, 5], [6, 7]]

2 changes: 1 addition & 1 deletion py-polars/tests/unit/operations/rolling/test_rolling.py
@@ -938,7 +938,7 @@ def test_rolling_min_periods(
)["value"]
assert_series_equal(result, pl.Series("value", expected, pl.Int64))

# Startig with unsorted data
# Starting with unsorted data
result = (
df.sort("date", descending=True)
.with_columns(