diff --git a/examples/docker/modin-hdk/plasticc-hdk.py b/examples/docker/modin-hdk/plasticc-hdk.py
index bf39a41d44b..704e0fe49cd 100644
--- a/examples/docker/modin-hdk/plasticc-hdk.py
+++ b/examples/docker/modin-hdk/plasticc-hdk.py
@@ -12,7 +12,6 @@
 # governing permissions and limitations under the License.
 
 import sys
-from collections import OrderedDict
 from functools import partial
 
 import numpy as np
@@ -23,7 +22,7 @@
 
 ################ helper functions ###############################
 def create_dtypes():
-    dtypes = OrderedDict(
+    dtypes = dict(
         [
             ("object_id", "int32"),
             ("mjd", "float32"),
@@ -50,7 +49,7 @@ def create_dtypes():
         "target",
     ]
     meta_dtypes = ["int32"] + ["float32"] * 4 + ["int32"] + ["float32"] * 5 + ["int32"]
-    meta_dtypes = OrderedDict(
+    meta_dtypes = dict(
         [(columns_names[i], meta_dtypes[i]) for i in range(len(meta_dtypes))]
     )
     return dtypes, meta_dtypes
diff --git a/examples/docker/modin-ray/plasticc.py b/examples/docker/modin-ray/plasticc.py
index fc55be84b8a..1c0cddadd1b 100644
--- a/examples/docker/modin-ray/plasticc.py
+++ b/examples/docker/modin-ray/plasticc.py
@@ -13,7 +13,6 @@
 
 import sys
 import time
-from collections import OrderedDict
 from functools import partial
 
 import numpy as np
@@ -29,7 +28,7 @@
 
 ################ helper functions ###############################
 def create_dtypes():
-    dtypes = OrderedDict(
+    dtypes = dict(
         [
             ("object_id", "int32"),
             ("mjd", "float32"),
@@ -56,7 +55,7 @@ def create_dtypes():
         "target",
     ]
     meta_dtypes = ["int32"] + ["float32"] * 4 + ["int32"] + ["float32"] * 5 + ["int32"]
-    meta_dtypes = OrderedDict(
+    meta_dtypes = dict(
         [(columns_names[i], meta_dtypes[i]) for i in range(len(meta_dtypes))]
     )
     return dtypes, meta_dtypes
diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py
index fc15c81496e..e3b92afdf2c 100644
--- a/modin/core/dataframe/pandas/dataframe/dataframe.py
+++ b/modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -18,7 +18,6 @@
 for pandas storage format.
 """
 import datetime
-from collections import OrderedDict
 from typing import TYPE_CHECKING, Callable, Dict, Hashable, List, Optional, Union
 
 import numpy as np
@@ -1686,7 +1685,7 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
 
         Returns
        -------
-        OrderedDict
+        dict
            A mapping from partition index to list of internal indices which correspond
            to `indices` in each partition.
        """
@@ -1700,7 +1699,7 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
            # Converting range-like indexer to slice
            indices = slice(indices.start, indices.stop, indices.step)
            if is_full_grab_slice(indices, sequence_len=len(self.get_axis(axis))):
-                return OrderedDict(
+                return dict(
                    zip(
                        range(self._partitions.shape[axis]),
                        [slice(None)] * self._partitions.shape[axis],
                    )
@@ -1708,25 +1707,23 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
                )
            # Empty selection case
            if indices.start == indices.stop and indices.start is not None:
-                return OrderedDict()
+                return dict()
            if indices.start is None or indices.start == 0:
                last_part, last_idx = list(
                    self._get_dict_of_block_index(axis, [indices.stop]).items()
                )[0]
-                dict_of_slices = OrderedDict(
-                    zip(range(last_part), [slice(None)] * last_part)
-                )
+                dict_of_slices = dict(zip(range(last_part), [slice(None)] * last_part))
                dict_of_slices.update({last_part: slice(last_idx[0])})
                return dict_of_slices
            elif indices.stop is None or indices.stop >= len(self.get_axis(axis)):
                first_part, first_idx = list(
                    self._get_dict_of_block_index(axis, [indices.start]).items()
                )[0]
-                dict_of_slices = OrderedDict({first_part: slice(first_idx[0], None)})
+                dict_of_slices = dict({first_part: slice(first_idx[0], None)})
                num_partitions = np.size(self._partitions, axis=axis)
                part_list = range(first_part + 1, num_partitions)
                dict_of_slices.update(
-                    OrderedDict(zip(part_list, [slice(None)] * len(part_list)))
+                    dict(zip(part_list, [slice(None)] * len(part_list)))
                )
                return dict_of_slices
            else:
@@ -1737,10 +1734,10 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
                    self._get_dict_of_block_index(axis, [indices.stop]).items()
                )[0]
                if first_part == last_part:
-                    return OrderedDict({first_part: slice(first_idx[0], last_idx[0])})
+                    return dict({first_part: slice(first_idx[0], last_idx[0])})
                else:
                    if last_part - first_part == 1:
-                        return OrderedDict(
+                        return dict(
                            # FIXME: this dictionary creation feels wrong - it might not maintain the order
                            {
                                first_part: slice(first_idx[0], None),
@@ -1748,12 +1745,10 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
                                last_part: slice(None, last_idx[0]),
                            }
                        )
                    else:
-                        dict_of_slices = OrderedDict(
-                            {first_part: slice(first_idx[0], None)}
-                        )
+                        dict_of_slices = dict({first_part: slice(first_idx[0], None)})
                        part_list = range(first_part + 1, last_part)
                        dict_of_slices.update(
-                            OrderedDict(zip(part_list, [slice(None)] * len(part_list)))
+                            dict(zip(part_list, [slice(None)] * len(part_list)))
                        )
                        dict_of_slices.update({last_part: slice(None, last_idx[0])})
                        return dict_of_slices
@@ -1765,7 +1760,7 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
            # This will help preserve metadata stored in empty dataframes (indexes and dtypes)
            # Otherwise, we will get an empty `new_partitions` array, from which it will
            # no longer be possible to obtain metadata
-            return OrderedDict([(0, np.array([], dtype=np.int64))])
+            return dict([(0, np.array([], dtype=np.int64))])
        negative_mask = np.less(indices, 0)
        has_negative = np.any(negative_mask)
        if has_negative:
@@ -1827,7 +1822,7 @@ def internal(block_idx: int, global_index):
            for i in range(1, len(count_for_each_partition))
            if count_for_each_partition[i] > count_for_each_partition[i - 1]
        ]
-        return OrderedDict(partition_ids_with_indices)
+        return dict(partition_ids_with_indices)
 
    @staticmethod
    def _join_index_objects(axis, indexes, how, sort):
diff --git a/modin/core/io/io.py b/modin/core/io/io.py
index a024d0c6c0e..5d31a64f437 100644
--- a/modin/core/io/io.py
+++ b/modin/core/io/io.py
@@ -17,7 +17,6 @@
 `BaseIO` is base class for IO classes, that stores IO functions.
 """
 
-from collections import OrderedDict
 from typing import Any
 
 import pandas
@@ -273,8 +272,8 @@ def read_clipboard(cls, sep=r"\s+", **kwargs): # pragma: no cover # noqa: PR01
    @doc(
        _doc_default_io_method,
        summary="Read an Excel file into query compiler",
-        returns="""BaseQueryCompiler or dict/OrderedDict :
-            QueryCompiler or OrderedDict/dict with read data.""",
+        returns="""BaseQueryCompiler or dict :
+            QueryCompiler or dict with read data.""",
    )
    def read_excel(cls, **kwargs): # noqa: PR01
        ErrorMessage.default_to_pandas("`read_excel`")
@@ -285,7 +284,7 @@ def read_excel(cls, **kwargs): # noqa: PR01
            # pd.ExcelFile in `read_excel` isn't supported
            kwargs["io"]._set_pandas_mode()
        intermediate = pandas.read_excel(**kwargs)
-        if isinstance(intermediate, (OrderedDict, dict)):
+        if isinstance(intermediate, dict):
            parsed = type(intermediate)()
            for key in intermediate.keys():
                parsed[key] = cls.from_pandas(intermediate.get(key))
diff --git a/modin/core/storage_formats/cudf/parser.py b/modin/core/storage_formats/cudf/parser.py
index 1cfa7680893..ac206cfacb8 100644
--- a/modin/core/storage_formats/cudf/parser.py
+++ b/modin/core/storage_formats/cudf/parser.py
@@ -12,7 +12,6 @@
 # governing permissions and limitations under the License.
 
 import warnings
-from collections import OrderedDict
 from io import BytesIO
 
 import numpy as np
@@ -83,7 +82,7 @@ def single_worker_read(cls, fname, *, reason, **kwargs):
                )
            )
            return pandas_frame
-        elif isinstance(pandas_frame, (OrderedDict, dict)):
+        elif isinstance(pandas_frame, dict):
            return {
                i: cls.query_compiler_cls.from_pandas(frame, cls.frame_cls)
                for i, frame in pandas_frame.items()
diff --git a/modin/core/storage_formats/pandas/parsers.py b/modin/core/storage_formats/pandas/parsers.py
index ceb10658381..b677a541d2e 100644
--- a/modin/core/storage_formats/pandas/parsers.py
+++ b/modin/core/storage_formats/pandas/parsers.py
@@ -43,7 +43,6 @@
 import json
 import os
 import warnings
-from collections import OrderedDict
 from io import BytesIO, IOBase, TextIOWrapper
 from typing import Any, NamedTuple
 
@@ -313,7 +312,7 @@ def single_worker_read(cls, fname, *args, reason: str, **kwargs):
                )
            )
            return pandas_frame
-        elif isinstance(pandas_frame, (OrderedDict, dict)):
+        elif isinstance(pandas_frame, dict):
            return {
                i: cls.query_compiler_cls.from_pandas(frame, cls.frame_cls)
                for i, frame in pandas_frame.items()
diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe/dataframe.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe/dataframe.py
index 445eccac6a8..b5932a632ed 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe/dataframe.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe/dataframe.py
@@ -14,7 +14,6 @@
 """Module provides ``HdkOnNativeDataframe`` class implementing lazy frame."""
 
 import re
-from collections import OrderedDict
 from typing import Hashable, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
@@ -464,7 +463,7 @@ def take_2d_labels_or_positional(
                # Sort by the rowid column
                base = base.copy(op=SortNode(base, [rowid_col], [False], "last"))
                # Remove the rowid column
-                exprs = OrderedDict()
+                exprs = dict()
                for col in table_cols:
                    exprs[col] = base.ref(col)
                base = base.copy(
@@ -614,7 +613,7 @@ def generate_by_name(by):
                else:
                    return by
 
-            exprs = OrderedDict(
+            exprs = dict(
                ((generate_by_name(col), by_frame.ref(col)) for col in groupby_cols)
            )
            groupby_cols = list(exprs.keys())
@@ -647,7 +646,7 @@ def generate_by_name(by):
 
        new_dtypes = base._dtypes[groupby_cols].tolist()
 
-        agg_exprs = OrderedDict()
+        agg_exprs = dict()
        if isinstance(agg, str):
            col_to_ref = {col: base.ref(col) for col in agg_cols}
            self._add_agg_exprs(agg, col_to_ref, kwargs, agg_exprs)
@@ -799,7 +798,7 @@ def agg(self, agg):
        """
        assert isinstance(agg, str)
 
-        agg_exprs = OrderedDict()
+        agg_exprs = dict()
        for col in self.columns:
            agg_exprs[col] = AggregateExpr(agg, self.ref(col))
 
@@ -1089,7 +1088,7 @@ def join(
        if isinstance(self._op, FrameNode):
            other = self.copy()
        else:
-            exprs = OrderedDict((c, self.ref(c)) for c in self._table_cols)
+            exprs = dict((c, self.ref(c)) for c in self._table_cols)
            other = self.__constructor__(
                columns=self.columns,
                dtypes=self._dtypes_for_exprs(exprs),
@@ -1129,7 +1128,7 @@ def join(
            else:
                ignore_index = True
                index_cols = None
-            exprs = OrderedDict()
+            exprs = dict()
            new_dtypes = []
 
            new_columns, left_renamer, right_renamer = join_columns(
@@ -1235,7 +1234,7 @@ def _union_all(
            The new frame.
        """
        index_cols = None
-        col_name_to_dtype = OrderedDict()
+        col_name_to_dtype = dict()
        for col in self.columns:
            col_name_to_dtype[col] = self._dtypes[col]
 
@@ -1287,7 +1286,7 @@ def _union_all(
                )
 
        if sort:
-            col_name_to_dtype = OrderedDict(
+            col_name_to_dtype = dict(
                (col, col_name_to_dtype[col]) for col in sorted(col_name_to_dtype)
            )
 
@@ -1308,7 +1307,7 @@ def _union_all(
                or any(frame_dtypes.index != dtypes.index)
                or any(frame_dtypes.values != dtypes.values)
            ):
-                exprs = OrderedDict()
+                exprs = dict()
                uses_rowid = False
                for col in table_col_name_to_dtype:
                    if col in frame_dtypes:
@@ -1785,7 +1784,7 @@ def sort_rows(self, columns, ascending, ignore_index, na_position):
                drop_index_cols_after = None
 
            if drop_index_cols_before:
-                exprs = OrderedDict()
+                exprs = dict()
                index_cols = (
                    drop_index_cols_after if drop_index_cols_after else None
                )
@@ -1810,7 +1809,7 @@ def sort_rows(self, columns, ascending, ignore_index, na_position):
            )
 
            if drop_index_cols_after:
-                exprs = OrderedDict()
+                exprs = dict()
                for col in base.columns:
                    exprs[col] = base.ref(col)
                base = base.__constructor__(
@@ -1950,7 +1949,7 @@ def _materialize_rowid(self):
        """
        name = self._index_cache.get().name if self.has_materialized_index else None
        name = mangle_index_names([name])[0]
-        exprs = OrderedDict()
+        exprs = dict()
        exprs[name] = self.ref(ROWID_COL_NAME)
        for col in self._table_cols:
            exprs[col] = self.ref(col)
@@ -1974,7 +1973,7 @@ def _index_exprs(self):
        -------
        dict
        """
-        exprs = OrderedDict()
+        exprs = dict()
        if self._index_cols:
            for col in self._index_cols:
                exprs[col] = self.ref(col)
@@ -2290,7 +2289,7 @@ def reset_index(self, drop):
            The new frame.
        """
        if drop:
-            exprs = OrderedDict()
+            exprs = dict()
            for c in self.columns:
                exprs[c] = self.ref(c)
            return self.__constructor__(
@@ -2306,7 +2305,7 @@ def reset_index(self, drop):
                    "default index reset with no drop is not supported"
                )
            # Need to demangle index names.
-            exprs = OrderedDict()
+            exprs = dict()
            for i, c in enumerate(self._index_cols):
                name = ColNameCodec.demangle_index_name(c)
                if name is None:
@@ -2542,7 +2541,7 @@ def set_index_name(self, name):
            return self
 
        names = mangle_index_names([name])
-        exprs = OrderedDict()
+        exprs = dict()
        if self._index_cols is None:
            exprs[names[0]] = self.ref(ROWID_COL_NAME)
        else:
@@ -2597,7 +2596,7 @@ def set_index_names(self, names):
            )
 
        names = mangle_index_names(names)
-        exprs = OrderedDict()
+        exprs = dict()
        for old, new in zip(self._index_cols, names):
            exprs[new] = self.ref(old)
        for col in self.columns:
diff --git a/modin/experimental/core/io/sql/utils.py b/modin/experimental/core/io/sql/utils.py
index c201bd29fc1..530f300df3e 100644
--- a/modin/experimental/core/io/sql/utils.py
+++ b/modin/experimental/core/io/sql/utils.py
@@ -13,8 +13,6 @@
 
 """Utilities for experimental SQL format type IO functions implementations."""
 
-from collections import OrderedDict
-
 import pandas
 import pandas._libs.lib as lib
 from sqlalchemy import MetaData, Table, create_engine, inspect
@@ -109,10 +107,10 @@ def get_table_columns(metadata):
 
    Returns
    -------
-    OrderedDict
+    dict
        Dictionary with columns names and python types.
    """
-    cols = OrderedDict()
+    cols = dict()
    for col in metadata.c:
        name = str(col).rpartition(".")[2]
        cols[name] = col.type.python_type.__name__
@@ -165,14 +163,14 @@ def get_query_columns(engine, query):
 
    Returns
    -------
-    OrderedDict
+    dict
        Dictionary with columns names and python types.
    """
    con = engine.connect()
    result = con.execute(query).fetchone()
    values = list(result)
    cols_names = list(result.keys())
-    cols = OrderedDict()
+    cols = dict()
    for i in range(len(cols_names)):
        cols[cols_names[i]] = type(values[i]).__name__
    return cols
@@ -186,7 +184,7 @@ def check_partition_column(partition_column, cols):
    ----------
    partition_column : str
        Column name used for data partitioning between the workers.
-    cols : OrderedDict/dict
+    cols : dict
        Dictionary with columns names and python types.
    """
    for k, v in cols.items():
diff --git a/modin/pandas/io.py b/modin/pandas/io.py
index 76d7416e027..8045d8203af 100644
--- a/modin/pandas/io.py
+++ b/modin/pandas/io.py
@@ -25,7 +25,6 @@
 import inspect
 import pathlib
 import pickle
-from collections import OrderedDict
 from typing import (
    IO,
    TYPE_CHECKING,
@@ -491,7 +490,7 @@ def read_excel(
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
 
    intermediate = FactoryDispatcher.read_excel(**kwargs)
-    if isinstance(intermediate, (OrderedDict, dict)):
+    if isinstance(intermediate, dict):
        parsed = type(intermediate)()
        for key in intermediate.keys():
            parsed[key] = ModinObjects.DataFrame(query_compiler=intermediate.get(key))
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 136a255654f..14ad75ee463 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -17,7 +17,7 @@
 import os
 import sys
 import unittest.mock as mock
-from collections import OrderedDict, defaultdict
+from collections import defaultdict
 from io import BytesIO, StringIO
 from pathlib import Path
 from typing import Dict
@@ -2282,7 +2282,7 @@ def test_read_excel_all_sheets(self, make_excel_file):
        pandas_df = pandas.read_excel(unique_filename, sheet_name=None)
        modin_df = pd.read_excel(unique_filename, sheet_name=None)
 
-        assert isinstance(pandas_df, (OrderedDict, dict))
+        assert isinstance(pandas_df, dict)
        assert isinstance(modin_df, type(pandas_df))
        assert pandas_df.keys() == modin_df.keys()
 
@@ -3220,7 +3220,6 @@ def test_to_dict_dataframe():
    [
        pytest.param({}, id="no_kwargs"),
        pytest.param({"into": dict}, id="into_dict"),
-        pytest.param({"into": OrderedDict}, id="into_ordered_dict"),
        pytest.param({"into": defaultdict(list)}, id="into_defaultdict"),
    ],
 )