Skip to content

Commit

Permalink
remove ordering by sys__id
Browse files Browse the repository at this point in the history
  • Loading branch information
mattseddon committed Oct 9, 2024
1 parent 2e00c66 commit fc7c27f
Show file tree
Hide file tree
Showing 4 changed files with 0 additions and 23 deletions.
2 changes: 0 additions & 2 deletions src/datachain/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -1325,8 +1325,6 @@ def ls_dataset_rows(
if offset:
q = q.offset(offset)

q = q.order_by("sys__id")

return q.to_db_records()

def signed_url(self, source: str, path: str, client_config=None) -> str:
Expand Down
4 changes: 0 additions & 4 deletions src/datachain/data_storage/warehouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,6 @@ def dataset_select_paginated(
limit = query._limit
paginated_query = query.limit(page_size)

if not paginated_query._order_by_clauses:
# default order by is order by `sys__id`
paginated_query = paginated_query.order_by(query.selected_columns.sys__id)

results = None
offset = 0
num_yielded = 0
Expand Down
1 change: 0 additions & 1 deletion src/datachain/query/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ def __call__(

ordered_query = query.order_by(None).order_by(
PARTITION_COLUMN_ID,
"sys__id",
*query._order_by_clauses,
)

Expand Down
16 changes: 0 additions & 16 deletions src/datachain/query/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,10 +591,6 @@ def process_input_query(self, query: Select) -> tuple[Select, list["Table"]]:
return query, []
table = self.catalog.warehouse.create_pre_udf_table(query)
q: Select = sqlalchemy.select(*table.c)
if query._order_by_clauses:
# we are adding ordering only if it's explicitly added by user in
# query part before adding signals
q = q.order_by(table.c.sys__id)
return q, [table]

def create_result_query(
Expand Down Expand Up @@ -630,11 +626,6 @@ def q(*columns):
else:
res = sqlalchemy.select(*cols1).select_from(subq)

if query._order_by_clauses:
# if ordering is used in query part before adding signals, we
# will have it as order by id from select from pre-created udf table
res = res.order_by(subq.c.sys__id)

if self.partition_by is not None:
subquery = res.subquery()
res = sqlalchemy.select(*subquery.c).select_from(subquery)
Expand Down Expand Up @@ -666,13 +657,6 @@ def create_udf_table(self, query: Select) -> "Table":
def create_result_query(
self, udf_table, query: Select
) -> tuple[QueryGeneratorFunc, list["sqlalchemy.Column"]]:
if not query._order_by_clauses:
# if we are not selecting all rows in UDF, we need to ensure that
# we get the same rows as we got as inputs of UDF since selecting
# without ordering can be non deterministic in some databases
c = query.selected_columns
query = query.order_by(c.sys__id)

udf_table_query = udf_table.select().subquery()
udf_table_cols: list[sqlalchemy.Label[Any]] = [
label(c.name, c) for c in udf_table_query.columns
Expand Down

0 comments on commit fc7c27f

Please sign in to comment.