Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement pagination for list APIs #273

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ce1782b
check in
eric-maynard Sep 5, 2024
63ef916
refactor
eric-maynard Sep 5, 2024
609e7bf
make tests stable
eric-maynard Sep 5, 2024
964bd53
still chasing compile errors
eric-maynard Sep 6, 2024
0ce73af
tests stabler
eric-maynard Sep 6, 2024
cb4a8e2
wip
eric-maynard Sep 6, 2024
fb21e9b
finishing integration
eric-maynard Sep 6, 2024
a725497
many fixes
eric-maynard Sep 6, 2024
2a99045
more signature fixes
eric-maynard Sep 6, 2024
14b0ef3
tests maybe working
eric-maynard Sep 6, 2024
8339e52
tested
eric-maynard Sep 6, 2024
dcaf798
add config, fixes
eric-maynard Sep 6, 2024
6b1c362
improvements
eric-maynard Sep 6, 2024
ccd0713
implement checksum
eric-maynard Sep 6, 2024
feb8c29
more tests
eric-maynard Sep 6, 2024
d649725
one tweak
eric-maynard Sep 9, 2024
c14797f
add another test case
eric-maynard Sep 9, 2024
8403076
add another test case
eric-maynard Sep 9, 2024
724948f
resolve conflicts
eric-maynard Sep 9, 2024
fe0acc8
check in
eric-maynard Sep 9, 2024
1100f71
check in after major refactor
eric-maynard Sep 10, 2024
3fc5939
close, issue with loadtasks
eric-maynard Sep 10, 2024
549c5ab
revert
eric-maynard Sep 10, 2024
d5f127b
re introduce client side filtering
eric-maynard Sep 10, 2024
342eb89
stable tests
eric-maynard Sep 10, 2024
af1b085
lint
eric-maynard Sep 10, 2024
0b313ae
one fix
eric-maynard Sep 10, 2024
b8f4342
fix conflicts
eric-maynard Sep 10, 2024
cd74b58
fix tests
eric-maynard Sep 10, 2024
778c60e
autolint
eric-maynard Sep 11, 2024
e82402b
doc fixes
eric-maynard Sep 11, 2024
eedf8c7
autolint
eric-maynard Sep 11, 2024
54c478f
lots of doc changes
eric-maynard Sep 11, 2024
2e08f0d
autolint
eric-maynard Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.polaris.core.PolarisCallContext;
import org.apache.polaris.core.catalog.pagination.EntityIdPageToken;
import org.apache.polaris.core.catalog.pagination.PageToken;
import org.apache.polaris.core.catalog.pagination.PolarisPage;
import org.apache.polaris.core.context.RealmContext;
import org.apache.polaris.core.entity.PolarisBaseEntity;
import org.apache.polaris.core.entity.PolarisChangeTrackingVersions;
Expand Down Expand Up @@ -501,28 +504,31 @@ public List<PolarisEntityActiveRecord> lookupEntityActiveBatch(

/** {@inheritDoc} */
@Override
public @NotNull List<PolarisEntityActiveRecord> listActiveEntities(
public @NotNull PolarisPage<PolarisEntityActiveRecord> listActiveEntities(
@NotNull PolarisCallContext callCtx,
long catalogId,
long parentId,
@NotNull PolarisEntityType entityType) {
return listActiveEntities(callCtx, catalogId, parentId, entityType, Predicates.alwaysTrue());
@NotNull PolarisEntityType entityType,
@NotNull PageToken pageToken) {
return listActiveEntities(
callCtx, catalogId, parentId, entityType, pageToken, Predicates.alwaysTrue());
}

@Override
public @NotNull List<PolarisEntityActiveRecord> listActiveEntities(
public @NotNull PolarisPage<PolarisEntityActiveRecord> listActiveEntities(
@NotNull PolarisCallContext callCtx,
long catalogId,
long parentId,
@NotNull PolarisEntityType entityType,
@NotNull PageToken pageToken,
@NotNull Predicate<PolarisBaseEntity> entityFilter) {
// full range scan under the parent for that type
return listActiveEntities(
callCtx,
catalogId,
parentId,
entityType,
Integer.MAX_VALUE,
pageToken,
entityFilter,
entity ->
new PolarisEntityActiveRecord(
Expand All @@ -535,23 +541,53 @@ public List<PolarisEntityActiveRecord> lookupEntityActiveBatch(
}

@Override
public @NotNull <T> List<T> listActiveEntities(
public @NotNull <T> PolarisPage<T> listActiveEntities(
@NotNull PolarisCallContext callCtx,
long catalogId,
long parentId,
@NotNull PolarisEntityType entityType,
int limit,
@NotNull PageToken pageToken,
@NotNull Predicate<PolarisBaseEntity> entityFilter,
@NotNull Function<PolarisBaseEntity, T> transformer) {
// full range scan under the parent for that type
return this.store
.lookupFullEntitiesActive(localSession.get(), catalogId, parentId, entityType)
.stream()
.map(ModelEntity::toEntity)
.filter(entityFilter)
.limit(limit)
.map(transformer)
.collect(Collectors.toList());

List<T> data;
if (entityFilter.equals(Predicates.alwaysTrue())) {
// In this case, we can push the filter down into the query
data =
this.store
.lookupFullEntitiesActive(
localSession.get(), catalogId, parentId, entityType, pageToken)
.stream()
.map(ModelEntity::toEntity)
.filter(entityFilter)
.map(transformer)
.collect(Collectors.toList());
} else {
// In this case, we cannot push the filter down into the query. We must therefore remove
// the page size limit from the PageToken and filter on the client side.
// TODO Implement a generic predicate that can be pushed down into different metastores
PageToken unlimitedPageSizeToken = pageToken.withPageSize(Integer.MAX_VALUE);
List<ModelEntity> rawData =
this.store.lookupFullEntitiesActive(
localSession.get(), catalogId, parentId, entityType, unlimitedPageSizeToken);
if (pageToken.pageSize < Integer.MAX_VALUE && rawData.size() > pageToken.pageSize) {
LOGGER.info(
"A page token could not be respected due to a predicate. "
+ "{} records were read but the client was asked to return {}.",
rawData.size(),
pageToken.pageSize);
}

data =
rawData.stream()
.map(ModelEntity::toEntity)
.filter(entityFilter)
.limit(pageToken.pageSize)
.map(transformer)
.collect(Collectors.toList());
}

return pageToken.buildNextPage(data);
}

/** {@inheritDoc} */
Expand Down Expand Up @@ -761,4 +797,9 @@ public void rollback() {
session.getTransaction().rollback();
}
}

/**
 * Returns the page-token builder used by this implementation, obtained from {@link
 * EntityIdPageToken#builder()}.
 */
@Override
public @NotNull PageToken.PageTokenBuilder<?> pageTokenBuilder() {
return EntityIdPageToken.builder();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
import java.util.List;
import java.util.stream.Collectors;
import org.apache.polaris.core.PolarisDiagnostics;
import org.apache.polaris.core.catalog.pagination.EntityIdPageToken;
import org.apache.polaris.core.catalog.pagination.PageToken;
import org.apache.polaris.core.catalog.pagination.ReadEverythingPageToken;
import org.apache.polaris.core.entity.PolarisBaseEntity;
import org.apache.polaris.core.entity.PolarisEntitiesActiveKey;
import org.apache.polaris.core.entity.PolarisEntityActiveRecord;
Expand Down Expand Up @@ -274,19 +277,39 @@ long countActiveChildEntities(
}

List<ModelEntity> lookupFullEntitiesActive(
EntityManager session, long catalogId, long parentId, @NotNull PolarisEntityType entityType) {
EntityManager session,
long catalogId,
long parentId,
@NotNull PolarisEntityType entityType,
@NotNull PageToken pageToken) {
diagnosticServices.check(session != null, "session_is_null");
diagnosticServices.check(
(pageToken instanceof EntityIdPageToken || pageToken instanceof ReadEverythingPageToken),
"unexpected_page_token");

// Currently check against ENTITIES not joining with ENTITIES_ACTIVE
String hql =
"SELECT m from ModelEntity m where m.catalogId=:catalogId and m.parentId=:parentId and m.typeCode=:typeCode";
"SELECT m from ModelEntity m "
+ "where m.catalogId=:catalogId and m.parentId=:parentId and m.typeCode=:typeCode and m.id > :tokenId";

if (pageToken instanceof EntityIdPageToken) {
hql += " order by m.id asc";
}

TypedQuery<ModelEntity> query =
session
.createQuery(hql, ModelEntity.class)
.setParameter("catalogId", catalogId)
.setParameter("parentId", parentId)
.setParameter("typeCode", entityType.getCode());
.setParameter("typeCode", entityType.getCode())
.setParameter("tokenId", -1L);

if (pageToken instanceof EntityIdPageToken) {
query =
query
.setParameter("tokenId", ((EntityIdPageToken) pageToken).id)
.setMaxResults(pageToken.pageSize);
}

return query.getResultList();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,4 +175,12 @@ public static <T> Builder<T> builder() {
"If set to true, allows tables to be dropped with the purge parameter set to true.")
.defaultValue(true)
.build();

/**
 * Boolean flag for pagination of list APIs (the description names listTables as an example).
 * Registered under the key {@code LIST_PAGINATION_ENABLED} with the catalog config name {@code
 * list-pagination.enabled}; the default value is {@code false}.
 */
public static final PolarisConfiguration<Boolean> LIST_PAGINATION_ENABLED =
PolarisConfiguration.<Boolean>builder()
.key("LIST_PAGINATION_ENABLED")
.catalogConfig("list-pagination.enabled")
.description("If set to true, pagination for APIs like listTables is enabled")
.defaultValue(false)
.build();
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ public interface PolarisConfigurationStore {

if (config.defaultValue instanceof Boolean) {
return config.cast(Boolean.valueOf(String.valueOf(value)));
} else if (config.defaultValue instanceof Integer) {
return config.cast(Integer.valueOf(value.toString()));
} else {
return config.cast(value);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.catalog.pagination;

import java.util.List;
import org.apache.polaris.core.entity.PolarisBaseEntity;
import org.apache.polaris.core.persistence.models.ModelEntity;

/**
 * A {@link PageToken} implementation that tracks the greatest ID from either {@link
 * PolarisBaseEntity} or {@link ModelEntity} objects supplied in updates. Entities are meant to be
 * filtered during listing such that only entities with an ID greater than the ID of the token are
 * returned.
 */
public class EntityIdPageToken extends PageToken {
  /** The minimum ID that could be attached to an entity */
  private static final long MINIMUM_ID = 0;

  /** The entity ID to start with; one below {@link #MINIMUM_ID} so that no entity is excluded. */
  private static final long BASE_ID = MINIMUM_ID - 1;

  /** The ID carried by this token; listing is expected to return only entities above it. */
  public long id;

  /**
   * Creates a token positioned after {@code id} with the given page size.
   *
   * @param id the entity ID carried by the token
   * @param pageSize the number of entries requested per page
   */
  private EntityIdPageToken(long id, int pageSize) {
    this.id = id;
    this.pageSize = pageSize;
    // validate() is inherited from PageToken; its checks are not visible from this class.
    validate();
  }

  /**
   * Returns the values that make up this token, in the order expected by {@link
   * EntityIdPageTokenBuilder#fromStringComponents(List)}: the ID first, then the page size.
   */
  @Override
  protected List<String> getComponents() {
    return List.of(String.valueOf(id), String.valueOf(pageSize));
  }

  /** Get a new `EntityIdPageTokenBuilder` instance */
  public static PageTokenBuilder<EntityIdPageToken> builder() {
    return new EntityIdPageToken.EntityIdPageTokenBuilder();
  }

  /** Returns a builder that produces tokens of this same type. */
  @Override
  protected PageTokenBuilder<?> getBuilder() {
    return EntityIdPageToken.builder();
  }

  /** A {@link PageTokenBuilder} implementation for {@link EntityIdPageToken} */
  public static class EntityIdPageTokenBuilder extends PageTokenBuilder<EntityIdPageToken> {

    /** Returns the prefix that identifies this token type. */
    @Override
    public String tokenPrefix() {
      return "polaris-entity-id";
    }

    /** Returns the number of components this token type is made of. */
    @Override
    public int expectedComponents() {
      // id, pageSize
      return 2;
    }

    /**
     * Rebuilds a token from its string components.
     *
     * @param components the ID at index 0 and the page size at index 1
     * @return the reconstructed token
     * @throws NumberFormatException if either component is not a valid number
     */
    @Override
    protected EntityIdPageToken fromStringComponents(List<String> components) {
      // The ID is a long: parsing it as an int would reject any ID above Integer.MAX_VALUE.
      long parsedId = Long.parseLong(components.get(0));
      int parsedPageSize = Integer.parseInt(components.get(1));
      return new EntityIdPageToken(parsedId, parsedPageSize);
    }

    /**
     * Builds the initial token for a listing with the given page size.
     *
     * @param limit the requested page size
     * @return a token carrying the base ID, which is below every possible entity ID
     */
    @Override
    protected EntityIdPageToken fromLimitImpl(int limit) {
      return new EntityIdPageToken(BASE_ID, limit);
    }
  }

  /**
   * Computes the token for the page that follows {@code newData}.
   *
   * @param newData the entries of the page that was just read; the last entry supplies the next ID
   * @return {@link PageToken#DONE} when {@code newData} is null, empty, or holds fewer entries than
   *     the page size; otherwise a token carrying the ID of the last entry
   * @throws IllegalArgumentException if the last entry is neither a {@link ModelEntity} nor a
   *     {@link PolarisBaseEntity}
   */
  @Override
  public PageToken updated(List<?> newData) {
    if (newData == null || newData.isEmpty() || newData.size() < this.pageSize) {
      return PageToken.DONE;
    }

    // Inspect the element whose ID is actually used, rather than the head of the list.
    Object last = newData.get(newData.size() - 1);
    if (last instanceof ModelEntity) {
      return new EntityIdPageToken(((ModelEntity) last).getId(), this.pageSize);
    } else if (last instanceof PolarisBaseEntity) {
      return new EntityIdPageToken(((PolarisBaseEntity) last).getId(), this.pageSize);
    } else {
      throw new IllegalArgumentException(
          "Cannot build a page token from: " + last.getClass().getSimpleName());
    }
  }

  /**
   * Returns a copy of this token with a different page size.
   *
   * @param pageSize the new page size; when {@code null}, the returned token keeps the current
   *     page size and carries the base ID instead of the current ID
   * @return the new token
   */
  @Override
  public PageToken withPageSize(Integer pageSize) {
    if (pageSize == null) {
      return new EntityIdPageToken(BASE_ID, this.pageSize);
    } else {
      return new EntityIdPageToken(this.id, pageSize);
    }
  }
}
Loading