Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use caffeine in CachedStore and adapt ManagedInternalForm for state p… #3607

Open
wants to merge 35 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
8f25433
use caffeine in CachedStore and adapt ManagedInternalForm for state p…
thoniTUB Oct 21, 2024
0b96b94
Fix serdes problem in LocalAuthenticationRealm
thoniTUB Oct 22, 2024
07e810d
adds test for CachedStore
thoniTUB Oct 22, 2024
bcf05f6
fix race condition in ManagedQuery#lastResultCount
thoniTUB Oct 22, 2024
e283fcd
adds documentation
thoniTUB Oct 22, 2024
16c4cbb
pass config on construction not on init
thoniTUB Oct 22, 2024
0ca6358
fix cache invalidation on value update
thoniTUB Oct 24, 2024
313ed5c
review changes and fixing the add behaviour
thoniTUB Oct 24, 2024
8425e58
adds metrics
thoniTUB Oct 24, 2024
db7a03f
fix double metrics recorder
thoniTUB Oct 24, 2024
26cb189
limit sending of RegisterColumnValues jobs with semaphore
thoniTUB Oct 24, 2024
4037f5c
makes storages load on start configurable
thoniTUB Nov 7, 2024
c161294
adds task to load specific storages
thoniTUB Nov 8, 2024
5532a0a
Merge branch 'develop' into feature/use-caffeine-cache
thoniTUB Nov 8, 2024
ad52330
fix serdes issue with NonPersistentStoreFactory
thoniTUB Nov 13, 2024
75276ab
Merge branch 'refs/heads/develop' into feature/use-caffeine-cache
thoniTUB Nov 13, 2024
efdd9ac
Merge remote-tracking branch 'origin/develop' into feature/use-caffei…
thoniTUB Dec 2, 2024
1631fdf
adds logging for evicted entries
thoniTUB Dec 2, 2024
c2227d4
Merge branch 'develop' into feature/use-caffeine-cache
thoniTUB Dec 4, 2024
53e8d35
use parallel deserialization in BucketManager
thoniTUB Dec 10, 2024
5e6ade9
switch back to serial registration of buckets and cblocks
thoniTUB Dec 10, 2024
aad87d9
fix missing Owner relation to ManagedExecutionId and FormConfigId
thoniTUB Dec 10, 2024
a541b79
move direct storage access out of loop
thoniTUB Dec 10, 2024
553e954
revert using id in delete path because it is missing the meta storage
thoniTUB Dec 10, 2024
2d32249
revert using id in delete path because it is missing the meta storage
thoniTUB Dec 10, 2024
6a8e16b
adds logs
thoniTUB Dec 11, 2024
21cfd8e
improve performance for consistency
thoniTUB Dec 11, 2024
77af16d
adds logging for cache misses
thoniTUB Dec 13, 2024
64244ea
improve query plan creation by not resolving buckets
thoniTUB Dec 13, 2024
3a7f482
improve BucketManager creation time by only loading CBlocks
thoniTUB Dec 13, 2024
d674c77
allow caching of xodus blobs
thoniTUB Dec 16, 2024
a7f8f7a
Merge branch 'develop' into feature/use-caffeine-cache
thoniTUB Dec 16, 2024
067bb73
fix default option and log
thoniTUB Dec 17, 2024
9dd290e
fix unfinished transaction finish
thoniTUB Dec 17, 2024
5190da1
renew transactions
thoniTUB Dec 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions backend/src/main/java/com/bakdata/conquery/Conquery.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.bakdata.conquery.mode.cluster.ClusterManagerProvider;
import com.bakdata.conquery.mode.local.LocalManagerProvider;
import com.bakdata.conquery.models.config.ConqueryConfig;
import com.codahale.metrics.MetricRegistry;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.dropwizard.configuration.JsonConfigurationFactory;
import io.dropwizard.configuration.SubstitutingSourceProvider;
Expand Down Expand Up @@ -59,12 +60,21 @@ public void initialize(Bootstrap<ConqueryConfig> bootstrap) {
bootstrap.addCommand(new RecodeStoreCommand());
bootstrap.addCommand(new MigrateCommand());

((MutableInjectableValues) confMapper.getInjectableValues()).add(Validator.class, bootstrap.getValidatorFactory().getValidator());
MutableInjectableValues injectableValues = (MutableInjectableValues) confMapper.getInjectableValues();
injectableValues.add(Validator.class, bootstrap.getValidatorFactory().getValidator());
injectableValues.add(MetricRegistry.class, bootstrap.getMetricRegistry());

// do some setup in other classes after initialization but before running a
// command
bootstrap.addBundle(new ConfiguredBundle<>() {

@Override
public void initialize(Bootstrap<?> bootstrap) {
// Allow overriding of config from environment variables.
bootstrap.setConfigurationSourceProvider(new SubstitutingSourceProvider(
bootstrap.getConfigurationSourceProvider(), StringSubstitutor.createInterpolator()));
}

@Override
public void run(ConqueryConfig configuration, Environment environment) {
configuration.configureObjectMapper(environment.getObjectMapper());
Expand All @@ -77,13 +87,6 @@ protected void configure() {
}
});
}

@Override
public void initialize(Bootstrap<?> bootstrap) {
// Allow overriding of config from environment variables.
bootstrap.setConfigurationSourceProvider(new SubstitutingSourceProvider(
bootstrap.getConfigurationSourceProvider(), StringSubstitutor.createInterpolator()));
}
});

bootstrap.addBundle(new PrometheusBundle());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,10 @@ public class QueryProcessor {
private Validator validator;


public Stream<? extends ExecutionStatus> getAllQueries(Dataset dataset, HttpServletRequest req, Subject subject, boolean allProviders) {
final Stream<ManagedExecution> allQueries = storage.getAllExecutions();

return getQueriesFiltered(dataset.getId(), RequestAwareUriBuilder.fromRequest(req), subject, allQueries, allProviders);
public List<? extends ExecutionStatus> getAllQueries(Dataset dataset, HttpServletRequest req, Subject subject, boolean allProviders) {
try(Stream<ManagedExecution> allQueries = storage.getAllExecutions()) {
return getQueriesFiltered(dataset.getId(), RequestAwareUriBuilder.fromRequest(req), subject, allQueries, allProviders).toList();
}
}

public Stream<? extends ExecutionStatus> getQueriesFiltered(DatasetId datasetId, UriBuilder uriBuilder, Subject subject, Stream<ManagedExecution> allQueries, boolean allProviders) {
Expand Down Expand Up @@ -216,7 +216,7 @@ public void cancel(Subject subject, ManagedExecution query) {

log.info("User[{}] cancelled Query[{}]", subject.getId(), query.getId());

executionManager.cancelQuery(query);
executionManager.cancelExecution(query);
}

public void patchQuery(Subject subject, ManagedExecution execution, MetaDataPatch patch) {
Expand Down Expand Up @@ -269,18 +269,18 @@ public void reexecute(Subject subject, ManagedExecution query) {
if (!query.getState().equals(ExecutionState.RUNNING)) {
final Namespace namespace = query.getNamespace();

namespace.getExecutionManager().execute(query, config);
namespace.getExecutionManager().execute(query);
}
}

public void deleteQuery(Subject subject, ManagedExecution execution) {
log.info("User[{}] deleted Query[{}]", subject.getId(), execution.getId());
public void deleteQuery(Subject subject, ManagedExecutionId execution) {
log.info("User[{}] deleted Query[{}]", subject.getId(), execution);

datasetRegistry.get(execution.getDataset())
.getExecutionManager() // Don't go over execution#getExecutionManager() as that's only set when query is initialized
.clearQueryResults(execution);

storage.removeExecution(execution.getId());
storage.removeExecution(execution);
}

public ExecutionState awaitDone(ManagedExecution query, int time, TimeUnit unit) {
Expand All @@ -291,7 +291,7 @@ public ExecutionState awaitDone(ManagedExecution query, int time, TimeUnit unit)
public FullExecutionStatus getQueryFullStatus(ManagedExecution query, Subject subject, UriBuilder url, Boolean allProviders) {
final Namespace namespace = datasetRegistry.get(query.getDataset());

query.initExecutable(config);
query.initExecutable();

final FullExecutionStatus status = query.buildStatusFull(subject, namespace);

Expand Down Expand Up @@ -338,7 +338,7 @@ public ExternalUploadResult uploadEntities(Subject subject, Dataset dataset, Ext
execution.setLabel(upload.getLabel());
}

execution.initExecutable(config);
execution.initExecutable();

return new ExternalUploadResult(execution.getId(), statistic.getResolved().size(), statistic.getUnresolvedId(), statistic.getUnreadableDate());
}
Expand Down Expand Up @@ -375,9 +375,12 @@ public FullExecutionStatus getSingleEntityExport(Subject subject, UriBuilder uri
throw new ConqueryError.ExecutionProcessingError();
}

// Workaround update our execution as the lastresultcount was set in the background
final EntityPreviewExecution executionFinished = (EntityPreviewExecution) execution.getId().resolve();
executionFinished.initExecutable();

final FullExecutionStatus status = execution.buildStatusFull(subject, namespace);
status.setResultUrls(getResultAssets(config.getResultProviders(), execution, uriBuilder, false));
status.setResultUrls(getResultAssets(config.getResultProviders(), executionFinished, uriBuilder, false));
return status;
}

Expand Down Expand Up @@ -434,21 +437,21 @@ public ManagedExecution postQuery(Dataset dataset, QueryDescription query, Subje

final Optional<ManagedExecution>
execution =
executionId.map(id -> tryReuse(query, id, namespace, config, executionManager, subject.getUser()));
executionId.map(id -> tryReuse(query, id, namespace, executionManager, subject.getUser()));

if (execution.isPresent()) {
return execution.get();
}
}

// Execute the query
return executionManager.runQuery(namespace, query, subject.getId(), config, system);
return executionManager.runQuery(namespace, query, subject.getId(), system);
}

/**
* Determine if the submitted query does reuse ONLY another query and restart that instead of creating another one.
*/
private ManagedExecution tryReuse(QueryDescription query, ManagedExecutionId executionId, Namespace namespace, ConqueryConfig config, ExecutionManager executionManager, User user) {
private ManagedExecution tryReuse(QueryDescription query, ManagedExecutionId executionId, Namespace namespace, ExecutionManager executionManager, User user) {

ManagedExecution execution = storage.getExecution(executionId);

Expand Down Expand Up @@ -491,7 +494,7 @@ private ManagedExecution tryReuse(QueryDescription query, ManagedExecutionId exe

log.trace("Re-executing Query {}", execution);

executionManager.execute(execution, config);
executionManager.execute(execution);

return execution;

Expand Down Expand Up @@ -577,7 +580,7 @@ public Stream<Map<String, String>> resolveEntities(Subject subject, List<FilterV
.filter(Predicate.not(Map::isEmpty));
}

public ResultStatistics getResultStatistics(SingleTableResult managedQuery) {
public <E extends ManagedExecution & SingleTableResult> ResultStatistics getResultStatistics(E managedQuery) {

final Locale locale = I18n.LOCALE.get();
final NumberFormat decimalFormat = NumberFormat.getNumberInstance(locale);
Expand All @@ -590,6 +593,8 @@ public ResultStatistics getResultStatistics(SingleTableResult managedQuery) {
new PrintSettings(true, locale, managedQuery.getNamespace(), config, null, null, decimalFormat, integerFormat);
final UniqueNamer uniqueNamer = new UniqueNamer(printSettings);

managedQuery.initExecutable();

final List<ResultInfo> resultInfos = managedQuery.getResultInfos();

final Optional<ResultInfo>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.bakdata.conquery.io.cps.CPSTypeIdResolver;
import com.bakdata.conquery.io.cps.SubTyped;
import com.bakdata.conquery.io.storage.MetaStorage;
import com.bakdata.conquery.models.config.ConqueryConfig;
import com.bakdata.conquery.models.execution.ManagedExecution;
import com.bakdata.conquery.models.forms.frontendconfiguration.FormScanner;
import com.bakdata.conquery.models.forms.frontendconfiguration.FormType;
Expand Down Expand Up @@ -133,8 +134,8 @@ public String getFormType() {
}

@Override
public ManagedExecution toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry) {
return new ExternalExecution(this, user, submittedDataset, storage, datasetRegistry);
public ManagedExecution toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry, ConqueryConfig config) {
return new ExternalExecution(this, user, submittedDataset, storage, datasetRegistry, config);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.bakdata.conquery.internationalization.ExportFormC10n;
import com.bakdata.conquery.io.cps.CPSType;
import com.bakdata.conquery.io.storage.MetaStorage;
import com.bakdata.conquery.models.config.ConqueryConfig;
import com.bakdata.conquery.models.forms.managed.ManagedInternalForm;
import com.bakdata.conquery.models.forms.util.Alignment;
import com.bakdata.conquery.models.forms.util.Resolution;
Expand Down Expand Up @@ -136,8 +137,9 @@ public String getLocalizedTypeLabel() {
}

@Override
public ManagedInternalForm<ExportForm> toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry) {
return new ManagedInternalForm<>(this, user, submittedDataset, storage, datasetRegistry);
public ManagedInternalForm<ExportForm> toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry,
ConqueryConfig config) {
return new ManagedInternalForm<>(this, user, submittedDataset, storage, datasetRegistry, config);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.bakdata.conquery.io.cps.CPSType;
import com.bakdata.conquery.io.storage.MetaStorage;
import com.bakdata.conquery.models.common.Range;
import com.bakdata.conquery.models.config.ConqueryConfig;
import com.bakdata.conquery.models.forms.managed.ManagedInternalForm;
import com.bakdata.conquery.models.i18n.I18n;
import com.bakdata.conquery.models.identifiable.ids.specific.DatasetId;
Expand Down Expand Up @@ -126,7 +127,8 @@ public String getLocalizedTypeLabel() {


@Override
public ManagedInternalForm<FullExportForm> toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry) {
return new ManagedInternalForm<>(this, user, submittedDataset, storage, datasetRegistry);
public ManagedInternalForm<FullExportForm> toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry,
ConqueryConfig config) {
return new ManagedInternalForm<>(this, user, submittedDataset, storage, datasetRegistry, config);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@
import java.util.stream.Stream;

import com.bakdata.conquery.io.storage.MetaStorage;
import com.bakdata.conquery.models.execution.ExecutionState;
import com.bakdata.conquery.models.execution.ManagedExecution;
import com.bakdata.conquery.models.config.ConqueryConfig;
import com.bakdata.conquery.models.identifiable.ids.specific.DatasetId;
import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId;
import com.bakdata.conquery.models.identifiable.ids.specific.UserId;
import com.bakdata.conquery.models.query.ExecutionManager;
import com.bakdata.conquery.models.query.ManagedQuery;
import com.bakdata.conquery.models.query.QueryPlanContext;
import com.bakdata.conquery.models.query.QueryResolveContext;
Expand All @@ -27,8 +25,6 @@ public abstract class Query implements QueryDescription {

public abstract QueryPlan<?> createQueryPlan(QueryPlanContext context);

public abstract void collectRequiredQueries(Set<ManagedExecutionId> requiredQueries);

@Override
public abstract void resolve(QueryResolveContext context);

Expand All @@ -38,12 +34,14 @@ public Set<ManagedExecutionId> collectRequiredQueries() {
return set;
}

public abstract void collectRequiredQueries(Set<ManagedExecutionId> requiredQueries);

@JsonIgnore
public abstract List<ResultInfo> getResultInfos();

@Override
public ManagedQuery toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry) {
return new ManagedQuery(this, user, submittedDataset, storage, datasetRegistry);
public ManagedQuery toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry, ConqueryConfig config) {
return new ManagedQuery(this, user, submittedDataset, storage, datasetRegistry, config);
}

/**
Expand All @@ -59,7 +57,6 @@ public CQElement getReusableComponents() {
*
* @param results
* @return the number of results in the result List.
* @see ManagedExecution#finish(ExecutionState, ExecutionManager) for how it's used.
*/
public long countResults(Stream<EntityResult> results) {
return results.map(EntityResult::listResultLines)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import com.bakdata.conquery.io.storage.MetaStorage;
import com.bakdata.conquery.models.auth.entities.Subject;
import com.bakdata.conquery.models.auth.permissions.Ability;
import com.bakdata.conquery.models.config.ConqueryConfig;
import com.bakdata.conquery.models.datasets.Dataset;
import com.bakdata.conquery.models.datasets.concepts.Concept;
import com.bakdata.conquery.models.datasets.concepts.ConceptElement;
Expand Down Expand Up @@ -45,13 +46,11 @@ public interface QueryDescription extends Visitable {
* @param user
* @param submittedDataset
* @param storage
* @param config
* @return
*/
ManagedExecution toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry);
ManagedExecution toManagedExecution(UserId user, DatasetId submittedDataset, MetaStorage storage, DatasetRegistry<?> datasetRegistry, ConqueryConfig config);


Set<ManagedExecutionId> collectRequiredQueries();

/**
* Initializes a submitted description using the provided context.
* All parameters that are set in this phase must be annotated with {@link com.bakdata.conquery.io.jackson.View.InternalCommunication}.
Expand All @@ -61,14 +60,14 @@ public interface QueryDescription extends Visitable {

/**
* Allows the implementation to add visitors that traverse the QueryTree.
* All visitors are concatenated so only a single traverse needs to be done.
* All visitors are concatenated so only a single traverse needs to be done.
* @param visitors The structure to which new visitors need to be added.
*/
default void addVisitors(@NonNull List<QueryVisitor> visitors) {
// Register visitors for permission checks
visitors.add(new QueryUtils.ExternalIdChecker());
}

/**
* Check implementation specific permissions. Is called after all visitors have been registered and executed.
*/
Expand Down Expand Up @@ -111,6 +110,8 @@ static void authorizeQuery(QueryDescription queryDescription, Subject subject, D
}
}

Set<ManagedExecutionId> collectRequiredQueries();

default RequiredEntities collectRequiredEntities(QueryExecutionContext context){
return new RequiredEntities(context.getBucketManager().getEntities());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ public QPNode createQueryPlan(QueryPlanContext context, ConceptQueryPlan plan) {

final List<Aggregator<?>> connectorAggregators = createAggregators(plan, table.getSelects());

// Exists aggregators hold a reference to their parent FiltersNode so they need to be treated separately.
// Exists aggregators hold a reference to their parent FiltersNode, so they need to be treated separately.
// They also don't need aggregation as they simply imitate their reference.
final List<ExistsAggregator> existsAggregators =
connectorAggregators.stream()
Expand All @@ -209,7 +209,7 @@ public QPNode createQueryPlan(QueryPlanContext context, ConceptQueryPlan plan) {


final List<Aggregator<CDateSet>> eventDateUnionAggregators =
aggregateEventDates ? List.of(new EventDateUnionAggregator(Set.of(table.getConnector().<Connector>resolve().getResolvedTable())))
aggregateEventDates ? List.of(new EventDateUnionAggregator(Set.of(table.getConnector().resolve().getResolvedTable())))
: Collections.emptyList();

aggregators.addAll(eventDateUnionAggregators);
Expand Down Expand Up @@ -285,7 +285,7 @@ public RequiredEntities collectRequiredEntities(QueryExecutionContext context) {
*/
private static List<Aggregator<?>> createAggregators(ConceptQueryPlan plan, List<? extends SelectId> selects) {
return selects.stream()
.map(SelectId::<Select>resolve)
.map(SelectId::resolve)
.map(Select::createAggregator)
.peek(plan::registerAggregator)
.collect(Collectors.toList());
Expand Down
Loading
Loading