Skip to content

Commit

Permalink
perf(graphql): add configurable tag selectivity sorting PE-7054
Browse files Browse the repository at this point in the history
This change adds support for configuring tag join order via a
TAG_SELECTIVITY environment variable containing JSON mapping tag names
to selectivity weights. This is used when constructing SQLite tag join
SQL to determine the order in which to perform the joins (most selective
first). It also alters the table used to order results. If no tag with a
selectivity weight >= 0 is present and an owner or target address is
specified on the query, the transactions or data item tables are used
for sorting rather than the tags table. This prevents a lot of index
scanning when low-selectivity tags are combined with a high-selectivity
owner/target address filter.
  • Loading branch information
djwhitt committed Nov 12, 2024
1 parent 4f3424f commit cf4f07a
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 13 deletions.
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ services:
- WRITE_TRANSACTION_DB_SIGNATURES=${WRITE_TRANSACTION_DB_SIGNATURES:-}
- ENABLE_DATA_DB_WAL_CLEANUP=${ENABLE_DATA_DB_WAL_CLEANUP:-}
- MAX_DATA_ITEM_QUEUE_SIZE=${MAX_DATA_ITEM_QUEUE_SIZE:-}
- TAG_SELECTIVITY=${TAG_SELECTIVITY:-}
networks:
- ar-io-network
depends_on:
Expand Down
1 change: 1 addition & 0 deletions docs/envs.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ This document describes the environment variables that can be used to configure
| CONTIGUOUS_DATA_CACHE_CLEANUP_THRESHOLD | Number | undefined | Sets the age threshold in seconds; files older than this are candidates for contiguous data cache cleanup |
| ENABLE_MEMPOOL_WATCHER | Boolean | false | If true, the observer will start indexing pending tx from the mempool |
| MEMPOOL_POLLING_INTERVAL_MS | Number | 30000 | Sets the mempool polling interval in milliseconds |
| TAG_SELECTIVITY | String | Refer to config.ts | A JSON map of tag names to numeric selectivity weights used to order SQLite tag joins (tags with higher weights are joined first) |
| AR_IO_SQLITE_BACKUP_S3_BUCKET_NAME | String | "" | S3-compatible bucket name, used by the Litestream backup service |
| AR_IO_SQLITE_BACKUP_S3_BUCKET_REGION | String | "" | S3-compatible bucket region, used by the Litestream backup service |
| AR_IO_SQLITE_BACKUP_S3_BUCKET_ACCESS_KEY | String | "" | S3-compatible bucket access_key credential, used by Litestream backup service, omit if using resource-based IAM role |
Expand Down
26 changes: 24 additions & 2 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,26 @@ export const MAX_DATA_ITEM_QUEUE_SIZE = +env.varOrDefault(
'100000',
);

//
// GraphQL
//

/**
 * Parses a TAG_SELECTIVITY JSON string into a map of tag names to weights.
 *
 * The value comes from the environment, so it is validated rather than
 * blindly asserted to have the expected shape.
 *
 * @param raw - JSON text expected to contain an object whose values are numbers
 * @returns the validated map of tag names to selectivity weights
 * @throws Error if the text is not valid JSON, is not a plain JSON object, or
 * contains a weight that is not a finite number
 */
function parseTagSelectivity(raw: string): Record<string, number> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`TAG_SELECTIVITY must be valid JSON: ${reason}`);
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(
      'TAG_SELECTIVITY must be a JSON object mapping tag names to numeric weights',
    );
  }

  const weights: Record<string, number> = {};
  for (const [name, weight] of Object.entries(parsed)) {
    // JSON.parse can yield Infinity for out-of-range literals such as 1e999,
    // so check finiteness as well as type.
    if (typeof weight !== 'number' || !Number.isFinite(weight)) {
      throw new Error(
        `TAG_SELECTIVITY weight for tag "${name}" must be a finite number`,
      );
    }
    weights[name] = weight;
  }
  return weights;
}

// Map of tag names to selectivity weights, read from the TAG_SELECTIVITY
// environment variable (JSON) with the defaults below as a fallback. Tags
// with higher weights are joined first when building SQLite tag joins.
export const TAG_SELECTIVITY = parseTagSelectivity(
  env.varOrDefault(
    'TAG_SELECTIVITY',
    JSON.stringify({
      'Parent-Folder-Id': 20,
      Message: 20,
      'Drive-Id': 10,
      Process: 10,
      Recipient: 10,
      'App-Name': -10,
      'Content-Type': -10,
      'Data-Protocol': -10,
    }),
  ),
);

// ClickHouse URL
export const CLICKHOUSE_URL = env.varOrUndefined('CLICKHOUSE_URL');

Expand Down Expand Up @@ -322,7 +342,7 @@ export const TRUSTED_ARNS_GATEWAY_URL = env.varOrUndefined(
//
// Mempool watcher
//
//

export const ENABLE_MEMPOOL_WATCHER =
env.varOrDefault('ENABLE_MEMPOOL_WATCHER', 'false') === 'true';

Expand All @@ -334,7 +354,7 @@ export const MEMPOOL_POLLING_INTERVAL_MS = +env.varOrDefault(
//
// AWS settings
//
//

export const AWS_ACCESS_KEY_ID = env.varOrUndefined('AWS_ACCESS_KEY_ID');
export const AWS_SECRET_ACCESS_KEY = env.varOrUndefined(
'AWS_SECRET_ACCESS_KEY',
Expand Down Expand Up @@ -363,7 +383,9 @@ export const GET_DATA_CIRCUIT_BREAKER_TIMEOUT_MS = +env.varOrDefault(
'500',
);

//
// AO
//

export const AO_MU_URL = env.varOrUndefined('AO_MU_URL');
export const AO_CU_URL = env.varOrUndefined('AO_CU_URL');
Expand Down
40 changes: 29 additions & 11 deletions src/database/standalone-sqlite.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,6 @@ const STABLE_FLUSH_INTERVAL = 5;
const NEW_TX_CLEANUP_WAIT_SECS = 60 * 60 * 2;
const NEW_DATA_ITEM_CLEANUP_WAIT_SECS = 60 * 60 * 2;
const BUNDLE_REPROCESS_WAIT_SECS = 60 * 15;
// Names of tags treated as low selectivity when ordering tag joins.
const LOW_SELECTIVITY_TAG_NAMES = new Set(['App-Name', 'Content-Type']);

/**
 * Sort key for a tag join: 1 when the tag name is in
 * LOW_SELECTIVITY_TAG_NAMES, 0 otherwise. An ascending sort on this key
 * therefore places low selectivity tags after all others.
 */
function tagJoinSortPriority(tag: { name: string; values: string[] }) {
  if (LOW_SELECTIVITY_TAG_NAMES.has(tag.name)) {
    return 1;
  }
  return 0;
}

export function encodeTransactionGqlCursor({
height,
Expand Down Expand Up @@ -410,6 +405,8 @@ export class StandaloneSqliteDatabaseWorker {

private insertDataHashCache: NodeCache;

private tagSelectivity: Record<string, number>;

// Transactions
resetBundlesToHeightFn: Sqlite.Transaction;
resetCoreToHeightFn: Sqlite.Transaction;
Expand All @@ -427,12 +424,14 @@ export class StandaloneSqliteDatabaseWorker {
dataDbPath,
moderationDbPath,
bundlesDbPath,
tagSelectivity,
}: {
log: winston.Logger;
coreDbPath: string;
dataDbPath: string;
moderationDbPath: string;
bundlesDbPath: string;
tagSelectivity: Record<string, number>;
}) {
this.log = log;

Expand Down Expand Up @@ -772,6 +771,8 @@ export class StandaloneSqliteDatabaseWorker {
checkperiod: 60, // 1 minute
useClones: false,
});

this.tagSelectivity = tagSelectivity;
}

getMaxHeight() {
Expand Down Expand Up @@ -1469,16 +1470,29 @@ export class StandaloneSqliteDatabaseWorker {
}

if (tags) {
// To improve performance, force tags with large result sets to be last
const sortByTagJoinPriority = R.sortBy(tagJoinSortPriority);
sortByTagJoinPriority(tags).forEach((tag, index) => {
// Order tag joins by selectivity (most selective first) to narrow
// results as early as possible
const sortByTagSelectivity = R.sortBy(
(tag: { name: string; values: string[] }) => {
return -(this.tagSelectivity[tag.name] ?? 0);
},
);
sortByTagSelectivity(tags).forEach((tag, index) => {
const tagAlias = `"${index}_${index}"`;
let joinCond: { [key: string]: string };
if (source === 'stable_txs' || source === 'stable_items') {
if (index === 0) {
heightSortTableAlias = tagAlias;
blockTransactionIndexSortTableAlias = tagAlias;
dataItemSortTableAlias = tagAlias;
if (
// Order results by selective tags ...
// NOTE(review): a tag with no entry in tagSelectivity yields undefined
// here, and `undefined >= 0` is false, whereas the join ordering treats a
// missing entry as weight 0 (`?? 0`) — confirm this difference is intended.
this.tagSelectivity[tag.name] >= 0 ||
// ... or non-selective tags if neither recipients nor owners
// were specified
// NOTE(review): if recipients or owners is undefined the optional chain
// yields undefined and this clause is false — presumably both default to
// empty arrays upstream; verify against the method signature.
(recipients?.length === 0 && owners?.length === 0)
) {
// Sort on the first (most selective) tag join's columns
heightSortTableAlias = tagAlias;
blockTransactionIndexSortTableAlias = tagAlias;
dataItemSortTableAlias = tagAlias;
}
joinCond = {
[`${blockTransactionIndexTableAlias}.block_transaction_index`]: `${tagAlias}.block_transaction_index`,
[`${heightTableAlias}.height`]: `${tagAlias}.height`,
Expand Down Expand Up @@ -2489,12 +2503,14 @@ export class StandaloneSqliteDatabase
dataDbPath,
moderationDbPath,
bundlesDbPath,
tagSelectivity,
}: {
log: winston.Logger;
coreDbPath: string;
dataDbPath: string;
moderationDbPath: string;
bundlesDbPath: string;
tagSelectivity: Record<string, number>;
}) {
this.log = log.child({ class: `${this.constructor.name}` });

Expand Down Expand Up @@ -2580,6 +2596,7 @@ export class StandaloneSqliteDatabase
dataDbPath,
moderationDbPath,
bundlesDbPath,
tagSelectivity: tagSelectivity,
},
});

Expand Down Expand Up @@ -3068,6 +3085,7 @@ if (!isMainThread) {
dataDbPath: workerData.dataDbPath,
moderationDbPath: workerData.moderationDbPath,
bundlesDbPath: workerData.bundlesDbPath,
tagSelectivity: workerData.tagSelectivity,
});

let errorCount = 0;
Expand Down
1 change: 1 addition & 0 deletions src/system.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ export const db = new StandaloneSqliteDatabase({
dataDbPath: 'data/sqlite/data.db',
moderationDbPath: 'data/sqlite/moderation.db',
bundlesDbPath: 'data/sqlite/bundles.db',
tagSelectivity: config.TAG_SELECTIVITY,
});

export const chainIndex: ChainIndex = db;
Expand Down

0 comments on commit cf4f07a

Please sign in to comment.