From b4418f75e4e2a4df0f00493f7137ef7170a09e2a Mon Sep 17 00:00:00 2001 From: David Whittington Date: Mon, 15 May 2023 17:37:13 -0500 Subject: [PATCH 01/33] feat(bundles): add bundle/data item GQL index schema PE-3769 Adds the DB schema required for indexing data items for GraphQL querying. Also includes a table for tracking bundle status (processed_at + data_item_count). Bundles use a separate SQLite DB (similar to data) to reduce lock contention and support greater bootstrapping flexibility. --- ...023.05.15T18.06.50.bundles.init-schema.sql | 125 ++++++++++++++++++ ...023.05.15T18.06.50.bundles.init-schema.sql | 9 ++ 2 files changed, 134 insertions(+) create mode 100644 migrations/2023.05.15T18.06.50.bundles.init-schema.sql create mode 100644 migrations/down/2023.05.15T18.06.50.bundles.init-schema.sql diff --git a/migrations/2023.05.15T18.06.50.bundles.init-schema.sql b/migrations/2023.05.15T18.06.50.bundles.init-schema.sql new file mode 100644 index 00000000..cdc6ac5a --- /dev/null +++ b/migrations/2023.05.15T18.06.50.bundles.init-schema.sql @@ -0,0 +1,125 @@ +CREATE TABLE IF NOT EXISTS bundle_formats ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL +); + +INSERT INTO bundle_formats (id, name) VALUES (0, 'ans-102'); +INSERT INTO bundle_formats (id, name) VALUES (1, 'ans-104'); + +CREATE TABLE IF NOT EXISTS bundles ( + id BLOB PRIMARY KEY, + format INTEGER NOT NULL, + data_item_count INTEGER NOT NULL, + first_processed_at INTEGER NOT NULL, + last_processed_at INTEGER NOT NULL +); + +CREATE TABLE IF NOT EXISTS bundle_data_items ( + id BLOB, + parent_id BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + indexed_at INTEGER NOT NULL, + PRIMARY KEY (id, parent_id) +); + +CREATE TABLE IF NOT EXISTS wallets ( + address BLOB PRIMARY KEY, + public_modulus BLOB +); + +CREATE TABLE IF NOT EXISTS stable_data_items ( + -- Identity + id BLOB NOT NULL, + parent_id BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + height INTEGER NOT NULL, + block_transaction_index INTEGER NOT NULL, + signature BLOB NOT NULL, + anchor BLOB NOT NULL, + + -- Ownership + owner_address BLOB NOT NULL, + target BLOB, + + -- Data + data_offset INTEGER NOT NULL, + data_size INTEGER NOT NULL, + content_type TEXT, + + -- Metadata + tag_count INTEGER NOT NULL, + indexed_at INTEGER NOT NULL, + PRIMARY KEY (id) +); + +CREATE INDEX IF NOT EXISTS stable_data_items_height_block_transaction_index_id_idx ON stable_data_items (height, block_transaction_index, id); +CREATE INDEX IF NOT EXISTS stable_data_items_target_height_block_transaction_index_id_idx ON stable_data_items (target, height, block_transaction_index, id); +CREATE INDEX IF NOT EXISTS stable_data_items_owner_address_height_block_transaction_index_id_idx ON stable_data_items (owner_address, height, block_transaction_index, id); +CREATE INDEX IF NOT EXISTS stable_data_items_parent_id_height_block_transaction_index_id_idx ON stable_data_items (parent_id, height, block_transaction_index, id); + +CREATE TABLE IF NOT EXISTS tag_names ( + hash BLOB PRIMARY KEY, + name BLOB NOT NULL +); + +CREATE TABLE IF NOT EXISTS tag_values ( + hash BLOB PRIMARY KEY, + value BLOB NOT NULL +); + +CREATE TABLE IF NOT EXISTS stable_data_item_tags ( + tag_name_hash BLOB NOT NULL, + tag_value_hash BLOB NOT NULL, + height INTEGER NOT NULL, + block_transaction_index INTEGER NOT NULL, + data_item_tag_index INTEGER NOT NULL, + data_item_id BLOB NOT NULL, + parent_id BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + PRIMARY KEY (tag_name_hash, tag_value_hash, height, block_transaction_index, 
data_item_id, data_item_tag_index) +); + +CREATE INDEX IF NOT EXISTS stable_data_item_tags_transaction_id_idx ON stable_data_item_tags (data_item_id); + +CREATE TABLE IF NOT EXISTS new_data_items ( + -- Identity + id BLOB NOT NULL, + parent_id BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + height INTEGER, + signature BLOB NOT NULL, + anchor BLOB NOT NULL, + + -- Ownership + owner_address BLOB NOT NULL, + target BLOB, + + -- Data + data_offset INTEGER NOT NULL, + data_size INTEGER NOT NULL, + content_type TEXT, + + -- Metadata + tag_count INTEGER NOT NULL, + indexed_at INTEGER NOT NULL, + PRIMARY KEY (id) +); + +CREATE INDEX IF NOT EXISTS new_data_items_parent_id_id_idx ON new_data_items (parent_id, id); +CREATE INDEX IF NOT EXISTS new_data_items_root_transaction_id_id_idx ON new_data_items (root_transaction_id, id); +CREATE INDEX IF NOT EXISTS new_data_items_target_id_idx ON new_data_items (target, id); +CREATE INDEX IF NOT EXISTS new_data_items_owner_address_id_idx ON new_data_items (owner_address, id); +CREATE INDEX IF NOT EXISTS new_data_items_height_indexed_at_idx ON new_data_items (height, indexed_at); + +CREATE TABLE IF NOT EXISTS new_data_item_tags ( + tag_name_hash BLOB NOT NULL, + tag_value_hash BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + data_item_id BLOB NOT NULL, + data_item_tag_index INTEGER NOT NULL, + height INTEGER, + indexed_at INTEGER NOT NULL, + PRIMARY KEY (tag_name_hash, tag_value_hash, root_transaction_id, data_item_id, data_item_tag_index) +); + +CREATE INDEX IF NOT EXISTS new_data_item_tags_height_indexed_at_idx ON new_data_item_tags (height, indexed_at); diff --git a/migrations/down/2023.05.15T18.06.50.bundles.init-schema.sql b/migrations/down/2023.05.15T18.06.50.bundles.init-schema.sql new file mode 100644 index 00000000..934bab1f --- /dev/null +++ b/migrations/down/2023.05.15T18.06.50.bundles.init-schema.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS new_data_item_tags; +DROP TABLE IF EXISTS new_data_items; +DROP TABLE IF EXISTS stable_data_item_tags; +DROP TABLE IF EXISTS tag_values; +DROP TABLE IF EXISTS tag_names; +DROP TABLE IF EXISTS stable_data_items; +DROP TABLE IF EXISTS wallets; +DROP TABLE IF EXISTS bundles; +DROP TABLE IF EXISTS bundle_formats; From 4f0cee89e1e16414864bf79e88bd00bb5c554a83 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 16 May 2023 15:31:40 -0500 Subject: [PATCH 02/33] feat(sqlite): add bundle DB support to StandaloneSqlite PE-3769 Adds the wiring needed to use the new bundle DB in both the StandaloneSqlite class and the tests. 
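For reference, a minimal sketch of the wiring pattern (not the full class), assuming better-sqlite3 and the DB path used in src/system.ts; the pragmas match the ones the worker applies to every database:

    import Sqlite from 'better-sqlite3';

    // Each logical store gets its own SQLite file to reduce lock contention.
    const bundlesDb = new Sqlite('data/sqlite/bundles.db', { timeout: 30000 });
    bundlesDb.pragma('journal_mode = WAL'); // readers don't block the writer
    bundlesDb.pragma('page_size = 4096'); // may depend on OS and FS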
--- src/database/standalone-sqlite.test.ts | 3 + src/database/standalone-sqlite.ts | 22 +++++- src/system.ts | 1 + src/workers/block-importer.test.ts | 2 + test/bundles-schema.sql | 102 +++++++++++++++++++++++++ test/core-schema.sql | 2 + test/dump-bundles-schema.sql | 3 + test/dump-test-schemas | 1 + test/sqlite-helpers.ts | 9 ++- 9 files changed, 141 insertions(+), 4 deletions(-) create mode 100644 test/bundles-schema.sql create mode 100644 test/dump-bundles-schema.sql diff --git a/src/database/standalone-sqlite.test.ts b/src/database/standalone-sqlite.test.ts index 7637d907..ea4232a2 100644 --- a/src/database/standalone-sqlite.test.ts +++ b/src/database/standalone-sqlite.test.ts @@ -32,6 +32,7 @@ import { } from '../../src/database/standalone-sqlite.js'; import { fromB64Url, toB64Url } from '../../src/lib/encoding.js'; import { + bundlesDbPath, coreDb, coreDbPath, dataDbPath, @@ -134,11 +135,13 @@ describe('StandaloneSqliteDatabase', () => { coreDbPath, dataDbPath, moderationDbPath, + bundlesDbPath, }); dbWorker = new StandaloneSqliteDatabaseWorker({ coreDbPath, dataDbPath, moderationDbPath, + bundlesDbPath, }); chainSource = new ArweaveChainSourceStub(); }); diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index a2464366..05b7b231 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -216,11 +216,13 @@ export class StandaloneSqliteDatabaseWorker { core: Sqlite.Database; data: Sqlite.Database; moderation: Sqlite.Database; + bundles: Sqlite.Database; }; private stmts: { core: { [stmtName: string]: Sqlite.Statement }; data: { [stmtName: string]: Sqlite.Statement }; moderation: { [stmtName: string]: Sqlite.Statement }; + bundles: { [stmtName: string]: Sqlite.Statement }; }; // Transactions @@ -234,23 +236,26 @@ export class StandaloneSqliteDatabaseWorker { coreDbPath, dataDbPath, moderationDbPath, + bundlesDbPath, }: { coreDbPath: string; dataDbPath: string; moderationDbPath: string; + bundlesDbPath: string; }) { const timeout = 30000; this.dbs = { core: new Sqlite(coreDbPath, { timeout }), data: new Sqlite(dataDbPath, { timeout }), moderation: new Sqlite(moderationDbPath, { timeout }), + bundles: new Sqlite(bundlesDbPath, { timeout }), }; for (const db of Object.values(this.dbs)) { db.pragma('journal_mode = WAL'); db.pragma('page_size = 4096'); // may depend on OS and FS } - this.stmts = { core: {}, data: {}, moderation: {} }; + this.stmts = { core: {}, data: {}, moderation: {}, bundles: {} }; for (const [stmtsKey, stmts] of Object.entries(this.stmts)) { const sqlUrl = new URL(`./sql/${stmtsKey}`, import.meta.url); @@ -262,7 +267,8 @@ export class StandaloneSqliteDatabaseWorker { if ( stmtsKey === 'core' || stmtsKey === 'data' || - stmtsKey === 'moderation' + stmtsKey === 'moderation' || + stmtsKey === 'bundles' ) { stmts[k] = this.dbs[stmtsKey].prepare(sql); } else { @@ -1435,13 +1441,14 @@ export class StandaloneSqliteDatabaseWorker { } } -type WorkerPoolName = 'core' | 'data' | 'gql' | 'debug' | 'moderation'; +type WorkerPoolName = 'core' | 'data' | 'gql' | 'debug' | 'moderation' | 'bundles'; const WORKER_POOL_NAMES: Array = [ 'core', 'data', 'gql', 'debug', 'moderation', + 'bundles', ]; type WorkerRoleName = 'read' | 'write'; @@ -1456,6 +1463,7 @@ const WORKER_POOL_SIZES: WorkerPoolSizes = { gql: { read: CPU_COUNT, write: 0 }, debug: { read: 1, write: 0 }, moderation: { read: 1, write: 1 }, + bundles: { read: 1, write: 1 }, }; export class StandaloneSqliteDatabase @@ -1474,12 +1482,14 @@ export class 
StandaloneSqliteDatabase gql: { read: any[]; write: any[] }; debug: { read: any[]; write: any[] }; moderation: { read: any[]; write: any[] }; + bundles: { read: any[]; write: any[] }; } = { core: { read: [], write: [] }, data: { read: [], write: [] }, gql: { read: [], write: [] }, debug: { read: [], write: [] }, moderation: { read: [], write: [] }, + bundles: { read: [], write: [] }, }; private workQueues: { core: { read: any[]; write: any[] }; @@ -1487,12 +1497,14 @@ export class StandaloneSqliteDatabase gql: { read: any[]; write: any[] }; debug: { read: any[]; write: any[] }; moderation: { read: any[]; write: any[] }; + bundles: { read: any[]; write: any[] }; } = { core: { read: [], write: [] }, data: { read: [], write: [] }, gql: { read: [], write: [] }, debug: { read: [], write: [] }, moderation: { read: [], write: [] }, + bundles: { read: [], write: [] }, }; constructor({ log, @@ -1500,12 +1512,14 @@ export class StandaloneSqliteDatabase coreDbPath, dataDbPath, moderationDbPath, + bundlesDbPath, }: { log: winston.Logger; metricsRegistry: promClient.Registry; coreDbPath: string; dataDbPath: string; moderationDbPath: string; + bundlesDbPath: string; }) { this.log = log.child({ class: 'StandaloneSqliteDatabase' }); @@ -1526,6 +1540,7 @@ export class StandaloneSqliteDatabase coreDbPath, dataDbPath, moderationDbPath, + bundlesDbPath, }, }); @@ -1846,6 +1861,7 @@ if (!isMainThread) { coreDbPath: workerData.coreDbPath, dataDbPath: workerData.dataDbPath, moderationDbPath: workerData.moderationDbPath, + bundlesDbPath: workerData.bundlesDbPath, }); parentPort?.on('message', ({ method, args }: WorkerMessage) => { diff --git a/src/system.ts b/src/system.ts index 20528f2e..24665dd3 100644 --- a/src/system.ts +++ b/src/system.ts @@ -98,6 +98,7 @@ export const db = new StandaloneSqliteDatabase({ coreDbPath: 'data/sqlite/core.db', dataDbPath: 'data/sqlite/data.db', moderationDbPath: 'data/sqlite/moderation.db', + bundlesDbPath: 'data/sqlite/bundles.db', }); export const chainIndex: ChainIndex = db; diff --git a/src/workers/block-importer.test.ts b/src/workers/block-importer.test.ts index 2f1d9443..1f24cafe 100644 --- a/src/workers/block-importer.test.ts +++ b/src/workers/block-importer.test.ts @@ -27,6 +27,7 @@ import { StandaloneSqliteDatabase } from '../../src/database/standalone-sqlite.j import log from '../../src/log.js'; import { BlockImporter } from '../../src/workers/block-importer.js'; import { + bundlesDbPath, coreDbPath, dataDbPath, moderationDbPath, @@ -74,6 +75,7 @@ describe('BlockImporter', () => { db = new StandaloneSqliteDatabase({ log, metricsRegistry, + bundlesDbPath, coreDbPath, dataDbPath, moderationDbPath, diff --git a/test/bundles-schema.sql b/test/bundles-schema.sql new file mode 100644 index 00000000..81f5bf7f --- /dev/null +++ b/test/bundles-schema.sql @@ -0,0 +1,102 @@ +CREATE TABLE bundle_formats ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL +); +CREATE TABLE bundles ( + id BLOB PRIMARY KEY, + format INTEGER NOT NULL, + data_item_count INTEGER NOT NULL, + first_processed_at INTEGER NOT NULL, + last_processed_at INTEGER NOT NULL +); +CREATE TABLE wallets ( + address BLOB PRIMARY KEY, + public_modulus BLOB +); +CREATE TABLE stable_data_items ( + -- Identity + id BLOB NOT NULL, + parent_id BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + height INTEGER NOT NULL, + block_transaction_index INTEGER NOT NULL, + signature BLOB NOT NULL, + anchor BLOB NOT NULL, + + -- Ownership + owner_address BLOB NOT NULL, + target BLOB, + + -- Data + data_offset INTEGER NOT NULL, + 
data_size INTEGER NOT NULL, + content_type TEXT, + + -- Metadata + tag_count INTEGER NOT NULL, + indexed_at INTEGER NOT NULL, + PRIMARY KEY (id) +); +CREATE INDEX stable_data_items_height_block_transaction_index_id_idx ON stable_data_items (height, block_transaction_index, id); +CREATE INDEX stable_data_items_target_height_block_transaction_index_id_idx ON stable_data_items (target, height, block_transaction_index, id); +CREATE INDEX stable_data_items_owner_address_height_block_transaction_index_id_idx ON stable_data_items (owner_address, height, block_transaction_index, id); +CREATE INDEX stable_data_items_parent_id_height_block_transaction_index_id_idx ON stable_data_items (parent_id, height, block_transaction_index, id); +CREATE TABLE tag_names ( + hash BLOB PRIMARY KEY, + name BLOB NOT NULL +); +CREATE TABLE tag_values ( + hash BLOB PRIMARY KEY, + value BLOB NOT NULL +); +CREATE TABLE stable_data_item_tags ( + tag_name_hash BLOB NOT NULL, + tag_value_hash BLOB NOT NULL, + height INTEGER NOT NULL, + block_transaction_index INTEGER NOT NULL, + data_item_tag_index INTEGER NOT NULL, + data_item_id BLOB NOT NULL, + parent_id BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + PRIMARY KEY (tag_name_hash, tag_value_hash, height, block_transaction_index, data_item_id, data_item_tag_index) +); +CREATE INDEX stable_data_item_tags_transaction_id_idx ON stable_data_item_tags (data_item_id); +CREATE TABLE new_data_items ( + -- Identity + id BLOB NOT NULL, + parent_id BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + height INTEGER, + signature BLOB NOT NULL, + anchor BLOB NOT NULL, + + -- Ownership + owner_address BLOB NOT NULL, + target BLOB, + + -- Data + data_offset INTEGER NOT NULL, + data_size INTEGER NOT NULL, + content_type TEXT, + + -- Metadata + tag_count INTEGER NOT NULL, + indexed_at INTEGER NOT NULL, + PRIMARY KEY (id) +); +CREATE INDEX new_data_items_parent_id_id_idx ON new_data_items (parent_id, id); +CREATE INDEX new_data_items_root_transaction_id_id_idx ON new_data_items (root_transaction_id, id); +CREATE INDEX new_data_items_target_id_idx ON new_data_items (target, id); +CREATE INDEX new_data_items_owner_address_id_idx ON new_data_items (owner_address, id); +CREATE INDEX new_data_items_height_created_at_idx ON new_data_items (height, created_at); +CREATE TABLE new_data_item_tags ( + tag_name_hash BLOB NOT NULL, + tag_value_hash BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + data_item_id BLOB NOT NULL, + data_item_tag_index INTEGER NOT NULL, + height INTEGER, + created_at INTEGER NOT NULL, + PRIMARY KEY (tag_name_hash, tag_value_hash, root_transaction_id, data_item_id, data_item_tag_index) +); +CREATE INDEX new_data_item_tags_height_created_at_idx ON new_data_item_tags (height, created_at); diff --git a/test/core-schema.sql b/test/core-schema.sql index 7f628d53..0a0baa92 100644 --- a/test/core-schema.sql +++ b/test/core-schema.sql @@ -192,3 +192,5 @@ CREATE INDEX new_block_transactions_height_idx ON new_block_transactions (height CREATE INDEX new_transactions_height_created_at_idx ON new_transactions (height, created_at); CREATE INDEX missing_transactions_height_transaction_id_idx ON missing_transactions (height, transaction_id); CREATE INDEX new_transaction_tags_height_created_at_idx ON new_transaction_tags (height, created_at); +CREATE INDEX sable_block_transactions_transaction_id_idx + ON stable_block_transactions (transaction_id); diff --git a/test/dump-bundles-schema.sql b/test/dump-bundles-schema.sql new file mode 100644 index 00000000..814480ff --- /dev/null +++ 
b/test/dump-bundles-schema.sql @@ -0,0 +1,3 @@ +.output test/bundles-schema.sql +.schema +.exit diff --git a/test/dump-test-schemas b/test/dump-test-schemas index 416bf9a4..269636f3 100755 --- a/test/dump-test-schemas +++ b/test/dump-test-schemas @@ -3,6 +3,7 @@ sqlite3 data/sqlite/core.db < test/dump-core-schema.sql sqlite3 data/sqlite/data.db < test/dump-data-schema.sql sqlite3 data/sqlite/moderation.db < test/dump-moderation-schema.sql +sqlite3 data/sqlite/bundles.db < test/dump-bundles-schema.sql # remove sqlite_sequence table from schema dumps sed -i '/sqlite_sequence/d' test/*-schema.sql diff --git a/test/sqlite-helpers.ts b/test/sqlite-helpers.ts index d36aecfd..b75271ee 100644 --- a/test/sqlite-helpers.ts +++ b/test/sqlite-helpers.ts @@ -6,9 +6,11 @@ import log from '../src/log.js'; export const coreDbPath = `test/tmp/core.db`; export const dataDbPath = `test/tmp/data.db`; export const moderationDbPath = `test/tmp/moderation.db`; +export const bundlesDbPath = `test/tmp/bundles.db`; export let coreDb: Sqlite.Database; export let dataDb: Sqlite.Database; export let moderationDb: Sqlite.Database; +export let bundlesDb: Sqlite.Database; /* eslint-disable */ before(async () => { @@ -33,10 +35,15 @@ before(async () => { moderationDb = new Sqlite(moderationDbPath); const moderationSchema = fs.readFileSync('test/moderation-schema.sql', 'utf8'); moderationDb.exec(moderationSchema); + + // Bundles DB + bundlesDb = new Sqlite(bundlesDbPath); + const bundlesSchema = fs.readFileSync('test/bundles-schema.sql', 'utf8'); + bundlesDb.exec(bundlesSchema); }); afterEach(async () => { - [coreDb, dataDb, moderationDb].forEach((db) => { + [coreDb, dataDb, moderationDb, bundlesDb].forEach((db) => { db.prepare("SELECT name FROM sqlite_schema WHERE type='table'") .all() .forEach((row) => { From 8b4d8e2ad3f3a8d4e07c050f6e71cebed8c19966 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 16 May 2023 17:12:03 -0500 Subject: [PATCH 03/33] refactor(sqlite): extract tx row construction helper functions PE-3769 Extracts some small helper functions so they can be used when constructing data item rows too.
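For context, the extracted helpers are small pure functions; a self-contained sketch of their behavior (the example tag name is illustrative):

    import crypto from 'node:crypto';

    // SHA-1 here is a compact join key for tag names/values, not a security measure.
    function hashTagPart(value: Buffer): Buffer {
      return crypto.createHash('sha1').update(value).digest();
    }

    // Unix epoch seconds; a TODO in the diff notes a possible switch to milliseconds.
    function currentTimestamp(): number {
      return +(Date.now() / 1000).toFixed(0);
    }

    // Repeated tag names hash to the same key, so they share one tag_names row.
    const a = hashTagPart(Buffer.from('Content-Type'));
    const b = hashTagPart(Buffer.from('Content-Type'));
    console.log(a.equals(b), currentTimestamp()); // true <epoch seconds>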
--- src/database/standalone-sqlite.ts | 54 +++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 05b7b231..87fb77ba 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -117,6 +117,23 @@ export function toSqliteParams(sqlBricksParams: { values: any[] }) { }, {} as { [key: string]: any }); } +function hashTagPart(value: Buffer) { + return crypto.createHash('sha1').update(value).digest(); +} + +function isContentTypeTag(tagName: Buffer) { + return tagName.toString('utf8').toLowerCase() === 'content-type'; +} + +// TODO switch to milliseconds +function currentTimestamp() { + return +(Date.now() / 1000).toFixed(0); +} + +function ownerToAddress(owner: Buffer) { + return crypto.createHash('sha256').update(owner).digest(); +} + export function txToDbRows(tx: PartialJsonTransaction, height?: number) { const tagNames = [] as { name: Buffer; hash: Buffer }[]; const tagValues = [] as { value: Buffer; hash: Buffer }[]; @@ -135,14 +152,14 @@ export function txToDbRows(tx: PartialJsonTransaction, height?: number) { let transactionTagIndex = 0; for (const tag of tx.tags) { const tagName = fromB64Url(tag.name); - const tagNameHash = crypto.createHash('sha1').update(tagName).digest(); + const tagNameHash = hashTagPart(tagName); tagNames.push({ name: tagName, hash: tagNameHash }); const tagValue = fromB64Url(tag.value); - const tagValueHash = crypto.createHash('sha1').update(tagValue).digest(); + const tagValueHash = hashTagPart(tagValue); tagValues.push({ value: tagValue, hash: tagValueHash }); - if (tagName.toString('utf8').toLowerCase() === 'content-type') { + if (isContentTypeTag(tagName)) { contentType = tagValue.toString('utf8'); } @@ -151,17 +168,14 @@ export function txToDbRows(tx: PartialJsonTransaction, height?: number) { tag_value_hash: tagValueHash, transaction_id: txId, transaction_tag_index: transactionTagIndex, - created_at: +(Date.now() / 1000).toFixed(0), + created_at: currentTimestamp(), }); transactionTagIndex++; } const ownerBuffer = fromB64Url(tx.owner); - const ownerAddressBuffer = crypto - .createHash('sha256') - .update(ownerBuffer) - .digest(); + const ownerAddressBuffer = ownerToAddress(ownerBuffer); wallets.push({ address: ownerAddressBuffer, public_modulus: ownerBuffer }); @@ -183,7 +197,7 @@ export function txToDbRows(tx: PartialJsonTransaction, height?: number) { data_root: fromB64Url(tx.data_root), content_type: contentType, tag_count: tx.tags.length, - created_at: +(Date.now() / 1000).toFixed(0), + created_at: currentTimestamp(), height: height, }, }; @@ -628,19 +642,19 @@ export class StandaloneSqliteDatabaseWorker { hash: hashBuffer, data_size: dataSize, original_source_content_type: contentType, - indexed_at: +(Date.now() / 1000).toFixed(0), + indexed_at: currentTimestamp(), cached_at: cachedAt, }); this.stmts.data.insertDataId.run({ id: fromB64Url(id), contiguous_data_hash: hashBuffer, - indexed_at: +(Date.now() / 1000).toFixed(0), + indexed_at: currentTimestamp(), }); if (dataRoot !== undefined) { this.stmts.data.insertDataRoot.run({ data_root: fromB64Url(dataRoot), contiguous_data_hash: hashBuffer, - indexed_at: +(Date.now() / 1000).toFixed(0), + indexed_at: currentTimestamp(), }); } } @@ -1397,7 +1411,7 @@ export class StandaloneSqliteDatabaseWorker { if (source !== undefined) { this.stmts.moderation.insertSource.run({ name: source, - created_at: +(Date.now() / 1000).toFixed(0), + created_at: currentTimestamp(), }); 
sourceId = this.stmts.moderation.getSourceByName.get({ name: source, @@ -1408,14 +1422,14 @@ export class StandaloneSqliteDatabaseWorker { id: fromB64Url(id), block_source_id: sourceId, notes, - blocked_at: +(Date.now() / 1000).toFixed(0), + blocked_at: currentTimestamp(), }); } else if (hash !== undefined) { this.stmts.moderation.insertBlockedHash.run({ hash: fromB64Url(hash), block_source_id: sourceId, notes, - blocked_at: +(Date.now() / 1000).toFixed(0), + blocked_at: currentTimestamp(), }); } } @@ -1436,12 +1450,18 @@ export class StandaloneSqliteDatabaseWorker { parent_id: fromB64Url(parentId), data_offset: dataOffset, data_size: dataSize, - created_at: +(Date.now() / 1000).toFixed(0), + created_at: currentTimestamp(), }); } } -type WorkerPoolName = 'core' | 'data' | 'gql' | 'debug' | 'moderation' | 'bundles'; +type WorkerPoolName = + | 'core' + | 'data' + | 'gql' + | 'debug' + | 'moderation' + | 'bundles'; const WORKER_POOL_NAMES: Array<WorkerPoolName> = [ 'core', 'data', From ab15e67e1d07e4ca2c2b1f20246a28bdd674420b Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 16 May 2023 17:44:43 -0500 Subject: [PATCH 04/33] feat(bundles): index ANS-104 bundles in new data item tables PE-3769 Records ANS-104 metadata in new data item tables. Flushing to the stable data item tables is not yet implemented. Also implements propagation of a root parent transaction ID to the ANS-104 unbundler. A root parent transaction ID is needed to efficiently find and sort data items when executing GQL queries. --- src/database/sql/bundles/import.sql | 44 +++++++++ src/database/sql/core/async-import.sql | 14 +++ src/database/standalone-sqlite.test.ts | 33 ++++++- src/database/standalone-sqlite.ts | 128 +++++++++++++++++++++++++ src/lib/ans-104.ts | 78 ++++++++------- src/system.ts | 2 +- src/types.d.ts | 3 +- src/workers/ans104-unbundler.ts | 33 +++++-- test/bundles-schema.sql | 13 ++- 9 files changed, 300 insertions(+), 48 deletions(-) create mode 100644 src/database/sql/bundles/import.sql diff --git a/src/database/sql/bundles/import.sql b/src/database/sql/bundles/import.sql new file mode 100644 index 00000000..3f1a0987 --- /dev/null +++ b/src/database/sql/bundles/import.sql @@ -0,0 +1,44 @@ +-- insertOrIgnoreWallet +INSERT INTO wallets (address, public_modulus) +VALUES (@address, @public_modulus) +ON CONFLICT DO NOTHING + +-- insertOrIgnoreTagName +INSERT INTO tag_names (hash, name) +VALUES (@hash, @name) +ON CONFLICT DO NOTHING + +-- insertOrIgnoreTagValue +INSERT INTO tag_values (hash, value) +VALUES (@hash, @value) +ON CONFLICT DO NOTHING + +-- upsertNewDataItemTag +INSERT INTO new_data_item_tags ( + tag_name_hash, tag_value_hash, + root_transaction_id, data_item_id, data_item_tag_index, + height, indexed_at +) VALUES ( + @tag_name_hash, @tag_value_hash, + @root_transaction_id, @data_item_id, @data_item_tag_index, + @height, @indexed_at +) ON CONFLICT DO UPDATE SET height = IFNULL(@height, height) + +-- insertOrIgnoreBundleDataItem +INSERT INTO bundle_data_items ( + id, parent_id, root_transaction_id, indexed_at +) VALUES ( + @id, @parent_id, @root_transaction_id, @indexed_at +) +ON CONFLICT DO NOTHING + +-- upsertNewDataItem +INSERT INTO new_data_items ( + id, parent_id, root_transaction_id, height, signature, anchor, + owner_address, target, data_offset, data_size, content_type, + tag_count, indexed_at +) VALUES ( + @id, @parent_id, @root_transaction_id, @height, @signature, @anchor, + @owner_address, @target, @data_offset, @data_size, @content_type, + @tag_count, @indexed_at +) ON CONFLICT DO UPDATE SET height =
IFNULL(@height, height) diff --git a/src/database/sql/core/async-import.sql b/src/database/sql/core/async-import.sql index 0ee429ef..84f36a6f 100644 --- a/src/database/sql/core/async-import.sql +++ b/src/database/sql/core/async-import.sql @@ -22,3 +22,17 @@ WHERE transaction_id = @transaction_id SELECT MAX(height)+1 FROM stable_blocks ) + +-- selectTransactionHeight +SELECT height +FROM new_transactions +WHERE id = @transaction_id AND height IS NOT NULL +UNION +SELECT height +FROM stable_transactions +WHERE id = @transaction_id AND height IS NOT NULL +UNION +SELECT height +FROM missing_transactions +WHERE transaction_id = @transaction_id +LIMIT 1 diff --git a/src/database/standalone-sqlite.test.ts b/src/database/standalone-sqlite.test.ts index ea4232a2..f553c614 100644 --- a/src/database/standalone-sqlite.test.ts +++ b/src/database/standalone-sqlite.test.ts @@ -16,6 +16,7 @@ * along with this program. If not, see . */ import { ValidationError } from 'apollo-server-express'; +import arbundles from 'arbundles/stream/index.js'; import { expect } from 'chai'; import crypto from 'node:crypto'; import fs from 'node:fs'; @@ -24,6 +25,7 @@ import * as promClient from 'prom-client'; import { StandaloneSqliteDatabase, StandaloneSqliteDatabaseWorker, + dataItemToDbRows, decodeBlockGqlCursor, decodeTransactionGqlCursor, encodeBlockGqlCursor, @@ -38,9 +40,16 @@ import { dataDbPath, moderationDbPath, } from '../../test/sqlite-helpers.js'; -import { ArweaveChainSourceStub } from '../../test/stubs.js'; +import { ArweaveChainSourceStub, stubAns104Bundle } from '../../test/stubs.js'; +import { normalizeAns104DataItem } from '../lib/ans-104.js'; import log from '../log.js'; +//import { NormalizedDataItem } from '../types.js'; + +/* eslint-disable */ +// @ts-ignore +const { default: processStream } = arbundles; + const HEIGHT = 1138; const BLOCK_TX_INDEX = 42; @@ -120,6 +129,28 @@ describe('SQLite GraphQL cursor functions', () => { }); }); +describe('SQLite data conversion functions', () => { + describe('dataItemToDbRows', () => { + it('should return DB rows to insert', async () => { + const bundleStream = await stubAns104Bundle(); + const iterable = await processStream(bundleStream); + for await (const [_index, dataItem] of iterable.entries()) { + const normalizedDataItem = normalizeAns104DataItem( + '0000000000000000000000000000000000000000000', + '0000000000000000000000000000000000000000000', + dataItem, + ); + const rows = dataItemToDbRows(normalizedDataItem); + expect(rows.tagNames.length).to.be.above(0); + expect(rows.tagValues.length).to.be.above(0); + expect(rows.newDataItemTags.length).to.be.above(0); + expect(rows.wallets.length).to.be.above(0); + expect(rows.newDataItem).to.be.an('object'); + } + }); + }); +}); + describe('StandaloneSqliteDatabase', () => { let metricsRegistry: promClient.Registry; let chainSource: ArweaveChainSourceStub; diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 87fb77ba..b915a91d 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -50,6 +50,7 @@ import { ContiguousDataIndex, GqlQueryable, NestedDataIndexWriter, + NormalizedDataItem, PartialJsonBlock, PartialJsonTransaction, } from '../types.js'; @@ -203,6 +204,85 @@ export function txToDbRows(tx: PartialJsonTransaction, height?: number) { }; } +export function dataItemToDbRows(item: NormalizedDataItem, height?: number) { + const tagNames = [] as { name: Buffer; hash: Buffer }[]; + const tagValues = [] as { value: Buffer; hash: Buffer }[]; + const 
newDataItemTags = [] as { + tag_name_hash: Buffer; + tag_value_hash: Buffer; + root_transaction_id: Buffer; + data_item_id: Buffer; + data_item_tag_index: number; + indexed_at: number; + }[]; + const wallets = [] as { address: Buffer; public_modulus: Buffer }[]; + + let contentType: string | undefined; + const id = fromB64Url(item.id); + + let dataItemTagIndex = 0; + for (const tag of item.tags) { + const tagName = fromB64Url(tag.name); + const tagNameHash = hashTagPart(tagName); + tagNames.push({ name: tagName, hash: tagNameHash }); + + const tagValue = fromB64Url(tag.value); + const tagValueHash = hashTagPart(tagValue); + tagValues.push({ value: tagValue, hash: tagValueHash }); + + if (isContentTypeTag(tagName)) { + contentType = tagValue.toString('utf8'); + } + + newDataItemTags.push({ + tag_name_hash: tagNameHash, + tag_value_hash: tagValueHash, + root_transaction_id: fromB64Url(item.root_tx_id), + data_item_id: id, + data_item_tag_index: dataItemTagIndex, + indexed_at: currentTimestamp(), + }); + + dataItemTagIndex++; + } + + const ownerBuffer = fromB64Url(item.owner); + const ownerAddressBuffer = ownerToAddress(ownerBuffer); + + wallets.push({ address: ownerAddressBuffer, public_modulus: ownerBuffer }); + + const parentId = fromB64Url(item.parent_id); + const rootTxId = fromB64Url(item.root_tx_id); + + return { + tagNames, + tagValues, + newDataItemTags, + wallets, + newBundleDataItem: { + id, + parent_id: parentId, + root_transaction_id: rootTxId, + indexed_at: currentTimestamp(), + }, + newDataItem: { + id, + parent_id: parentId, + root_transaction_id: rootTxId, + height: height, + signature: fromB64Url(item.signature), + anchor: fromB64Url(item.anchor), + owner_address: ownerAddressBuffer, + target: fromB64Url(item.target), + data_offset: item.data_offset, + data_size: item.data_size, + content_type: contentType, + tag_count: item.tags.length, + indexed_at: currentTimestamp(), + }, + }; +} + type DebugInfo = { counts: { wallets: number; @@ -242,6 +322,7 @@ export class StandaloneSqliteDatabaseWorker { // Transactions resetToHeightFn: Sqlite.Transaction; insertTxFn: Sqlite.Transaction; + insertDataItemFn: Sqlite.Transaction; insertBlockAndTxsFn: Sqlite.Transaction; saveStableDataFn: Sqlite.Transaction; deleteStaleNewDataFn: Sqlite.Transaction; @@ -336,6 +417,37 @@ export class StandaloneSqliteDatabaseWorker { }, ); + this.insertDataItemFn = this.dbs.core.transaction( + (item: NormalizedDataItem, height?: number) => { + // Insert the data item + const rows = dataItemToDbRows(item); + + for (const row of rows.tagNames) { + this.stmts.bundles.insertOrIgnoreTagName.run(row); + } + + for (const row of rows.tagValues) { + this.stmts.bundles.insertOrIgnoreTagValue.run(row); + } + + for (const row of rows.newDataItemTags) { + this.stmts.bundles.upsertNewDataItemTag.run({ + ...row, + height, + }); + } + + for (const row of rows.wallets) { + this.stmts.bundles.insertOrIgnoreWallet.run(row); + } + + this.stmts.bundles.upsertNewDataItem.run({ + ...rows.newDataItem, + height, + }); + }, + ); + this.insertBlockAndTxsFn = this.dbs.core.transaction( ( block: PartialJsonBlock, @@ -513,6 +625,14 @@ export class StandaloneSqliteDatabaseWorker { this.stmts.core.deleteNewMissingTransaction.run({ transaction_id: txId }); } + saveDataItem(item: NormalizedDataItem) { + const rootTxId = fromB64Url(item.root_tx_id); + const maybeTxHeight = this.stmts.core.selectTransactionHeight.get({ + transaction_id: rootTxId, + })?.height; + this.insertDataItemFn(item, maybeTxHeight); + } + saveBlockAndTxs( block: 
PartialJsonBlock, txs: PartialJsonTransaction[], @@ -1701,6 +1821,10 @@ export class StandaloneSqliteDatabase return this.queueWrite('core', 'saveTx', [tx]); } + saveDataItem(item: NormalizedDataItem): Promise { + return this.queueWrite('bundles', 'saveDataItem', [item]); + } + saveBlockAndTxs( block: PartialJsonBlock, txs: PartialJsonTransaction[], @@ -1906,6 +2030,10 @@ if (!isMainThread) { worker.saveTx(args[0]); parentPort?.postMessage(null); break; + case 'saveDataItem': + worker.saveDataItem(args[0]); + parentPort?.postMessage(null); + break; case 'saveBlockAndTxs': const [block, txs, missingTxIds] = args; worker.saveBlockAndTxs(block, txs, missingTxIds); diff --git a/src/lib/ans-104.ts b/src/lib/ans-104.ts index 14cce044..161088e7 100644 --- a/src/lib/ans-104.ts +++ b/src/lib/ans-104.ts @@ -16,6 +16,7 @@ import { fromB64Url, sha256B64Url, utf8ToB64Url } from './encoding.js'; const { default: processStream } = arbundles; export function normalizeAns104DataItem( + rootTxId: string, parentTxId: string, ans104DataItem: Record, ): NormalizedDataItem { @@ -29,8 +30,9 @@ export function normalizeAns104DataItem( ); return { - parent_id: parentTxId, id: ans104DataItem.id, + parent_id: parentTxId, + root_tx_id: rootTxId, signature: ans104DataItem.signature, owner: ans104DataItem.owner, owner_address: sha256B64Url(fromB64Url(ans104DataItem.owner)), @@ -87,36 +89,44 @@ export class Ans104Parser { }); } - async parseBundle({ parentTxId }: { parentTxId: string }): Promise { - const unbundlePromise: Promise = new Promise(async (resolve, reject) => { - const log = this.log.child({ parentTxId }); - log.debug('Waiting for previous bundle to finish...'); - while (this.unbundlePromise) { - await wait(100); - } - log.debug('Previous bundle finished.'); - await fsPromises.mkdir(path.join(process.cwd(), 'data/tmp/ans-104'), { - recursive: true, - }); - const data = await this.contiguousDataSource.getData(parentTxId); - const bundlePath = path.join( - process.cwd(), - 'data/tmp/ans-104', - `${parentTxId}`, - ); - const writeStream = fs.createWriteStream(bundlePath); - // TODO consider using pipeline - data.stream.pipe(writeStream); - writeStream.on('error', (error) => { - log.error('Error writing ANS-104 bundle stream', error); - reject(error); - }); - writeStream.on('finish', async () => { - log.info('Parsing ANS-104 bundle stream...'); - this.worker.postMessage({ parentTxId, bundlePath }); - resolve(); - }); - }); + async parseBundle({ + rootTxId, + parentId, + }: { + rootTxId: string; + parentId: string; + }): Promise { + const unbundlePromise: Promise = new Promise( + async (resolve, reject) => { + const log = this.log.child({ parentId }); + log.debug('Waiting for previous bundle to finish...'); + while (this.unbundlePromise) { + await wait(100); + } + log.debug('Previous bundle finished.'); + await fsPromises.mkdir(path.join(process.cwd(), 'data/tmp/ans-104'), { + recursive: true, + }); + const data = await this.contiguousDataSource.getData(parentId); + const bundlePath = path.join( + process.cwd(), + 'data/tmp/ans-104', + `${parentId}`, + ); + const writeStream = fs.createWriteStream(bundlePath); + // TODO consider using pipeline + data.stream.pipe(writeStream); + writeStream.on('error', (error) => { + log.error('Error writing ANS-104 bundle stream', error); + reject(error); + }); + writeStream.on('finish', async () => { + log.info('Parsing ANS-104 bundle stream...'); + this.worker.postMessage({ rootTxId, parentId, bundlePath }); + resolve(); + }); + }, + ); this.unbundlePromise = unbundlePromise; 
return unbundlePromise; } @@ -124,13 +134,13 @@ export class Ans104Parser { if (!isMainThread) { parentPort?.on('message', async (message: any) => { - const { parentTxId, bundlePath } = message; + const { rootTxId, parentId, bundlePath } = message; try { const stream = fs.createReadStream(bundlePath); const iterable = await processStream(stream); const bundleLength = iterable.length; - const fnLog = log.child({ parentTxId, bundleLength }); + const fnLog = log.child({ rootTxId, parentId, bundleLength }); fnLog.info('Unbundling ANS-104 bundle stream data items...'); const processedDataItemIds = new Set(); @@ -160,7 +170,7 @@ if (!isMainThread) { parentPort?.postMessage({ eventName: 'data-item-unbundled', - dataItem: normalizeAns104DataItem(parentTxId, dataItem), + dataItem: normalizeAns104DataItem(rootTxId, parentId, dataItem), }); } parentPort?.postMessage({ eventName: 'unbundle-complete' }); diff --git a/src/system.ts b/src/system.ts index 24665dd3..ef3290a4 100644 --- a/src/system.ts +++ b/src/system.ts @@ -202,7 +202,7 @@ eventEmitter.on( events.ANS104_TX_INDEXED, async (tx: PartialJsonTransaction) => { if (await config.ANS104_UNBUNDLE_FILTER.match(tx)) { - ans104Unbundler.queueTx(tx); + ans104Unbundler.queueItem(tx); } }, ); diff --git a/src/types.d.ts b/src/types.d.ts index 00ebcdb5..14bd587f 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -199,8 +199,9 @@ export interface NestedDataIndexWriter { } export interface NormalizedDataItem { - parent_id: string; id: string; + parent_id: string; + root_tx_id: string; signature: string; owner: string; owner_address: string; diff --git a/src/workers/ans104-unbundler.ts b/src/workers/ans104-unbundler.ts index c563e3d2..9c30ecb1 100644 --- a/src/workers/ans104-unbundler.ts +++ b/src/workers/ans104-unbundler.ts @@ -24,11 +24,14 @@ import { Ans104Parser } from '../lib/ans-104.js'; import { ContiguousDataSource, ItemFilter, + NormalizedDataItem, PartialJsonTransaction, } from '../types.js'; const DEFAULT_WORKER_COUNT = 1; +type UnbundleableItem = NormalizedDataItem | PartialJsonTransaction; + export class Ans104Unbundler { // Dependencies private log: winston.Logger; @@ -36,7 +39,7 @@ export class Ans104Unbundler { private ans104Parser: Ans104Parser; // Unbundling queue - private queue: queueAsPromised; + private queue: queueAsPromised; constructor({ log, @@ -62,19 +65,33 @@ export class Ans104Unbundler { this.queue = fastq.promise(this.unbundle.bind(this), workerCount); } - async queueTx(tx: PartialJsonTransaction): Promise { - const log = this.log.child({ method: 'queueTx', txId: tx.id }); + async queueItem(item: UnbundleableItem): Promise { + const log = this.log.child({ method: 'queueItem', id: item.id }); log.debug('Queueing bundle...'); - this.queue.push(tx); + this.queue.push(item); log.debug('Bundle queued.'); } - async unbundle(tx: PartialJsonTransaction): Promise { - const log = this.log.child({ method: 'unbundle', txId: tx.id }); + async unbundle(item: UnbundleableItem): Promise { + const log = this.log.child({ method: 'unbundle', id: item.id }); try { - if (await this.filter.match(tx)) { + let rootTxId: string | undefined; + if ('root_tx_id' in item) { + // Data item with root_tx_id + rootTxId = item.root_tx_id; + } else if ('last_tx' in item) { + // Layer 1 transaction + rootTxId = item.id; + } else { + // Data item without root_tx_id (should be impossible) + throw new Error('Missing root_tx_id on data item.'); + } + if (await this.filter.match(item)) { log.info('Unbundling bundle...'); - await this.ans104Parser.parseBundle({ 
parentTxId: tx.id }); + await this.ans104Parser.parseBundle({ + rootTxId, + parentId: item.id, + }); log.info('Bundle unbundled.'); } } catch (error) { diff --git a/test/bundles-schema.sql b/test/bundles-schema.sql index 81f5bf7f..978feab2 100644 --- a/test/bundles-schema.sql +++ b/test/bundles-schema.sql @@ -9,6 +9,13 @@ CREATE TABLE bundles ( first_processed_at INTEGER NOT NULL, last_processed_at INTEGER NOT NULL ); +CREATE TABLE bundle_data_items ( + id BLOB, + parent_id BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + indexed_at INTEGER NOT NULL, + PRIMARY KEY (id, parent_id) +); CREATE TABLE wallets ( address BLOB PRIMARY KEY, public_modulus BLOB @@ -88,7 +95,7 @@ CREATE INDEX new_data_items_parent_id_id_idx ON new_data_items (parent_id, id); CREATE INDEX new_data_items_root_transaction_id_id_idx ON new_data_items (root_transaction_id, id); CREATE INDEX new_data_items_target_id_idx ON new_data_items (target, id); CREATE INDEX new_data_items_owner_address_id_idx ON new_data_items (owner_address, id); -CREATE INDEX new_data_items_height_created_at_idx ON new_data_items (height, created_at); +CREATE INDEX new_data_items_height_indexed_at_idx ON new_data_items (height, indexed_at); CREATE TABLE new_data_item_tags ( tag_name_hash BLOB NOT NULL, tag_value_hash BLOB NOT NULL, @@ -96,7 +103,7 @@ CREATE TABLE new_data_item_tags ( data_item_id BLOB NOT NULL, data_item_tag_index INTEGER NOT NULL, height INTEGER, - created_at INTEGER NOT NULL, + indexed_at INTEGER NOT NULL, PRIMARY KEY (tag_name_hash, tag_value_hash, root_transaction_id, data_item_id, data_item_tag_index) ); -CREATE INDEX new_data_item_tags_height_created_at_idx ON new_data_item_tags (height, created_at); +CREATE INDEX new_data_item_tags_height_indexed_at_idx ON new_data_item_tags (height, indexed_at); From bb70d8ca36c58c389c1a9265eb29263aec5e7232 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Mon, 22 May 2023 16:53:54 -0500 Subject: [PATCH 05/33] feat(bundles): save stable ans-104 data items PE-3769 Adds SQL to flush stable data items to the stable data item and data item tags tables as well as remove flushed data from the new data item tables. This is still relatively unoptimized and is not yet exhaustive in its cleanup of stale data. 
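As a rough sketch of the flush-then-clean pattern (toy tables and heights; the real statements live in flush.sql and cleanup.sql below, and in this patch the copy and the cleanup run as separate transactions):

    import Sqlite from 'better-sqlite3';

    const db = new Sqlite(':memory:');
    db.exec(`
      CREATE TABLE new_items (id BLOB PRIMARY KEY, height INTEGER);
      CREATE TABLE stable_items (id BLOB PRIMARY KEY, height INTEGER NOT NULL);
    `);

    // Copy rows below the stable-height threshold, skipping already-flushed rows...
    const flush = db.transaction((endHeight: number) => {
      db.prepare(
        `INSERT INTO stable_items (id, height)
         SELECT id, height FROM new_items WHERE height < @end_height
         ON CONFLICT DO NOTHING`,
      ).run({ end_height: endHeight });
      // ...then drop the flushed rows from the "new" table.
      db.prepare('DELETE FROM new_items WHERE height < @end_height').run({
        end_height: endHeight,
      });
    });

    flush(1138); // e.g. everything below height 1138 becomes stable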
--- src/database/sql/bundles/cleanup.sql | 25 +++++++++++++ src/database/sql/bundles/flush.sql | 51 ++++++++++++++++++++++++++ src/database/standalone-sqlite.test.ts | 6 +-- src/database/standalone-sqlite.ts | 44 ++++++++++++++++++---- 4 files changed, 116 insertions(+), 10 deletions(-) create mode 100644 src/database/sql/bundles/cleanup.sql create mode 100644 src/database/sql/bundles/flush.sql diff --git a/src/database/sql/bundles/cleanup.sql b/src/database/sql/bundles/cleanup.sql new file mode 100644 index 00000000..e9d8e72b --- /dev/null +++ b/src/database/sql/bundles/cleanup.sql @@ -0,0 +1,25 @@ +-- deleteStaleNewDataItems +DELETE FROM new_data_items +WHERE id IN ( + SELECT DISTINCT ndi.id + FROM new_data_items ndi + LEFT JOIN core.stable_transactions st + ON ndi.root_transaction_id = st.id + AND st.height < @height_threshold + LEFT JOIN core.missing_transactions mt + ON ndi.root_transaction_id = mt.transaction_id + AND mt.height < @height_threshold +) + +-- deleteStaleNewDataItemTags +DELETE FROM new_data_item_tags +WHERE data_item_id IN ( + SELECT DISTINCT ndi.id + FROM new_data_items ndi + LEFT JOIN core.stable_transactions st + ON ndi.root_transaction_id = st.id + AND st.height < @height_threshold + LEFT JOIN core.missing_transactions mt + ON ndi.root_transaction_id = mt.transaction_id + AND mt.height < @height_threshold +) diff --git a/src/database/sql/bundles/flush.sql b/src/database/sql/bundles/flush.sql new file mode 100644 index 00000000..62fd3a49 --- /dev/null +++ b/src/database/sql/bundles/flush.sql @@ -0,0 +1,51 @@ +-- insertOrIgnoreStableDataItems +INSERT INTO stable_data_items ( + id, parent_id, root_transaction_id, + height, + block_transaction_index, + signature, anchor, owner_address, target, + data_offset, data_size, content_type, + tag_count, indexed_at +) +SELECT + ndi.id, ndi.parent_id, ndi.root_transaction_id, + IFNULL(st.height, mt.height), + IFNULL(st.block_transaction_index, sbt.block_transaction_index), + ndi.signature, ndi.anchor, ndi.owner_address, ndi.target, + ndi.data_offset, ndi.data_size, ndi.content_type, + ndi.tag_count, ndi.indexed_at +FROM new_data_items ndi +LEFT JOIN core.stable_transactions st + ON ndi.root_transaction_id = st.id + AND st.height < @end_height +LEFT JOIN core.missing_transactions mt + ON ndi.root_transaction_id = mt.transaction_id + AND mt.height < @end_height +LEFT JOIN core.stable_block_transactions sbt + ON mt.transaction_id = sbt.transaction_id +ON CONFLICT DO NOTHING + +-- insertOrIgnoreStableDataItemTags +INSERT INTO stable_data_item_tags ( + tag_name_hash, tag_value_hash, + height, + block_transaction_index, + data_item_tag_index, data_item_id, + parent_id, root_transaction_id +) SELECT + ndit.tag_name_hash, ndit.tag_value_hash, + IFNULL(st.height, mt.height), + IFNULL(st.block_transaction_index, sbt.block_transaction_index), + ndit.data_item_tag_index, ndit.data_item_id, + ndi.parent_id, ndi.root_transaction_id +FROM new_data_item_tags ndit +JOIN new_data_items ndi ON ndit.data_item_id = ndi.id +LEFT JOIN core.stable_transactions st + ON ndi.root_transaction_id = st.id + AND st.height < @end_height +LEFT JOIN core.missing_transactions mt + ON ndi.root_transaction_id = mt.transaction_id + AND mt.height < @end_height +LEFT JOIN core.stable_block_transactions sbt + ON mt.transaction_id = sbt.transaction_id +ON CONFLICT DO NOTHING diff --git a/src/database/standalone-sqlite.test.ts b/src/database/standalone-sqlite.test.ts index f553c614..d78fad4d 100644 --- a/src/database/standalone-sqlite.test.ts +++ 
b/src/database/standalone-sqlite.test.ts @@ -617,7 +617,7 @@ describe('StandaloneSqliteDatabase', () => { await chainSource.getBlockAndTxsByHeight(height); await db.saveBlockAndTxs(block, txs, missingTxIds); - dbWorker.saveStableDataFn(height + 1); + dbWorker.saveCoreStableDataFn(height + 1); const stats = await db.getDebugInfo(); expect(stats.counts.stableBlocks).to.equal(1); @@ -700,7 +700,7 @@ describe('StandaloneSqliteDatabase', () => { expect(stats.counts.newTxs).to.equal(txs.length); await db.saveBlockAndTxs(block, txs, missingTxIds); - dbWorker.saveStableDataFn(height + 1); + dbWorker.saveCoreStableDataFn(height + 1); const sql = ` SELECT sb.*, wo.public_modulus AS owner @@ -807,7 +807,7 @@ describe('StandaloneSqliteDatabase', () => { expect(stats.counts.newTxs).to.equal(txs.length); await db.saveBlockAndTxs(block, txs, missingTxIds); - dbWorker.saveStableDataFn(height + 1); + dbWorker.saveCoreStableDataFn(height + 1); const sql = ` SELECT sb.*, wo.public_modulus AS owner diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index b915a91d..51bd66e9 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -324,8 +324,10 @@ export class StandaloneSqliteDatabaseWorker { insertTxFn: Sqlite.Transaction; insertDataItemFn: Sqlite.Transaction; insertBlockAndTxsFn: Sqlite.Transaction; - saveStableDataFn: Sqlite.Transaction; - deleteStaleNewDataFn: Sqlite.Transaction; + saveCoreStableDataFn: Sqlite.Transaction; + saveBundlesStableDataFn: Sqlite.Transaction; + deleteCoreStaleNewDataFn: Sqlite.Transaction; + deleteBundlesStaleNewDataFn: Sqlite.Transaction; constructor({ coreDbPath, @@ -350,6 +352,8 @@ export class StandaloneSqliteDatabaseWorker { db.pragma('page_size = 4096'); // may depend on OS and FS } + this.dbs.bundles.exec(`ATTACH DATABASE '${coreDbPath}' AS core`); + this.stmts = { core: {}, data: {}, moderation: {}, bundles: {} }; for (const [stmtsKey, stmts] of Object.entries(this.stmts)) { @@ -417,7 +421,7 @@ export class StandaloneSqliteDatabaseWorker { }, ); - this.insertDataItemFn = this.dbs.core.transaction( + this.insertDataItemFn = this.dbs.bundles.transaction( (item: NormalizedDataItem, height?: number) => { // Insert the data item const rows = dataItemToDbRows(item); @@ -544,7 +548,7 @@ export class StandaloneSqliteDatabaseWorker { }, ); - this.saveStableDataFn = this.dbs.core.transaction((endHeight: number) => { + this.saveCoreStableDataFn = this.dbs.core.transaction((endHeight: number) => { this.stmts.core.insertOrIgnoreStableBlocks.run({ end_height: endHeight, }); @@ -562,7 +566,19 @@ export class StandaloneSqliteDatabaseWorker { }); }); - this.deleteStaleNewDataFn = this.dbs.core.transaction( + this.saveBundlesStableDataFn = this.dbs.bundles.transaction( + (endHeight: number) => { + this.stmts.bundles.insertOrIgnoreStableDataItems.run({ + end_height: endHeight, + }); + + this.stmts.bundles.insertOrIgnoreStableDataItemTags.run({ + end_height: endHeight, + }); + }, + ); + + this.deleteCoreStaleNewDataFn = this.dbs.core.transaction( (heightThreshold: number, createdAtThreshold: number) => { // Deletes missing_transactions that have been inserted asyncronously this.stmts.core.deleteStaleMissingTransactions.run({ @@ -588,6 +604,18 @@ export class StandaloneSqliteDatabaseWorker { }); }, ); + + this.deleteBundlesStaleNewDataFn = this.dbs.bundles.transaction( + (heightThreshold: number) => { + this.stmts.bundles.deleteStaleNewDataItems.run({ + height_threshold: heightThreshold, + }); + + 
this.stmts.bundles.deleteStaleNewDataItemTags.run({ + height_threshold: heightThreshold, + }); + }, + ); + } getMaxHeight() { @@ -645,13 +673,15 @@ export class StandaloneSqliteDatabaseWorker { this.stmts.core.selectMaxStableBlockTimestamp.get(); const endHeight = block.height - MAX_FORK_DEPTH; - this.saveStableDataFn(endHeight); + this.saveCoreStableDataFn(endHeight); + this.saveBundlesStableDataFn(endHeight); - this.deleteStaleNewDataFn( + this.deleteCoreStaleNewDataFn( endHeight, maxStableBlockTimestamp - NEW_TX_CLEANUP_WAIT_SECS, ); + this.deleteBundlesStaleNewDataFn(endHeight); } } From 98e76a911372e3299594c4c01f88a41e4c6e6c4c Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 23 May 2023 21:40:01 -0500 Subject: [PATCH 06/33] feat(bundles): improve data item height tracking + optimize stable flushing PE-3769 Clears the heights on data items > fork height when forks occur and updates data items related to L1 TXs when L1 TX heights are set. Also adds a height condition to the query for data items to flush to stable to avoid unnecessary work when joining to L1 stable tables to retrieve canonical heights. Note: further optimization may still be possible. It may be possible to eliminate one of the joins by replacing it with a join to stable_block_transactions if we add height to stable_block_transactions. Though it's unclear how much performance improvement that would yield. --- src/database/sql/bundles/async-import.sql | 13 ++++ src/database/sql/bundles/flush.sql | 2 + src/database/sql/bundles/height-reset.sql | 9 +++ src/database/sql/core/async-import.sql | 14 ----- src/database/sql/core/import.sql | 10 +++ src/database/standalone-sqlite.ts | 74 ++++++++++++++++------- 6 files changed, 86 insertions(+), 36 deletions(-) create mode 100644 src/database/sql/bundles/async-import.sql create mode 100644 src/database/sql/bundles/height-reset.sql diff --git a/src/database/sql/bundles/async-import.sql b/src/database/sql/bundles/async-import.sql new file mode 100644 index 00000000..f8810380 --- /dev/null +++ b/src/database/sql/bundles/async-import.sql @@ -0,0 +1,13 @@ +-- selectTransactionHeight +SELECT height +FROM new_transactions +WHERE id = @transaction_id AND height IS NOT NULL +UNION +SELECT height +FROM stable_transactions +WHERE id = @transaction_id +UNION +SELECT height +FROM missing_transactions +WHERE transaction_id = @transaction_id +LIMIT 1 diff --git a/src/database/sql/bundles/flush.sql b/src/database/sql/bundles/flush.sql index 62fd3a49..6b7cad26 100644 --- a/src/database/sql/bundles/flush.sql +++ b/src/database/sql/bundles/flush.sql @@ -23,6 +23,7 @@ LEFT JOIN core.missing_transactions mt AND mt.height < @end_height LEFT JOIN core.stable_block_transactions sbt ON mt.transaction_id = sbt.transaction_id +WHERE ndi.height < @end_height ON CONFLICT DO NOTHING -- insertOrIgnoreStableDataItemTags @@ -48,4 +49,5 @@ LEFT JOIN core.missing_transactions mt AND mt.height < @end_height LEFT JOIN core.stable_block_transactions sbt ON mt.transaction_id = sbt.transaction_id +WHERE ndit.height < @end_height ON CONFLICT DO NOTHING diff --git a/src/database/sql/bundles/height-reset.sql b/src/database/sql/bundles/height-reset.sql new file mode 100644 index 00000000..48b9941f --- /dev/null +++ b/src/database/sql/bundles/height-reset.sql @@ -0,0 +1,9 @@ +-- clearHeightsOnNewDataItems +UPDATE new_data_items +SET height = NULL +WHERE height > @height + +-- clearHeightsOnNewDataItemTags +UPDATE new_data_item_tags +SET height = NULL +WHERE height > @height diff --git
a/src/database/sql/core/async-import.sql b/src/database/sql/core/async-import.sql index 84f36a6f..0ee429ef 100644 --- a/src/database/sql/core/async-import.sql +++ b/src/database/sql/core/async-import.sql @@ -22,17 +22,3 @@ WHERE transaction_id = @transaction_id SELECT MAX(height)+1 FROM stable_blocks ) - --- selectTransactionHeight -SELECT height -FROM new_transactions -WHERE id = @transaction_id AND height IS NOT NULL -UNION -SELECT height -FROM stable_transactions -WHERE id = @transaction_id AND height IS NOT NULL -UNION -SELECT height -FROM missing_transactions -WHERE transaction_id = @transaction_id -LIMIT 1 diff --git a/src/database/sql/core/import.sql b/src/database/sql/core/import.sql index 105ceda3..198b02f9 100644 --- a/src/database/sql/core/import.sql +++ b/src/database/sql/core/import.sql @@ -1,3 +1,13 @@ +-- updateNewDataItemHeights +UPDATE bundles.new_data_items +SET height = @height +WHERE root_transaction_id = @transaction_id + +-- updateNewDataItemTagHeights +UPDATE bundles.new_data_item_tags +SET height = @height +WHERE root_transaction_id = @transaction_id + -- insertOrIgnoreWallet INSERT INTO wallets (address, public_modulus) VALUES (@address, @public_modulus) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 51bd66e9..7c9e920b 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -320,7 +320,8 @@ export class StandaloneSqliteDatabaseWorker { }; // Transactions - resetToHeightFn: Sqlite.Transaction; + resetBundlesToHeightFn: Sqlite.Transaction; + resetCoreToHeightFn: Sqlite.Transaction; insertTxFn: Sqlite.Transaction; insertDataItemFn: Sqlite.Transaction; insertBlockAndTxsFn: Sqlite.Transaction; @@ -352,6 +353,7 @@ export class StandaloneSqliteDatabaseWorker { db.pragma('page_size = 4096'); // may depend on OS and FS } + this.dbs.core.exec(`ATTACH DATABASE '${bundlesDbPath}' AS bundles`); this.dbs.bundles.exec(`ATTACH DATABASE '${coreDbPath}' AS core`); this.stmts = { core: {}, data: {}, moderation: {}, bundles: {} }; @@ -378,7 +380,12 @@ export class StandaloneSqliteDatabaseWorker { } // Transactions - this.resetToHeightFn = this.dbs.core.transaction((height: number) => { + this.resetBundlesToHeightFn = this.dbs.bundles.transaction((height: number) => { + this.stmts.bundles.clearHeightsOnNewDataItems.run({ height }); + this.stmts.bundles.clearHeightsOnNewDataItemTags.run({ height }); + }); + + this.resetCoreToHeightFn = this.dbs.core.transaction((height: number) => { this.stmts.core.clearHeightsOnNewTransactions.run({ height }); this.stmts.core.clearHeightsOnNewTransactionTags.run({ height }); this.stmts.core.truncateNewBlocksAt.run({ height }); @@ -388,8 +395,19 @@ export class StandaloneSqliteDatabaseWorker { this.insertTxFn = this.dbs.core.transaction( (tx: PartialJsonTransaction, height?: number) => { - // Insert the transaction - const rows = txToDbRows(tx); + const rows = txToDbRows(tx, height); + + if (height !== undefined) { + this.stmts.core.updateNewDataItemHeights.run({ + height, + transaction_id: rows.newTx.id, + }); + + this.stmts.core.updateNewDataItemTagHeights.run({ + height, + transaction_id: rows.newTx.id, + }); + } for (const row of rows.tagNames) { this.stmts.core.insertOrIgnoreTagName.run(row); @@ -423,8 +441,7 @@ export class StandaloneSqliteDatabaseWorker { this.insertDataItemFn = this.dbs.bundles.transaction( (item: NormalizedDataItem, height?: number) => { - // Insert the data item - const rows = dataItemToDbRows(item); + const rows = dataItemToDbRows(item, height); for 
(const row of rows.tagNames) { this.stmts.bundles.insertOrIgnoreTagName.run(row); @@ -514,6 +531,16 @@ export class StandaloneSqliteDatabaseWorker { for (const tx of txs) { const rows = txToDbRows(tx, block.height); + this.stmts.core.updateNewDataItemHeights.run({ + height: block.height, + transaction_id: rows.newTx.id, + }); + + this.stmts.core.updateNewDataItemTagHeights.run({ + height: block.height, + transaction_id: rows.newTx.id, + }); + for (const row of rows.tagNames) { this.stmts.core.insertOrIgnoreTagName.run(row); } @@ -548,23 +575,25 @@ export class StandaloneSqliteDatabaseWorker { }, ); - this.saveCoreStableDataFn = this.dbs.core.transaction((endHeight: number) => { - this.stmts.core.insertOrIgnoreStableBlocks.run({ - end_height: endHeight, - }); + this.saveCoreStableDataFn = this.dbs.core.transaction( + (endHeight: number) => { + this.stmts.core.insertOrIgnoreStableBlocks.run({ + end_height: endHeight, + }); - this.stmts.core.insertOrIgnoreStableBlockTransactions.run({ - end_height: endHeight, - }); + this.stmts.core.insertOrIgnoreStableBlockTransactions.run({ + end_height: endHeight, + }); - this.stmts.core.insertOrIgnoreStableTransactions.run({ - end_height: endHeight, - }); + this.stmts.core.insertOrIgnoreStableTransactions.run({ + end_height: endHeight, + }); - this.stmts.core.insertOrIgnoreStableTransactionTags.run({ - end_height: endHeight, - }); - }); + this.stmts.core.insertOrIgnoreStableTransactionTags.run({ + end_height: endHeight, + }); + }, + ); this.saveBundlesStableDataFn = this.dbs.bundles.transaction( (endHeight: number) => { @@ -641,7 +670,8 @@ export class StandaloneSqliteDatabaseWorker { } resetToHeight(height: number) { - this.resetToHeightFn(height); + this.resetBundlesToHeightFn(height); + this.resetCoreToHeightFn(height); } saveTx(tx: PartialJsonTransaction) { @@ -655,7 +685,7 @@ export class StandaloneSqliteDatabaseWorker { saveDataItem(item: NormalizedDataItem) { const rootTxId = fromB64Url(item.root_tx_id); - const maybeTxHeight = this.stmts.core.selectTransactionHeight.get({ + const maybeTxHeight = this.stmts.bundles.selectTransactionHeight.get({ transaction_id: rootTxId, })?.height; this.insertDataItemFn(item, maybeTxHeight); From 4534830faeba4d838ed2a33ec86276969230bf32 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Wed, 24 May 2023 14:13:01 -0500 Subject: [PATCH 07/33] fix(bundles): set data item heights even when L1 TX retrieval fails PE-3769 Sometimes we can't fetch transactions when indexing a block. In those cases we still know the height, so we should ensure the height is set on any associated data items. 
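A minimal sketch of the idea, assuming the bundles DB layout from the earlier patches (the UPDATE statements mirror the updateNewDataItemHeights / updateNewDataItemTagHeights queries from import.sql):

    import Sqlite from 'better-sqlite3';

    function backfillMissingTxHeights(
      db: Sqlite.Database,
      blockHeight: number,
      missingTxIds: Buffer[],
    ): void {
      const updateItemHeights = db.prepare(
        'UPDATE new_data_items SET height = @height WHERE root_transaction_id = @transaction_id',
      );
      const updateTagHeights = db.prepare(
        'UPDATE new_data_item_tags SET height = @height WHERE root_transaction_id = @transaction_id',
      );
      for (const txId of missingTxIds) {
        // The TX body couldn't be fetched, but the block still tells us its
        // height, so data items already unbundled from it can be stabilized.
        updateItemHeights.run({ height: blockHeight, transaction_id: txId });
        updateTagHeights.run({ height: blockHeight, transaction_id: txId });
      }
    }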
--- src/database/standalone-sqlite.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 7c9e920b..cebb2770 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -566,6 +566,16 @@ export class StandaloneSqliteDatabaseWorker { for (const txIdStr of missingTxIds) { const txId = fromB64Url(txIdStr); + this.stmts.core.updateNewDataItemHeights.run({ + height: block.height, + transaction_id: txId, + }); + + this.stmts.core.updateNewDataItemTagHeights.run({ + height: block.height, + transaction_id: txId, + }); + this.stmts.core.insertOrIgnoreMissingTransaction.run({ block_indep_hash: indepHash, transaction_id: txId, From e5122f11663b5af8bc7b6c31ff8c439f164f1548 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Wed, 24 May 2023 18:04:44 -0500 Subject: [PATCH 08/33] perf(sqlite bundles): remove more data item flushing joins PE-3769 Further simplifies joins when copying new data items to the stable tables and cleaning up stale data items. Rather than getting height from stable L1 tables, we rely on height on the new data items and only join to stable L1 tables to get the block transaction index. --- src/database/sql/bundles/cleanup.sql | 28 ++++++------------- src/database/sql/bundles/flush.sql | 40 +++++++++------------------- src/database/standalone-sqlite.ts | 10 +++++-- 3 files changed, 28 insertions(+), 50 deletions(-) diff --git a/src/database/sql/bundles/cleanup.sql b/src/database/sql/bundles/cleanup.sql index e9d8e72b..81bb9216 100644 --- a/src/database/sql/bundles/cleanup.sql +++ b/src/database/sql/bundles/cleanup.sql @@ -1,25 +1,13 @@ -- deleteStaleNewDataItems DELETE FROM new_data_items -WHERE id IN ( - SELECT DISTINCT ndi.id - FROM new_data_items ndi - LEFT JOIN core.stable_transactions st - ON ndi.root_transaction_id = st.id - AND st.height < @height_threshold - LEFT JOIN core.missing_transactions mt - ON ndi.root_transaction_id = mt.transaction_id - AND mt.height < @height_threshold -) +WHERE height < @height_threshold OR ( + height IS NULL AND + indexed_at < @indexed_at_threshold + ) -- deleteStaleNewDataItemTags DELETE FROM new_data_item_tags -WHERE data_item_id IN ( - SELECT DISTINCT ndi.id - FROM new_data_items ndi - LEFT JOIN core.stable_transactions st - ON ndi.root_transaction_id = st.id - AND st.height < @height_threshold - LEFT JOIN core.missing_transactions mt - ON ndi.root_transaction_id = mt.transaction_id - AND mt.height < @height_threshold -) +WHERE height < @height_threshold OR ( + height IS NULL AND + indexed_at < @indexed_at_threshold + ) diff --git a/src/database/sql/bundles/flush.sql b/src/database/sql/bundles/flush.sql index 6b7cad26..f148015b 100644 --- a/src/database/sql/bundles/flush.sql +++ b/src/database/sql/bundles/flush.sql @@ -1,53 +1,37 @@ -- insertOrIgnoreStableDataItems INSERT INTO stable_data_items ( id, parent_id, root_transaction_id, - height, - block_transaction_index, + height, block_transaction_index, signature, anchor, owner_address, target, data_offset, data_size, content_type, tag_count, indexed_at -) -SELECT +) SELECT ndi.id, ndi.parent_id, ndi.root_transaction_id, - IFNULL(st.height, mt.height), - IFNULL(st.block_transaction_index, sbt.block_transaction_index), + ndi.height, sbt.block_transaction_index, ndi.signature, ndi.anchor, ndi.owner_address, ndi.target, ndi.data_offset, ndi.data_size, ndi.content_type, ndi.tag_count, ndi.indexed_at FROM new_data_items ndi -LEFT JOIN core.stable_transactions st - ON 
ndi.root_transaction_id = st.id - AND st.height < @end_height -LEFT JOIN core.missing_transactions mt - ON ndi.root_transaction_id = mt.transaction_id - AND mt.height < @end_height -LEFT JOIN core.stable_block_transactions sbt - ON mt.transaction_id = sbt.transaction_id +JOIN core.stable_block_transactions sbt + ON ndi.root_transaction_id = sbt.transaction_id WHERE ndi.height < @end_height ON CONFLICT DO NOTHING -- insertOrIgnoreStableDataItemTags INSERT INTO stable_data_item_tags ( tag_name_hash, tag_value_hash, - height, - block_transaction_index, + height, block_transaction_index, data_item_tag_index, data_item_id, parent_id, root_transaction_id ) SELECT ndit.tag_name_hash, ndit.tag_value_hash, - IFNULL(st.height, mt.height), - IFNULL(st.block_transaction_index, sbt.block_transaction_index), + ndit.height, sbt.block_transaction_index, ndit.data_item_tag_index, ndit.data_item_id, - ndi.parent_id, ndi.root_transaction_id + ndi.parent_id, ndit.root_transaction_id FROM new_data_item_tags ndit -JOIN new_data_items ndi ON ndit.data_item_id = ndi.id -LEFT JOIN core.stable_transactions st - ON ndi.root_transaction_id = st.id - AND st.height < @end_height -LEFT JOIN core.missing_transactions mt - ON ndi.root_transaction_id = mt.transaction_id - AND mt.height < @end_height -LEFT JOIN core.stable_block_transactions sbt - ON mt.transaction_id = sbt.transaction_id +JOIN new_data_items ndi + ON ndit.data_item_id = ndi.id +JOIN core.stable_block_transactions sbt + ON ndit.root_transaction_id = sbt.transaction_id WHERE ndit.height < @end_height ON CONFLICT DO NOTHING

diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts
index cebb2770..9083d347 100644
--- a/src/database/standalone-sqlite.ts
+++ b/src/database/standalone-sqlite.ts
@@ -59,6 +59,7 @@ const CPU_COUNT = os.cpus().length; const STABLE_FLUSH_INTERVAL = 5; const NEW_TX_CLEANUP_WAIT_SECS = 60 * 60 * 2; +const NEW_DATA_ITEM_CLEANUP_WAIT_SECS = 60 * 60 * 2; const LOW_SELECTIVITY_TAG_NAMES = new Set(['App-Name', 'Content-Type']); function tagJoinSortPriority(tag: { name: string; values: string[] }) {
@@ -645,13 +646,15 @@ export class StandaloneSqliteDatabaseWorker { ); this.deleteBundlesStaleNewDataFn = this.dbs.bundles.transaction( - (heightThreshold: number) => { + (heightThreshold: number, indexedAtThreshold: number) => { this.stmts.bundles.deleteStaleNewDataItems.run({ height_threshold: heightThreshold, + indexed_at_threshold: indexedAtThreshold, }); this.stmts.bundles.deleteStaleNewDataItemTags.run({ height_threshold: heightThreshold, + indexed_at_threshold: indexedAtThreshold, }); }, );
@@ -720,7 +723,10 @@ export class StandaloneSqliteDatabaseWorker { endHeight, maxStableBlockTimestamp - NEW_TX_CLEANUP_WAIT_SECS, ); - this.deleteBundlesStaleNewDataFn(endHeight); + this.deleteBundlesStaleNewDataFn( + endHeight, + maxStableBlockTimestamp - NEW_DATA_ITEM_CLEANUP_WAIT_SECS, + ); } }

From adad9a3c7e0138f012372edd4dd41043a833ee16 Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Tue, 30 May 2023 17:10:13 -0500
Subject: [PATCH 09/33] feat(sqlite bundles): add ability to query stable data items

PE-3769

Combines stable transactions and stable data items using a UNION in the SQL query. Each subquery in the UNION has its own ORDER BY and LIMIT. This allows the sub-selects to do most of the work before the union is computed. This change also implements returning parent/bundledIn for data items.
However, filtering based on bundledIn and sorting data items by ID are not functional yet and will be implemented in future commits. --- src/database/standalone-sqlite.ts | 124 ++++++++++++++++++++++++------ src/routes/graphql/resolvers.ts | 10 +-- src/types.d.ts | 1 + 3 files changed, 106 insertions(+), 29 deletions(-) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 9083d347..113bbfbb 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -381,10 +381,12 @@ export class StandaloneSqliteDatabaseWorker { } // Transactions - this.resetBundlesToHeightFn = this.dbs.bundles.transaction((height: number) => { - this.stmts.bundles.clearHeightsOnNewDataItems.run({ height }); - this.stmts.bundles.clearHeightsOnNewDataItemTags.run({ height }); - }); + this.resetBundlesToHeightFn = this.dbs.bundles.transaction( + (height: number) => { + this.stmts.bundles.clearHeightsOnNewDataItems.run({ height }); + this.stmts.bundles.clearHeightsOnNewDataItemTags.run({ height }); + }, + ); this.resetCoreToHeightFn = this.dbs.core.transaction((height: number) => { this.stmts.core.clearHeightsOnNewTransactions.run({ height }); @@ -883,7 +885,7 @@ export class StandaloneSqliteDatabaseWorker { 'nt.height AS height', 'nbt.block_transaction_index AS block_transaction_index', 'id', - 'last_tx', + 'last_tx AS anchor', 'signature', 'target', 'CAST(reward AS TEXT) AS reward', @@ -914,7 +916,7 @@ export class StandaloneSqliteDatabaseWorker { 'st.height AS height', 'st.block_transaction_index AS block_transaction_index', 'id', - 'last_tx', + 'last_tx AS anchor', 'signature', 'target', 'CAST(reward AS TEXT) AS reward', @@ -926,6 +928,7 @@ export class StandaloneSqliteDatabaseWorker { 'sb.indep_hash AS block_indep_hash', 'sb.block_timestamp AS block_timestamp', 'sb.previous_block AS block_previous_block', + "'' AS parent_id", ) .from('stable_transactions st') .join('stable_blocks sb', { @@ -936,6 +939,35 @@ export class StandaloneSqliteDatabaseWorker { }); } + getGqlStableDataItemsBaseSql() { + return sql + .select( + 'sdi.height AS height', + 'sdi.block_transaction_index AS block_transaction_index', + 'id', + 'anchor', + 'signature', + 'target', + "'' AS reward", + "'' AS quantity", + 'CAST(data_size AS TEXT) AS data_size', + 'content_type', + 'owner_address', + 'public_modulus', + 'sb.indep_hash AS block_indep_hash', + 'sb.block_timestamp AS block_timestamp', + 'sb.previous_block AS block_previous_block', + 'sdi.parent_id', + ) + .from('bundles.stable_data_items sdi') + .join('stable_blocks sb', { + 'sdi.height': 'sb.height', + }) + .join('bundles.wallets w', { + 'sdi.owner_address': 'w.address', + }); + } + addGqlTransactionFilters({ query, source, @@ -949,7 +981,7 @@ export class StandaloneSqliteDatabaseWorker { tags = [], }: { query: sql.SelectStatement; - source: 'stable' | 'new'; + source: 'stable_txs' | 'stable_items' | 'new_txs' | 'new_items'; cursor?: string; sortOrder?: 'HEIGHT_DESC' | 'HEIGHT_ASC'; ids?: string[]; @@ -963,19 +995,34 @@ export class StandaloneSqliteDatabaseWorker { let heightTableAlias: string; let blockTransactionIndexTableAlias: string; let tagsTable: string; + let tagIdColumn: string; + let tagIndexColumn: string; let heightSortTableAlias: string; let blockTransactionIndexSortTableAlias: string; let maxDbHeight = Infinity; - if (source === 'stable') { + if (source === 'stable_txs') { txTableAlias = 'st'; heightTableAlias = 'st'; blockTransactionIndexTableAlias = 'st'; tagsTable = 'stable_transaction_tags'; + tagIdColumn = 
'transaction_id'; + tagIndexColumn = 'transaction_tag_index'; heightSortTableAlias = 'st'; blockTransactionIndexSortTableAlias = 'st'; maxDbHeight = this.stmts.core.selectMaxStableBlockHeight.get() .height as number; + } else if (source === 'stable_items') { + txTableAlias = 'sdi'; + heightTableAlias = 'sdi'; + blockTransactionIndexTableAlias = 'sdi'; + tagsTable = 'stable_data_item_tags'; + tagIdColumn = 'data_item_id'; + tagIndexColumn = 'data_item_tag_index'; + heightSortTableAlias = 'sdi'; + blockTransactionIndexSortTableAlias = 'sdi'; + maxDbHeight = this.stmts.core.selectMaxStableBlockHeight.get() + .height as number; } else { txTableAlias = 'nt'; heightTableAlias = 'nt'; @@ -1000,12 +1047,12 @@ export class StandaloneSqliteDatabaseWorker { } if (tags) { - // To improve performance, force tags with large result to be last + // To improve performance, force tags with large result sets to be last const sortByTagJoinPriority = R.sortBy(tagJoinSortPriority); sortByTagJoinPriority(tags).forEach((tag, index) => { const tagAlias = `"${index}_${index}"`; let joinCond: { [key: string]: string }; - if (source === 'stable') { + if (source === 'stable_txs' || source === 'stable_items') { if (index === 0) { heightSortTableAlias = tagAlias; blockTransactionIndexSortTableAlias = tagAlias; @@ -1016,7 +1063,7 @@ export class StandaloneSqliteDatabaseWorker { } else { const previousTagAlias = `"${index - 1}_${index - 1}"`; joinCond = { - [`${previousTagAlias}.transaction_id`]: `${tagAlias}.transaction_id`, + [`${previousTagAlias}.${tagIdColumn}`]: `${tagAlias}.${tagIdColumn}`, }; // This condition forces the use of the transaction_id index rather // than the name and value index. The transaction_id index is @@ -1024,8 +1071,8 @@ export class StandaloneSqliteDatabaseWorker { // first in the GraphQL query. query.where( sql.notEq( - `${previousTagAlias}.transaction_tag_index`, - sql(`${tagAlias}.transaction_tag_index`), + `${previousTagAlias}.${tagIndexColumn}`, + sql(`${tagAlias}.${tagIndexColumn}`), ), ); } @@ -1122,7 +1169,7 @@ export class StandaloneSqliteDatabaseWorker { this.addGqlTransactionFilters({ query, - source: 'new', + source: 'new_txs', cursor, sortOrder, ids, @@ -1144,7 +1191,7 @@ export class StandaloneSqliteDatabaseWorker { height: tx.height, blockTransactionIndex: tx.block_transaction_index, id: toB64Url(tx.id), - anchor: toB64Url(tx.last_tx), + anchor: toB64Url(tx.anchor), signature: toB64Url(tx.signature), recipient: tx.target ? 
toB64Url(tx.target) : undefined, ownerAddress: toB64Url(tx.owner_address), @@ -1157,6 +1204,7 @@ export class StandaloneSqliteDatabaseWorker { blockIndepHash: toB64Url(tx.block_indep_hash), blockTimestamp: tx.block_timestamp, blockPreviousBlock: toB64Url(tx.block_previous_block), + parentId: '', // TODO implement this })); } @@ -1181,11 +1229,11 @@ export class StandaloneSqliteDatabaseWorker { maxHeight?: number; tags?: { name: string; values: string[] }[]; }) { - const query = this.getGqlStableTransactionsBaseSql(); + const txsQuery = this.getGqlStableTransactionsBaseSql(); this.addGqlTransactionFilters({ - query, - source: 'stable', + query: txsQuery, + source: 'stable_txs', cursor, sortOrder, ids, @@ -1196,18 +1244,47 @@ export class StandaloneSqliteDatabaseWorker { tags, }); - const queryParams = query.toParams(); - const sql = queryParams.text; - const sqliteParams = toSqliteParams(queryParams); + const txsQueryParams = txsQuery.toParams(); + const txsSql = txsQueryParams.text; + const txsFinalSql = `${txsSql} LIMIT ${pageSize + 1}`; + + const itemsQuery = this.getGqlStableDataItemsBaseSql(); + + this.addGqlTransactionFilters({ + query: itemsQuery, + source: 'stable_items', + cursor, + sortOrder, + ids, + recipients, + owners, + minHeight, + maxHeight, + tags, + }); + + const itemsQueryParams = itemsQuery.toParams(); + const itemsSql = itemsQueryParams.text; + const itemsFinalSql = `${itemsSql} LIMIT ${pageSize + 1}`; + + const sqlSortOrder = sortOrder === 'HEIGHT_DESC' ? 'DESC' : 'ASC'; + const sql: string = ` + SELECT * FROM (${txsFinalSql}) + UNION + SELECT * FROM (${itemsFinalSql}) + ORDER BY 1 ${sqlSortOrder}, 2 ${sqlSortOrder} + LIMIT ${pageSize + 1} + `; + const sqliteParams = toSqliteParams(txsQueryParams); return this.dbs.core - .prepare(`${sql} LIMIT ${pageSize + 1}`) + .prepare(sql) .all(sqliteParams) .map((tx) => ({ height: tx.height, blockTransactionIndex: tx.block_transaction_index, id: toB64Url(tx.id), - anchor: toB64Url(tx.last_tx), + anchor: toB64Url(tx.anchor), signature: toB64Url(tx.signature), recipient: tx.target ? toB64Url(tx.target) : undefined, ownerAddress: toB64Url(tx.owner_address), @@ -1220,6 +1297,7 @@ export class StandaloneSqliteDatabaseWorker { blockIndepHash: toB64Url(tx.block_indep_hash), blockTimestamp: tx.block_timestamp, blockPreviousBlock: toB64Url(tx.block_previous_block), + parentId: tx.parent_id ? 
toB64Url(tx.parent_id) : '', })); } diff --git a/src/routes/graphql/resolvers.ts b/src/routes/graphql/resolvers.ts index e5157309..f6ccb0aa 100644 --- a/src/routes/graphql/resolvers.ts +++ b/src/routes/graphql/resolvers.ts @@ -64,17 +64,15 @@ export function resolveTxOwner(tx: GqlTransaction) { }; } -// TODO implement when L2 data is added -export function resolveTxParent() { +export function resolveTxParent(tx: GqlTransaction) { return { - id: '', + id: tx.parentId, }; } -// TODO implement when L2 data is added -export function resolveTxBundledIn() { +export function resolveTxBundledIn(tx: GqlTransaction) { return { - id: '', + id: tx.parentId, }; } diff --git a/src/types.d.ts b/src/types.d.ts index 14bd587f..fef8a7e7 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -231,6 +231,7 @@ interface GqlTransaction { blockTimestamp: number | undefined; height: number | undefined; blockPreviousBlock: string | undefined; + parentId: string | undefined; } interface GqlTransactionEdge { From 3c904ca0477c7bcfa0efef57c8f3bb5c63d50d34 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Wed, 31 May 2023 15:59:21 -0500 Subject: [PATCH 10/33] feat(sqlite graphql): include data items in sorting and cursors PE-3769 Adds data items to GQL sorting and cursors. Data items are sorted by ID after block height and block TX index. ID was chosen as opposed to bundle offsets or indexes because we want duplicates of the same item sorted consistently where possible. Also, bundle data item indexes are potentially confusing when data item filtering is used. In order to accomplish this, the cursor condition in the query was changed from a simple numeric comparison to a set of comparisons against the cursor components. An OR is required in the comparisons to avoid comparing against irrelevant conditions (e.g. block TX index comparison when height > cursor height). This clutters the WHERE conditions, but is still fairly readable. Also it may perform better since it makes the height comparison legible to SQLite. 
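For reference, the expanded cursor predicate described above, sketched for the HEIGHT_DESC case (parameter names are illustrative, not the builder's exact output); the leading height bound is redundant with the OR branches but gives SQLite a plain range condition it can plan against:

// Sketch: keyset-pagination condition over the cursor components
// (height, block_transaction_index, data_item_id), descending.
const descendingCursorCondition = `
  height <= @cursor_height
  AND (
    height < @cursor_height
    OR (height = @cursor_height
        AND block_transaction_index < @cursor_block_transaction_index)
    OR (height = @cursor_height
        AND block_transaction_index = @cursor_block_transaction_index
        AND data_item_id < @cursor_data_item_id)
  )
`;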
--- src/database/standalone-sqlite.test.ts | 11 ++- src/database/standalone-sqlite.ts | 98 ++++++++++++++++++++------ 2 files changed, 86 insertions(+), 23 deletions(-) diff --git a/src/database/standalone-sqlite.test.ts b/src/database/standalone-sqlite.test.ts index d78fad4d..023aa2ec 100644 --- a/src/database/standalone-sqlite.test.ts +++ b/src/database/standalone-sqlite.test.ts @@ -52,6 +52,8 @@ const { default: processStream } = arbundles; const HEIGHT = 1138; const BLOCK_TX_INDEX = 42; +const DATA_ITEM_ID = 'zoljIRyzG5hp-R4EZV2q8kFI49OAoy23_B9YJ_yEEws'; +const CURSOR = 'WzExMzgsNDIsInpvbGpJUnl6RzVocC1SNEVaVjJxOGtGSTQ5T0FveTIzX0I5WUpfeUVFd3MiXQ'; describe('SQLite helper functions', () => { describe('toSqliteParams', () => { @@ -71,23 +73,26 @@ describe('SQLite GraphQL cursor functions', () => { encodeTransactionGqlCursor({ height: HEIGHT, blockTransactionIndex: BLOCK_TX_INDEX, + dataItemId: DATA_ITEM_ID, }), - ).to.equal('WzExMzgsNDJd'); + ).to.equal(CURSOR); }); }); describe('decodeTransactionGqlCursor', () => { it('should decode a height and blockTransactionIndex given an encoded cursor', () => { - expect(decodeTransactionGqlCursor('WzExMzgsNDJd')).to.deep.equal({ + expect(decodeTransactionGqlCursor(CURSOR)).to.deep.equal({ height: HEIGHT, blockTransactionIndex: BLOCK_TX_INDEX, + dataItemId: DATA_ITEM_ID, }); }); - it('should return an undefined height and blockTransactionIndex given an undefined cursor', () => { + it('should return an undefined height, blockTransactionIndex, and dataItemId given an undefined cursor', () => { expect(decodeTransactionGqlCursor(undefined)).to.deep.equal({ height: undefined, blockTransactionIndex: undefined, + dataItemId: undefined, }); }); diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 113bbfbb..3742bb1d 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -69,24 +69,32 @@ function tagJoinSortPriority(tag: { name: string; values: string[] }) { export function encodeTransactionGqlCursor({ height, blockTransactionIndex, + dataItemId, }: { height: number; blockTransactionIndex: number; + dataItemId: string | undefined; }) { - return utf8ToB64Url(JSON.stringify([height, blockTransactionIndex])); + return utf8ToB64Url( + JSON.stringify([height, blockTransactionIndex, dataItemId]), + ); } export function decodeTransactionGqlCursor(cursor: string | undefined) { try { if (!cursor) { - return { height: undefined, blockTransactionIndex: undefined }; + return { + height: undefined, + blockTransactionIndex: undefined, + dataItemId: undefined, + }; } - const [height, blockTransactionIndex] = JSON.parse( + const [height, blockTransactionIndex, dataItemId] = JSON.parse( b64UrlToUtf8(cursor), - ) as [number, number]; + ) as [number, number, string | undefined]; - return { height, blockTransactionIndex }; + return { height, blockTransactionIndex, dataItemId }; } catch (error) { throw new ValidationError('Invalid transaction cursor'); } @@ -884,6 +892,7 @@ export class StandaloneSqliteDatabaseWorker { .select( 'nt.height AS height', 'nbt.block_transaction_index AS block_transaction_index', + "x'00' AS data_item_id", 'id', 'last_tx AS anchor', 'signature', @@ -915,6 +924,7 @@ export class StandaloneSqliteDatabaseWorker { .select( 'st.height AS height', 'st.block_transaction_index AS block_transaction_index', + "x'00' AS data_item_id", 'id', 'last_tx AS anchor', 'signature', @@ -944,6 +954,7 @@ export class StandaloneSqliteDatabaseWorker { .select( 'sdi.height AS height', 
'sdi.block_transaction_index AS block_transaction_index', + 'sdi.id AS data_item_id', 'id', 'anchor', 'signature', @@ -1115,32 +1126,77 @@ export class StandaloneSqliteDatabaseWorker { const { height: cursorHeight, blockTransactionIndex: cursorBlockTransactionIndex, + dataItemId: cursorDataItemId, } = decodeTransactionGqlCursor(cursor); if (sortOrder === 'HEIGHT_DESC') { - if (cursorHeight) { + if ( + cursorHeight != undefined && + cursorBlockTransactionIndex != undefined + ) { + let dataItemIdField = source === 'stable_items' ? 'sdi.id' : "x'00'"; query.where( - sql.lt( - `${heightSortTableAlias}.height * 1000 + ${blockTransactionIndexSortTableAlias}.block_transaction_index`, - cursorHeight * 1000 + cursorBlockTransactionIndex ?? 0, + sql.lte(`${heightSortTableAlias}.height`, cursorHeight), + sql.or( + sql.lt(`${heightSortTableAlias}.height`, cursorHeight), + sql.and( + sql.eq(`${heightSortTableAlias}.height`, cursorHeight), + sql.lt( + `${blockTransactionIndexSortTableAlias}.block_transaction_index`, + cursorBlockTransactionIndex, + ), + ), + sql.and( + sql.eq(`${heightSortTableAlias}.height`, cursorHeight), + sql.eq( + `${blockTransactionIndexSortTableAlias}.block_transaction_index`, + cursorBlockTransactionIndex, + ), + sql.lt( + dataItemIdField, + cursorDataItemId + ? fromB64Url(cursorDataItemId) + : Buffer.from([0]), + ), + ), ), ); } - query.orderBy( - `${heightSortTableAlias}.height DESC, ${blockTransactionIndexSortTableAlias}.block_transaction_index DESC`, - ); + query.orderBy('1 DESC, 2 DESC, 3 DESC'); } else { - if (cursorHeight) { + if ( + cursorHeight != undefined && + cursorBlockTransactionIndex != undefined + ) { + let dataItemIdField = source === 'stable_items' ? 'sdi.id' : "x'00'"; query.where( - sql.gt( - `${heightSortTableAlias}.height * 1000 + ${blockTransactionIndexSortTableAlias}.block_transaction_index`, - cursorHeight * 1000 + cursorBlockTransactionIndex ?? 0, + sql.gte(`${heightSortTableAlias}.height`, cursorHeight), + sql.or( + sql.gt(`${heightSortTableAlias}.height`, cursorHeight), + sql.and( + sql.eq(`${heightSortTableAlias}.height`, cursorHeight), + sql.gt( + `${blockTransactionIndexSortTableAlias}.block_transaction_index`, + cursorBlockTransactionIndex, + ), + ), + sql.and( + sql.eq(`${heightSortTableAlias}.height`, cursorHeight), + sql.eq( + `${blockTransactionIndexSortTableAlias}.block_transaction_index`, + cursorBlockTransactionIndex, + ), + sql.gt( + dataItemIdField, + cursorDataItemId + ? fromB64Url(cursorDataItemId) + : Buffer.from([0]), + ), + ), ), ); } - query.orderBy( - `${heightSortTableAlias}.height ASC, ${blockTransactionIndexSortTableAlias}.block_transaction_index ASC`, - ); + query.orderBy('1 ASC, 2 ASC, 3 ASC'); } } @@ -1190,6 +1246,7 @@ export class StandaloneSqliteDatabaseWorker { .map((tx) => ({ height: tx.height, blockTransactionIndex: tx.block_transaction_index, + dataItemId: tx.data_item_id ? toB64Url(tx.data_item_id) : undefined, id: toB64Url(tx.id), anchor: toB64Url(tx.anchor), signature: toB64Url(tx.signature), @@ -1272,7 +1329,7 @@ export class StandaloneSqliteDatabaseWorker { SELECT * FROM (${txsFinalSql}) UNION SELECT * FROM (${itemsFinalSql}) - ORDER BY 1 ${sqlSortOrder}, 2 ${sqlSortOrder} + ORDER BY 1 ${sqlSortOrder}, 2 ${sqlSortOrder}, 3 ${sqlSortOrder} LIMIT ${pageSize + 1} `; const sqliteParams = toSqliteParams(txsQueryParams); @@ -1283,6 +1340,7 @@ export class StandaloneSqliteDatabaseWorker { .map((tx) => ({ height: tx.height, blockTransactionIndex: tx.block_transaction_index, + dataItemId: tx.data_item_id ? 
toB64Url(tx.data_item_id) : undefined, id: toB64Url(tx.id), anchor: toB64Url(tx.anchor), signature: toB64Url(tx.signature),

From 3fdf15a48e56dd2da63ef5eb8a101ee4c609eef7 Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Wed, 31 May 2023 17:18:39 -0500
Subject: [PATCH 11/33] feat(sqlite graphql): add bundledIn/parent filter support

PE-3769

Implements the GraphQL bundledIn/parent filter (parent is deprecated). Filtering on 'null' matches only L1 transactions. Data item queries are skipped in that case. This ensures users do not pay a performance penalty if they only want to query L1. Similarly, L1 transactions are skipped if a bundledIn filter is specified.
---
 src/database/standalone-sqlite.ts | 65 +++++++++++++++++++++++--------
 src/routes/graphql/resolvers.ts | 12 +++++-
 src/types.d.ts | 2 +-
 3 files changed, 61 insertions(+), 18 deletions(-)

diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts
index 3742bb1d..bec542c9 100644
--- a/src/database/standalone-sqlite.ts
+++ b/src/database/standalone-sqlite.ts
@@ -49,6 +49,7 @@ import { ContiguousDataAttributes, ContiguousDataIndex, GqlQueryable, + GqlTransaction, NestedDataIndexWriter, NormalizedDataItem, PartialJsonBlock,
@@ -71,9 +72,9 @@ export function encodeTransactionGqlCursor({ blockTransactionIndex, dataItemId, }: { - height: number; - blockTransactionIndex: number; - dataItemId: string | undefined; + height?: number; + blockTransactionIndex?: number; + dataItemId?: string; }) { return utf8ToB64Url( JSON.stringify([height, blockTransactionIndex, dataItemId]),
@@ -989,6 +990,7 @@ export class StandaloneSqliteDatabaseWorker { owners = [], minHeight = -1, maxHeight = -1, + bundledIn, tags = [], }: { query: sql.SelectStatement;
@@ -1000,6 +1002,7 @@ export class StandaloneSqliteDatabaseWorker { owners?: string[]; minHeight?: number; maxHeight?: number; + bundledIn?: string[] | null; tags: { name: string; values: string[] }[]; }) { let txTableAlias: string;
@@ -1123,6 +1126,12 @@ export class StandaloneSqliteDatabaseWorker { query.where(sql.lte(`${heightTableAlias}.height`, maxHeight)); } + if (Array.isArray(bundledIn) && source === 'stable_items') { + query.where( + sql.in(`${txTableAlias}.parent_id`, bundledIn.map(fromB64Url)), + ); + } + const { height: cursorHeight, blockTransactionIndex: cursorBlockTransactionIndex,
@@ -1219,6 +1228,7 @@ export class StandaloneSqliteDatabaseWorker { owners?: string[]; minHeight?: number; maxHeight?: number; + bundledIn?: string[] | null; tags?: { name: string; values: string[] }[]; }) { const query = this.getGqlNewTransactionsBaseSql();
@@ -1261,7 +1271,7 @@ export class StandaloneSqliteDatabaseWorker { blockIndepHash: toB64Url(tx.block_indep_hash), blockTimestamp: tx.block_timestamp, blockPreviousBlock: toB64Url(tx.block_previous_block), - parentId: '', // TODO implement this + parentId: null, // TODO implement this })); }
@@ -1274,6 +1284,7 @@ export class StandaloneSqliteDatabaseWorker { owners = [], minHeight = -1, maxHeight = -1, + bundledIn, tags = [], }: { pageSize: number;
@@ -1284,6 +1295,7 @@ export class StandaloneSqliteDatabaseWorker { owners?: string[]; minHeight?: number; maxHeight?: number; + bundledIn?: string[] | null; tags?: { name: string; values: string[] }[]; }) { const txsQuery = this.getGqlStableTransactionsBaseSql();
@@ -1298,6 +1310,7 @@ export class StandaloneSqliteDatabaseWorker { owners, minHeight, maxHeight, + bundledIn, tags, });
@@ -1317,6 +1330,7 @@ export class StandaloneSqliteDatabaseWorker { owners, minHeight, maxHeight, +
bundledIn, tags, }); @@ -1325,14 +1339,22 @@ export class StandaloneSqliteDatabaseWorker { const itemsFinalSql = `${itemsSql} LIMIT ${pageSize + 1}`; const sqlSortOrder = sortOrder === 'HEIGHT_DESC' ? 'DESC' : 'ASC'; - const sql: string = ` - SELECT * FROM (${txsFinalSql}) - UNION - SELECT * FROM (${itemsFinalSql}) - ORDER BY 1 ${sqlSortOrder}, 2 ${sqlSortOrder}, 3 ${sqlSortOrder} - LIMIT ${pageSize + 1} - `; - const sqliteParams = toSqliteParams(txsQueryParams); + const sqlParts = []; + if (bundledIn === undefined || bundledIn === null) { + sqlParts.push(`SELECT * FROM (${txsFinalSql})`); + } + if (bundledIn === undefined) { + sqlParts.push('UNION'); + } + if (bundledIn === undefined || Array.isArray(bundledIn)) { + sqlParts.push(`SELECT * FROM (${itemsFinalSql})`); + } + sqlParts.push( + `ORDER BY 1 ${sqlSortOrder}, 2 ${sqlSortOrder}, 3 ${sqlSortOrder}`, + ); + sqlParts.push(`LIMIT ${pageSize + 1}`); + const sql = sqlParts.join(' '); + const sqliteParams = toSqliteParams(itemsQueryParams); return this.dbs.core .prepare(sql) @@ -1355,7 +1377,7 @@ export class StandaloneSqliteDatabaseWorker { blockIndepHash: toB64Url(tx.block_indep_hash), blockTimestamp: tx.block_timestamp, blockPreviousBlock: toB64Url(tx.block_previous_block), - parentId: tx.parent_id ? toB64Url(tx.parent_id) : '', + parentId: tx.parent_id ? toB64Url(tx.parent_id) : null, })); } @@ -1368,6 +1390,7 @@ export class StandaloneSqliteDatabaseWorker { owners = [], minHeight = -1, maxHeight = -1, + bundledIn, tags = [], }: { pageSize: number; @@ -1378,9 +1401,10 @@ export class StandaloneSqliteDatabaseWorker { owners?: string[]; minHeight?: number; maxHeight?: number; + bundledIn?: string[] | null; tags?: { name: string; values: string[] }[]; }) { - let txs; + let txs: GqlTransaction[] = []; if (sortOrder === 'HEIGHT_DESC') { txs = this.getGqlNewTransactions({ @@ -1392,10 +1416,12 @@ export class StandaloneSqliteDatabaseWorker { owners, minHeight, maxHeight, + bundledIn, tags, }); if (txs.length < pageSize) { + const lastTxHeight = txs[txs.length - 1].height; txs = txs.concat( this.getGqlStableTransactions({ pageSize, @@ -1406,7 +1432,8 @@ export class StandaloneSqliteDatabaseWorker { owners, minHeight, maxHeight: - txs.length > 0 ? txs[txs.length - 1].height - 1 : maxHeight, + txs.length > 0 && lastTxHeight ? lastTxHeight - 1 : maxHeight, + bundledIn, tags, }), ); @@ -1421,10 +1448,12 @@ export class StandaloneSqliteDatabaseWorker { owners, minHeight, maxHeight, + bundledIn, tags, }); if (txs.length < pageSize) { + const lastTxHeight = txs[txs.length - 1].height; txs = txs.concat( this.getGqlNewTransactions({ pageSize, @@ -1434,8 +1463,9 @@ export class StandaloneSqliteDatabaseWorker { recipients, owners, minHeight: - txs.length > 0 ? txs[txs.length - 1].height + 1 : minHeight, + txs.length > 0 && lastTxHeight ? 
lastTxHeight : minHeight, maxHeight, + bundledIn, tags, }), ); }
@@ -2094,6 +2124,7 @@ export class StandaloneSqliteDatabase owners = [], minHeight = -1, maxHeight = -1, + bundledIn, tags = [], }: { pageSize: number;
@@ -2104,6 +2135,7 @@ export class StandaloneSqliteDatabase owners?: string[]; minHeight?: number; maxHeight?: number; + bundledIn?: string[]; tags?: { name: string; values: string[] }[]; }) { return this.queueRead('gql', 'getGqlTransactions', [
@@ -2116,6 +2148,7 @@ export class StandaloneSqliteDatabase owners, minHeight, maxHeight, + bundledIn, tags, }, ]);

diff --git a/src/routes/graphql/resolvers.ts b/src/routes/graphql/resolvers.ts
index f6ccb0aa..d7587d4b 100644
--- a/src/routes/graphql/resolvers.ts
+++ b/src/routes/graphql/resolvers.ts
@@ -65,12 +65,18 @@ export function resolveTxOwner(tx: GqlTransaction) { } export function resolveTxParent(tx: GqlTransaction) { + if (tx.parentId === null) { + return null; + } return { id: tx.parentId, }; } export function resolveTxBundledIn(tx: GqlTransaction) { + if (tx.parentId === null) { + return null; + } return { id: tx.parentId, }; }
@@ -99,9 +105,13 @@ export const resolvers: IResolvers = { ids: queryParams.ids, recipients: queryParams.recipients, owners: queryParams.owners, - tags: queryParams.tags || [], minHeight: queryParams.block?.min, maxHeight: queryParams.block?.max, + bundledIn: + queryParams.bundledIn !== undefined + ? queryParams.bundledIn + : queryParams.parent, + tags: queryParams.tags || [], }); }, block: async (_, queryParams, { db }) => {

diff --git a/src/types.d.ts b/src/types.d.ts
index fef8a7e7..377b9dd9 100644
--- a/src/types.d.ts
+++ b/src/types.d.ts
@@ -231,7 +231,7 @@ interface GqlTransaction { blockTimestamp: number | undefined; height: number | undefined; blockPreviousBlock: string | undefined; - parentId: string | undefined; + parentId: string | null; } interface GqlTransactionEdge {

From 0f8894b258e4e66ce10e1d828eebbbf97682900d Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Thu, 1 Jun 2023 16:33:32 -0500
Subject: [PATCH 12/33] feat(sqlite graphql): support querying "new" data items

PE-3769

Adds support for querying data items that have not yet been flushed to the stable (> 50 blocks old) tables. Note: there are still some edge cases to work out with this and new data querying in general. In particular, we don't currently support querying data that has not yet been associated with a block, or data that is technically stable but was indexed late (e.g. due to missing chunks) and has not yet been flushed.
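The new-data path below assembles its UNION the same way as the stable path. Roughly, as a sketch of the sqlParts logic that follows (a null bundledIn means L1 only, an array means data items only, undefined means both):

// Sketch: build the combined page query, skipping whichever side the
// bundledIn filter rules out, so unfiltered L1 queries pay no penalty.
function assembleUnionSql(
  txsSql: string,
  itemsSql: string,
  bundledIn: string[] | null | undefined,
  pageSize: number,
  sortOrder: 'HEIGHT_DESC' | 'HEIGHT_ASC',
): string {
  const dir = sortOrder === 'HEIGHT_DESC' ? 'DESC' : 'ASC';
  const parts: string[] = [];
  if (bundledIn === undefined || bundledIn === null) {
    parts.push(`SELECT * FROM (${txsSql})`);
  }
  if (bundledIn === undefined) {
    parts.push('UNION');
  }
  if (bundledIn === undefined || Array.isArray(bundledIn)) {
    parts.push(`SELECT * FROM (${itemsSql})`);
  }
  parts.push(`ORDER BY 1 ${dir}, 2 ${dir}, 3 ${dir}`);
  parts.push(`LIMIT ${pageSize + 1}`);
  return parts.join(' ');
}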
--- src/database/standalone-sqlite.ts | 107 ++++++++++++++++++++++++++---- 1 file changed, 95 insertions(+), 12 deletions(-) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index bec542c9..5cf5b840 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -907,6 +907,7 @@ export class StandaloneSqliteDatabaseWorker { 'nb.indep_hash AS block_indep_hash', 'nb.block_timestamp AS block_timestamp', 'nb.previous_block AS block_previous_block', + "'' AS parent_id" ) .from('new_transactions nt') .join('new_block_transactions nbt', { @@ -920,6 +921,39 @@ export class StandaloneSqliteDatabaseWorker { }); } + getGqlNewDataItemsBaseSql() { + return sql + .select( + 'ndi.height AS height', + 'nbt.block_transaction_index AS block_transaction_index', + 'id AS data_item_id', + 'id', + 'anchor', + 'signature', + 'target', + "'' AS reward", + "'' AS quantity", + 'CAST(data_size AS TEXT) AS data_size', + 'content_type', + 'owner_address', + 'public_modulus', + 'nb.indep_hash AS block_indep_hash', + 'nb.block_timestamp AS block_timestamp', + 'nb.previous_block AS block_previous_block', + 'ndi.parent_id', + ) + .from('new_data_items ndi') + .join('new_block_transactions nbt', { + 'nbt.transaction_id': 'ndi.root_transaction_id', + }) + .join('new_blocks nb', { + 'nb.indep_hash': 'nbt.block_indep_hash', + }) + .join('bundles.wallets w', { + 'ndi.owner_address': 'w.address', + }); + } + getGqlStableTransactionsBaseSql() { return sql .select( @@ -939,7 +973,7 @@ export class StandaloneSqliteDatabaseWorker { 'sb.indep_hash AS block_indep_hash', 'sb.block_timestamp AS block_timestamp', 'sb.previous_block AS block_previous_block', - "'' AS parent_id", + "'' AS parent_id" ) .from('stable_transactions st') .join('stable_blocks sb', { @@ -1037,13 +1071,20 @@ export class StandaloneSqliteDatabaseWorker { blockTransactionIndexSortTableAlias = 'sdi'; maxDbHeight = this.stmts.core.selectMaxStableBlockHeight.get() .height as number; - } else { + } else if (source === 'new_txs') { txTableAlias = 'nt'; heightTableAlias = 'nt'; blockTransactionIndexTableAlias = 'nbt'; tagsTable = 'new_transaction_tags'; heightSortTableAlias = 'nt'; blockTransactionIndexSortTableAlias = 'nbt'; + } else { + txTableAlias = 'ndi'; + heightTableAlias = 'ndi'; + blockTransactionIndexTableAlias = 'nbt'; + tagsTable = 'new_data_item_tags'; + heightSortTableAlias = 'ndi'; + blockTransactionIndexSortTableAlias = 'nbt'; } if (ids.length > 0) { @@ -1126,7 +1167,10 @@ export class StandaloneSqliteDatabaseWorker { query.where(sql.lte(`${heightTableAlias}.height`, maxHeight)); } - if (Array.isArray(bundledIn) && source === 'stable_items') { + if ( + Array.isArray(bundledIn) && + (source === 'stable_items' || source === 'new_items') + ) { query.where( sql.in(`${txTableAlias}.parent_id`, bundledIn.map(fromB64Url)), ); @@ -1218,6 +1262,7 @@ export class StandaloneSqliteDatabaseWorker { owners = [], minHeight = -1, maxHeight = -1, + bundledIn, tags = [], }: { pageSize: number; @@ -1231,10 +1276,10 @@ export class StandaloneSqliteDatabaseWorker { bundledIn?: string[] | null; tags?: { name: string; values: string[] }[]; }) { - const query = this.getGqlNewTransactionsBaseSql(); + const txsQuery = this.getGqlNewTransactionsBaseSql(); this.addGqlTransactionFilters({ - query, + query: txsQuery, source: 'new_txs', cursor, sortOrder, @@ -1246,12 +1291,50 @@ export class StandaloneSqliteDatabaseWorker { tags, }); - const queryParams = query.toParams(); - const sql = queryParams.text; - const sqliteParams = 
toSqliteParams(queryParams);
+ const txsQueryParams = txsQuery.toParams(); + const txsSql = txsQueryParams.text; + const txsFinalSql = `${txsSql} LIMIT ${pageSize + 1}`; + + const itemsQuery = this.getGqlNewDataItemsBaseSql(); + + this.addGqlTransactionFilters({ + query: itemsQuery, + source: 'new_items', + cursor, + sortOrder, + ids, + recipients, + owners, + minHeight, + maxHeight, + bundledIn, + tags, + }); + + const itemsQueryParams = itemsQuery.toParams(); + const itemsSql = itemsQueryParams.text; + const itemsFinalSql = `${itemsSql} LIMIT ${pageSize + 1}`; + + const sqlSortOrder = sortOrder === 'HEIGHT_DESC' ? 'DESC' : 'ASC'; + const sqlParts = []; + if (bundledIn === undefined || bundledIn === null) { + sqlParts.push(`SELECT * FROM (${txsFinalSql})`); + } + if (bundledIn === undefined) { + sqlParts.push('UNION'); + } + if (bundledIn === undefined || Array.isArray(bundledIn)) { + sqlParts.push(`SELECT * FROM (${itemsFinalSql})`); + } + sqlParts.push( + `ORDER BY 1 ${sqlSortOrder}, 2 ${sqlSortOrder}, 3 ${sqlSortOrder}`, + ); + sqlParts.push(`LIMIT ${pageSize + 1}`); + const sql = sqlParts.join(' '); + const sqliteParams = toSqliteParams(itemsQueryParams); return this.dbs.core - .prepare(`${sql} LIMIT ${pageSize + 1}`) + .prepare(sql) .all(sqliteParams) .map((tx) => ({ height: tx.height,
@@ -1271,7 +1354,7 @@ export class StandaloneSqliteDatabaseWorker { blockIndepHash: toB64Url(tx.block_indep_hash), blockTimestamp: tx.block_timestamp, blockPreviousBlock: toB64Url(tx.block_previous_block), - parentId: null, // TODO implement this + parentId: tx.parent_id ? toB64Url(tx.parent_id) : null, })); }
@@ -1421,7 +1504,7 @@ export class StandaloneSqliteDatabaseWorker { }); if (txs.length < pageSize) { - const lastTxHeight = txs[txs.length - 1].height; + const lastTxHeight = txs[txs.length - 1]?.height; txs = txs.concat( this.getGqlStableTransactions({ pageSize,
@@ -1453,7 +1536,7 @@ }); if (txs.length < pageSize) { - const lastTxHeight = txs[txs.length - 1].height; + const lastTxHeight = txs[txs.length - 1]?.height; txs = txs.concat( this.getGqlNewTransactions({ pageSize,

From f78b6da977f3991430add1009ac995968b503496 Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Fri, 2 Jun 2023 13:18:04 -0500
Subject: [PATCH 13/33] feat(ans-104 bundles): add worker to index data items

PE-3769

Adds a simple queue + worker to index data items (similar to the one for indexing nested data). Currently there is no back pressure or other congestion control, so if the queue gets too backed up it may crash the service. This issue will be addressed in a future commit.
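The shape in question, sketched with the fastq API this codebase already uses (names are illustrative):

import { default as fastq } from 'fastq';
import type { queueAsPromised } from 'fastq';

// Sketch: a fixed-concurrency consumer with an unbounded producer. fastq
// limits how many items are processed at once, but nothing here limits
// how many items can accumulate in the queue itself.
const queue: queueAsPromised<{ id: string }> = fastq.promise(
  async (item) => {
    // ... write the data item to the bundles index ...
  },
  1, // worker count
);

function queueDataItem(item: { id: string }): void {
  queue.push(item); // fire-and-forget: no length check, no back pressure
}

One plausible mitigation would be to check the queue depth before pushing and apply back pressure upstream, but the message above leaves the exact approach open.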
--- docker-compose.yaml | 2 +- src/config.ts | 4 +- src/events.ts | 6 ++- src/lib/ans-104.ts | 4 +- src/lib/bundles.test.ts | 2 +- src/lib/bundles.ts | 3 +- src/system.ts | 14 ++++- src/types.d.ts | 4 ++ src/workers/ans104-data-indexer.ts | 4 +- src/workers/data-item-indexer.ts | 84 ++++++++++++++++++++++++++++++ 10 files changed, 115 insertions(+), 12 deletions(-) create mode 100644 src/workers/data-item-indexer.ts diff --git a/docker-compose.yaml b/docker-compose.yaml index 327aef82..ef7d626b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -43,6 +43,6 @@ services: - AR_IO_WALLET=${AR_IO_WALLET:-} - ADMIN_API_KEY=${ADMIN_API_KEY:-} - ANS104_UNBUNDLE_FILTER=${ANS104_UNBUNDLE_FILTER:-} - - ANS104_DATA_INDEX_FILTER=${ANS104_DATA_INDEX_FILTER:-} + - ANS104_INDEX_FILTER=${ANS104_INDEX_FILTER:-} - ARNS_ROOT_HOST=${ARNS_ROOT_HOST:-} - SANDBOX_PROTOCOL=${SANDBOX_PROTOCOL:-} diff --git a/src/config.ts b/src/config.ts index ed4ad265..b33b5152 100644 --- a/src/config.ts +++ b/src/config.ts @@ -55,8 +55,8 @@ if (env.varOrUndefined('ADMIN_API_KEY') === undefined) { export const ANS104_UNBUNDLE_FILTER = createFilter( JSON.parse(env.varOrDefault('ANS104_UNBUNDLE_FILTER', '{"never": true}')), ); -export const ANS104_DATA_INDEX_FILTER = createFilter( - JSON.parse(env.varOrDefault('ANS104_DATA_INDEX_FILTER', '{"never": true}')), +export const ANS104_INDEX_FILTER = createFilter( + JSON.parse(env.varOrDefault('ANS104_INDEX_FILTER', '{"never": true}')), ); export const ARNS_ROOT_HOST = env.varOrUndefined('ARNS_ROOT_HOST'); export const SANDBOX_PROTOCOL = env.varOrUndefined('SANDBOX_PROTOCOL'); diff --git a/src/events.ts b/src/events.ts index 1c81d06d..1d4bd9ba 100644 --- a/src/events.ts +++ b/src/events.ts @@ -15,12 +15,14 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
*/ +export const ANS104_DATA_ITEM_DATA_INDEXED = 'ans104-data-item-data-indexed'; +export const ANS104_DATA_ITEM_INDEXED = 'ans104-data-indexed'; +export const ANS104_DATA_ITEM_UNBUNDLED = 'asn104-data-item-unbundled'; export const ANS104_TX_INDEXED = 'ans104-tx-indexed'; -export const BLOCK_INDEXED = 'block-indexed'; export const BLOCK_FETCHED = 'block-fetched'; +export const BLOCK_INDEXED = 'block-indexed'; export const BLOCK_TX_FETCHED = 'block-tx-fetched'; export const BLOCK_TX_FETCH_FAILED = 'block-tx-fetch-failed'; export const BLOCK_TX_INDEXED = 'block-tx-indexed'; -export const DATA_ITEM_UNBUNDLED = 'data-item-unbundled'; export const TX_FETCHED = 'tx-fetched'; export const TX_INDEXED = 'tx-indexed'; diff --git a/src/lib/ans-104.ts b/src/lib/ans-104.ts index 161088e7..0d39f367 100644 --- a/src/lib/ans-104.ts +++ b/src/lib/ans-104.ts @@ -7,6 +7,7 @@ import { Worker, isMainThread, parentPort } from 'node:worker_threads'; import { default as wait } from 'wait'; import * as winston from 'winston'; +import * as events from '../events.js'; import log from '../log.js'; import { ContiguousDataSource, NormalizedDataItem } from '../types.js'; import { fromB64Url, sha256B64Url, utf8ToB64Url } from './encoding.js'; @@ -68,10 +69,9 @@ export class Ans104Parser { this.worker.on( 'message', ((message: any) => { - this.log.info('message', { message }); switch (message.eventName) { case 'data-item-unbundled': - eventEmitter.emit(message.eventName, message.dataItem); + eventEmitter.emit(events.ANS104_DATA_ITEM_UNBUNDLED, message.dataItem); break; case 'unbundle-complete': this.unbundlePromise = undefined; diff --git a/src/lib/bundles.test.ts b/src/lib/bundles.test.ts index 7ff17b7d..e0fbe040 100644 --- a/src/lib/bundles.test.ts +++ b/src/lib/bundles.test.ts @@ -46,7 +46,7 @@ describe('importAns104Bundle', () => { it('should proccess bundles and save data items to the database using default batch size', async () => { let emitCount = 0; - eventEmitter.on(events.DATA_ITEM_UNBUNDLED, () => { + eventEmitter.on(events.ANS104_DATA_ITEM_UNBUNDLED, () => { emitCount++; }); await emitAns104UnbundleEvents({ diff --git a/src/lib/bundles.ts b/src/lib/bundles.ts index ec59b2e9..65e546a0 100644 --- a/src/lib/bundles.ts +++ b/src/lib/bundles.ts @@ -20,6 +20,7 @@ import * as EventEmitter from 'node:events'; import stream from 'node:stream'; import * as winston from 'winston'; +import * as events from '../../src/events.js'; import { NormalizedDataItem } from '../types.js'; import { fromB64Url, sha256B64Url, utf8ToB64Url } from './encoding.js'; @@ -111,7 +112,7 @@ export async function emitAns104UnbundleEvents({ } eventEmitter.emit( - 'data-item-unbundled', + events.ANS104_DATA_ITEM_UNBUNDLED, normalizeAns104DataItem(parentTxId, dataItem), ); } diff --git a/src/system.ts b/src/system.ts index ef3290a4..ffba02b5 100644 --- a/src/system.ts +++ b/src/system.ts @@ -42,6 +42,7 @@ import { BlockListValidator, ChainIndex, ContiguousDataIndex, + DataItemIndexWriter, MatchableItem, NestedDataIndexWriter, PartialJsonTransaction, @@ -49,6 +50,7 @@ import { import { Ans104DataIndexer } from './workers/ans104-data-indexer.js'; import { Ans104Unbundler } from './workers/ans104-unbundler.js'; import { BlockImporter } from './workers/block-importer.js'; +import { DataItemIndexer } from './workers/data-item-indexer.js'; import { TransactionFetcher } from './workers/transaction-fetcher.js'; import { TransactionImporter } from './workers/transaction-importer.js'; import { TransactionRepairWorker } from 
'./workers/transaction-repair-worker.js'; @@ -105,6 +107,7 @@ export const chainIndex: ChainIndex = db; export const contiguousDataIndex: ContiguousDataIndex = db; export const blockListValidator: BlockListValidator = db; export const nestedDataIndexWriter: NestedDataIndexWriter = db; +export const dataItemIndexWriter: DataItemIndexWriter = db; // Workers const eventEmitter = new EventEmitter(); @@ -207,14 +210,21 @@ eventEmitter.on( }, ); +const dataItemIndexer = new DataItemIndexer({ + log, + eventEmitter, + indexWriter: dataItemIndexWriter, +}); + const ans104DataIndexer = new Ans104DataIndexer({ log, eventEmitter, indexWriter: nestedDataIndexWriter, }); -eventEmitter.on(events.DATA_ITEM_UNBUNDLED, async (dataItem: any) => { - if (await config.ANS104_DATA_INDEX_FILTER.match(dataItem)) { +eventEmitter.on(events.ANS104_DATA_ITEM_UNBUNDLED, async (dataItem: any) => { + if (await config.ANS104_INDEX_FILTER.match(dataItem)) { + dataItemIndexer.queueDataItem(dataItem); ans104DataIndexer.queueDataItem(dataItem); } }); diff --git a/src/types.d.ts b/src/types.d.ts index 377b9dd9..10bcf430 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -184,6 +184,10 @@ export interface ChainIndex { ): Promise; } +export interface DataItemIndexWriter { + saveDataItem(item: NormalizedDataItem): Promise; +} + export interface NestedDataIndexWriter { saveNestedDataId({ id, diff --git a/src/workers/ans104-data-indexer.ts b/src/workers/ans104-data-indexer.ts index c5855c30..7faa066c 100644 --- a/src/workers/ans104-data-indexer.ts +++ b/src/workers/ans104-data-indexer.ts @@ -57,6 +57,7 @@ export class Ans104DataIndexer { method: 'queueDataItem', id: item.id, parentId: item.parent_id, + rootTxId: item.root_tx_id, dataOffset: item?.data_offset, dataSize: item?.data_size, }); @@ -70,6 +71,7 @@ export class Ans104DataIndexer { method: 'indexDataItem', id: item.id, parentId: item.parent_id, + rootTxId: item.root_tx_id, dataOffset: item?.data_offset, dataSize: item?.data_size, }); @@ -86,7 +88,7 @@ export class Ans104DataIndexer { dataOffset: item.data_offset, dataSize: item.data_size, }); - this.eventEmitter.emit(events.ANS104_TX_INDEXED, item); + this.eventEmitter.emit(events.ANS104_DATA_ITEM_DATA_INDEXED, item); log.debug('Data item data indexed.'); } else { this.log.warn('Data item data is missing data offset or size.'); diff --git a/src/workers/data-item-indexer.ts b/src/workers/data-item-indexer.ts new file mode 100644 index 00000000..77638146 --- /dev/null +++ b/src/workers/data-item-indexer.ts @@ -0,0 +1,84 @@ +/** + * AR.IO Gateway + * Copyright (C) 2023 Permanent Data Solutions, Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +import { default as fastq } from 'fastq'; +import type { queueAsPromised } from 'fastq'; +import * as EventEmitter from 'node:events'; +import * as winston from 'winston'; + +import * as events from '../events.js'; +import { DataItemIndexWriter, NormalizedDataItem } from '../types.js'; + +const DEFAULT_WORKER_COUNT = 1; + +export class DataItemIndexer { + // Dependencies + private log: winston.Logger; + private eventEmitter: EventEmitter; + private indexWriter: DataItemIndexWriter; + + // Data indexing queue + private queue: queueAsPromised<NormalizedDataItem>; + + constructor({ + log, + eventEmitter, + indexWriter, + workerCount = DEFAULT_WORKER_COUNT, + }: { + log: winston.Logger; + eventEmitter: EventEmitter; + indexWriter: DataItemIndexWriter; + workerCount?: number; + }) { + this.log = log.child({ class: 'DataItemIndexer' }); + this.indexWriter = indexWriter; + this.eventEmitter = eventEmitter; + + this.queue = fastq.promise(this.indexDataItem.bind(this), workerCount); + } + + async queueDataItem(item: NormalizedDataItem): Promise<void> { + const log = this.log.child({ + method: 'queueDataItem', + id: item.id, + parentId: item.parent_id, + rootTxId: item.root_tx_id, + }); + log.debug('Queueing data item for indexing...'); + this.queue.push(item); + log.debug('Data item queued for indexing.'); + } + + async indexDataItem(item: NormalizedDataItem): Promise<void> { + const log = this.log.child({ + method: 'indexDataItem', + id: item.id, + parentId: item.parent_id, + rootTxId: item.root_tx_id, + }); + + try { + log.debug('Indexing data item...'); + this.indexWriter.saveDataItem(item); + this.eventEmitter.emit(events.ANS104_DATA_ITEM_INDEXED, item); + log.debug('Data item indexed.'); + } catch (error) { + log.error('Failed to index data item data:', error); + } + } +}

From 96d06806bb98cd321516422b574540de285c8c7a Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Fri, 2 Jun 2023 15:04:56 -0500
Subject: [PATCH 14/33] fix(data): pause the cache stream after setting up internal handlers

PE-3769

We pause the stream to give the downstream consumer a chance to set up its own handler before data starts flowing. Of course, this still has to happen relatively quickly since node.js + the OS won't buffer indefinitely once data starts flowing over the network, but it should still prevent some obvious app level races.
---
 src/data/read-through-data-cache.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/data/read-through-data-cache.ts b/src/data/read-through-data-cache.ts
index ae9d9bb3..45e41ee8 100644
--- a/src/data/read-through-data-cache.ts
+++ b/src/data/read-through-data-cache.ts
@@ -158,6 +158,7 @@ export class ReadThroughDataCache implements ContiguousDataSource { data.stream.on('data', (chunk) => { hasher.update(chunk); }); + data.stream.pause(); return data; }

From 699344986056899a24803c3074ee863954cb747b Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Fri, 2 Jun 2023 15:35:12 -0500
Subject: [PATCH 15/33] fix(bundles graphql): correctly return data item tags

PE-3769

Adds queries to retrieve data item tags and return them in GraphQL. In the SQLite DB implementation these are separate queries for convenience. If we were making requests to something like PostgreSQL we'd probably bundle this into the main query.
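A sketch of the consuming side (assuming a statement prepared from the selectStableDataItemTags query below):

import Sqlite from 'better-sqlite3';

// Sketch: tags are fetched per data item in a second, keyed query rather
// than being joined into the main GraphQL page query.
function getDataItemTags(
  selectTags: Sqlite.Statement,
  id: Buffer,
): { name: string; value: string }[] {
  const rows = selectTags.all({ id }) as { name: Buffer; value: Buffer }[];
  return rows.map((row) => ({
    name: row.name.toString('utf8'),
    value: row.value.toString('utf8'),
  }));
}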
--- src/database/sql/bundles/gql.sql | 13 ++++++++++++ src/database/standalone-sqlite.ts | 34 +++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 src/database/sql/bundles/gql.sql diff --git a/src/database/sql/bundles/gql.sql b/src/database/sql/bundles/gql.sql new file mode 100644 index 00000000..f70fb58d --- /dev/null +++ b/src/database/sql/bundles/gql.sql @@ -0,0 +1,13 @@ +-- selectNewDataItemTags +SELECT name, value +FROM new_data_item_tags +JOIN tag_names ON tag_name_hash = tag_names.hash +JOIN tag_values ON tag_value_hash = tag_values.hash +WHERE data_item_id = @id + +-- selectStableDataItemTags +SELECT name, value +FROM stable_data_item_tags +JOIN tag_names ON tag_name_hash = tag_names.hash +JOIN tag_values ON tag_value_hash = tag_values.hash +WHERE data_item_id = @id diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 5cf5b840..f1371aec 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -877,6 +877,17 @@ export class StandaloneSqliteDatabaseWorker { })); } + getGqlNewDataItemTags(id: Buffer) { + const tags = this.stmts.bundles.selectNewDataItemTags.all({ + id: id, + }); + + return tags.map((tag) => ({ + name: tag.name.toString('utf8'), + value: tag.value.toString('utf8'), + })); + } + getGqlStableTransactionTags(txId: Buffer) { const tags = this.stmts.core.selectStableTransactionTags.all({ transaction_id: txId, @@ -888,6 +899,17 @@ export class StandaloneSqliteDatabaseWorker { })); } + getGqlStableDataItemTags(id: Buffer) { + const tags = this.stmts.bundles.selectStableDataItemTags.all({ + id: id, + }); + + return tags.map((tag) => ({ + name: tag.name.toString('utf8'), + value: tag.value.toString('utf8'), + })); + } + getGqlNewTransactionsBaseSql() { return sql .select( @@ -907,7 +929,7 @@ export class StandaloneSqliteDatabaseWorker { 'nb.indep_hash AS block_indep_hash', 'nb.block_timestamp AS block_timestamp', 'nb.previous_block AS block_previous_block', - "'' AS parent_id" + "'' AS parent_id", ) .from('new_transactions nt') .join('new_block_transactions nbt', { @@ -973,7 +995,7 @@ export class StandaloneSqliteDatabaseWorker { 'sb.indep_hash AS block_indep_hash', 'sb.block_timestamp AS block_timestamp', 'sb.previous_block AS block_previous_block', - "'' AS parent_id" + "'' AS parent_id", ) .from('stable_transactions st') .join('stable_blocks sb', { @@ -1349,7 +1371,9 @@ export class StandaloneSqliteDatabaseWorker { fee: tx.reward, quantity: tx.quantity, dataSize: tx.data_size, - tags: this.getGqlNewTransactionTags(tx.id), + tags: tx.data_item_id.length > 1 + ? this.getGqlNewDataItemTags(tx.id) + : this.getGqlNewTransactionTags(tx.id), contentType: tx.content_type, blockIndepHash: toB64Url(tx.block_indep_hash), blockTimestamp: tx.block_timestamp, @@ -1455,7 +1479,9 @@ export class StandaloneSqliteDatabaseWorker { fee: tx.reward, quantity: tx.quantity, dataSize: tx.data_size, - tags: this.getGqlStableTransactionTags(tx.id), + tags: tx.data_item_id.length > 1 + ? 
this.getGqlStableDataItemTags(tx.id) + : this.getGqlStableTransactionTags(tx.id), contentType: tx.content_type, blockIndepHash: toB64Url(tx.block_indep_hash), blockTimestamp: tx.block_timestamp, From 1832d23d2617fdcb1b81c65f62edfc994f84afbb Mon Sep 17 00:00:00 2001 From: David Whittington Date: Mon, 5 Jun 2023 14:20:36 -0500 Subject: [PATCH 16/33] perf(sqlite graphql): add new_data_item data_item_id index PE-3769 Since tags are retrieved in a second query by data_item_id, this significantly improves the performance of retrieving tags for data items that have not yet been flushed to the stable data items table (stable data items already have a similar index). --- ...5T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql | 4 ++++ ...5T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql | 4 ++++ 2 files changed, 8 insertions(+) create mode 100644 migrations/2023.06.05T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql create mode 100644 migrations/down/2023.06.05T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql diff --git a/migrations/2023.06.05T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql b/migrations/2023.06.05T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql new file mode 100644 index 00000000..631ce64a --- /dev/null +++ b/migrations/2023.06.05T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql @@ -0,0 +1,4 @@ +DROP INDEX IF EXISTS stable_data_item_tags_transaction_id_idx; +CREATE INDEX IF NOT EXISTS stable_data_item_tags_data_item_id_idx ON stable_data_item_tags (data_item_id); + +CREATE INDEX IF NOT EXISTS new_data_item_tags_data_item_id_idx ON new_data_item_tags (data_item_id); diff --git a/migrations/down/2023.06.05T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql b/migrations/down/2023.06.05T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql new file mode 100644 index 00000000..47d36891 --- /dev/null +++ b/migrations/down/2023.06.05T17.36.05.bundles.data-item-tags-data-item-id-indexes.sql @@ -0,0 +1,4 @@ +DROP INDEX IF EXISTS new_data_item_tags_data_item_id_idx; + +DROP INDEX IF EXISTS stable_data_item_tags_data_item_id_idx; +CREATE INDEX IF NOT EXISTS stable_data_item_tags_transaction_id_idx ON stable_data_item_tags (data_item_id); From 9218dba4622a22085cf0adec9f854a086a931956 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Mon, 5 Jun 2023 14:44:36 -0500 Subject: [PATCH 17/33] feat(sqlite bundles): record all parent/child relationships for matching data items PE-3769 We don't want a data item with the same ID to appear multiple times in GraphQL, so we only insert unique IDs into new_data_items. However, we'd still like to have a record of all the bundles containing a particular ID. This is important if a bundle is removed (due to content moderation) or the parent association needs to be changed for any other unforeseen reason. 
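Sketched as a write path (statement names follow this patch; the point is the split between the relationship log and the deduplicated GraphQL table):

import Sqlite from 'better-sqlite3';

// Sketch: bundle_data_items records every (id, parent_id) pair that is
// seen, while new_data_items is upserted so GraphQL sees one row per ID.
function saveDataItemRows(
  stmts: {
    insertBundleDataItem: Sqlite.Statement; // one row per id/parent pair
    upsertNewDataItem: Sqlite.Statement; // one row per id
  },
  rows: {
    bundleDataItem: Record<string, unknown>;
    newDataItem: Record<string, unknown>;
  },
  height?: number,
): void {
  stmts.insertBundleDataItem.run(rows.bundleDataItem);
  // height may be unknown until the root TX lands in a block
  stmts.upsertNewDataItem.run({ ...rows.newDataItem, height: height ?? null });
}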
--- src/database/sql/bundles/import.sql | 7 +++++++ src/database/standalone-sqlite.ts | 18 +++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/database/sql/bundles/import.sql b/src/database/sql/bundles/import.sql index 3f1a0987..1c0ff5a0 100644 --- a/src/database/sql/bundles/import.sql +++ b/src/database/sql/bundles/import.sql @@ -1,3 +1,10 @@ +-- insertBundleDataItem +INSERT INTO bundle_data_items ( + id, parent_id, root_transaction_id, indexed_at +) VALUES ( + @id, @parent_id, @root_transaction_id, @indexed_at +) + -- insertOrIgnoreWallet INSERT INTO wallets (address, public_modulus) VALUES (@address, @public_modulus) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index f1371aec..68b6aff6 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -269,7 +269,7 @@ export function dataItemToDbRows(item: NormalizedDataItem, height?: number) { tagValues, newDataItemTags, wallets, - newBundleDataItem: { + bundleDataItem: { id, parent_id: parentId, root_transaction_id: rootTxId, @@ -474,6 +474,8 @@ export class StandaloneSqliteDatabaseWorker { this.stmts.bundles.insertOrIgnoreWallet.run(row); } + this.stmts.bundles.insertBundleDataItem.run(rows.bundleDataItem); + this.stmts.bundles.upsertNewDataItem.run({ ...rows.newDataItem, height, @@ -1371,9 +1373,10 @@ export class StandaloneSqliteDatabaseWorker { fee: tx.reward, quantity: tx.quantity, dataSize: tx.data_size, - tags: tx.data_item_id.length > 1 - ? this.getGqlNewDataItemTags(tx.id) - : this.getGqlNewTransactionTags(tx.id), + tags: + tx.data_item_id.length > 1 + ? this.getGqlNewDataItemTags(tx.id) + : this.getGqlNewTransactionTags(tx.id), contentType: tx.content_type, blockIndepHash: toB64Url(tx.block_indep_hash), blockTimestamp: tx.block_timestamp, @@ -1479,9 +1482,10 @@ export class StandaloneSqliteDatabaseWorker { fee: tx.reward, quantity: tx.quantity, dataSize: tx.data_size, - tags: tx.data_item_id.length > 1 - ? this.getGqlStableDataItemTags(tx.id) - : this.getGqlStableTransactionTags(tx.id), + tags: + tx.data_item_id.length > 1 + ? this.getGqlStableDataItemTags(tx.id) + : this.getGqlStableTransactionTags(tx.id), contentType: tx.content_type, blockIndepHash: toB64Url(tx.block_indep_hash), blockTimestamp: tx.block_timestamp, From 29b2ee9ba11ed33719c889b7a074c3af1034283c Mon Sep 17 00:00:00 2001 From: David Whittington Date: Mon, 5 Jun 2023 20:16:03 -0500 Subject: [PATCH 18/33] fix(sqlite bundles): correct join condition for data item tags PE-3769 The wrong id column was being used for new data items and data item was missing from the stable data item join (not needed for transactions since height and block index are sufficient). 
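
For clarity, the corrected join conditions look roughly like this (sketch
only; the alias names are placeholders, not the generated ones):

  // New data items: tags are matched on data_item_id, not transaction_id.
  const newItemsJoinCond = { 'items.id': 'tags.data_item_id' };

  // Stable data items: height and block transaction index alone do not
  // uniquely identify a data item within a bundle, so the item ID is
  // included in the join as well.
  const stableItemsJoinCond = {
    'items.height': 'tags.height',
    'items.block_transaction_index': 'tags.block_transaction_index',
    'items.id': 'tags.data_item_id',
  };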
--- src/database/standalone-sqlite.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 68b6aff6..75c44594 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -1100,6 +1100,7 @@ export class StandaloneSqliteDatabaseWorker { heightTableAlias = 'nt'; blockTransactionIndexTableAlias = 'nbt'; tagsTable = 'new_transaction_tags'; + tagIdColumn = 'transaction_id'; heightSortTableAlias = 'nt'; blockTransactionIndexSortTableAlias = 'nbt'; } else { @@ -1107,6 +1108,7 @@ export class StandaloneSqliteDatabaseWorker { heightTableAlias = 'ndi'; blockTransactionIndexTableAlias = 'nbt'; tagsTable = 'new_data_item_tags'; + tagIdColumn = 'data_item_id'; heightSortTableAlias = 'ndi'; blockTransactionIndexSortTableAlias = 'nbt'; } @@ -1139,6 +1141,9 @@ export class StandaloneSqliteDatabaseWorker { [`${blockTransactionIndexTableAlias}.block_transaction_index`]: `${tagAlias}.block_transaction_index`, [`${heightTableAlias}.height`]: `${tagAlias}.height`, }; + if (source === 'stable_items') { + joinCond[`${txTableAlias}.id`] = `${tagAlias}.${tagIdColumn}`; + } } else { const previousTagAlias = `"${index - 1}_${index - 1}"`; joinCond = { @@ -1157,7 +1162,7 @@ export class StandaloneSqliteDatabaseWorker { } } else { joinCond = { - [`${txTableAlias}.id`]: `${tagAlias}.transaction_id`, + [`${txTableAlias}.id`]: `${tagAlias}.${tagIdColumn}`, }; } From 199bfe4ca926949933a2037aab8ee1d43931fc6a Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 6 Jun 2023 12:00:45 -0500 Subject: [PATCH 19/33] chore(sqlite): improve worker error logging PE-3769 Adds a try/catch in the worker thread to log errors. Also alters the error handling in workers so that workers no longer immediately exit when an error occurs. Instead they wait till an error threshold is reached (currently 100 errors) and then exit. This preserves some level of "fail fast" error handling while reducing overhead of creating a new worker after every error. 
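
Distilled, the worker-side pattern looks like this (sketch only; the real
handler dispatches many more message types):

  import { parentPort } from 'node:worker_threads';

  const MAX_WORKER_ERRORS = 100;
  let errorCount = 0;

  parentPort?.on('message', (message) => {
    try {
      // ... dispatch message.method to the appropriate DB method ...
    } catch (error) {
      if (errorCount > MAX_WORKER_ERRORS) {
        process.exit(1); // still fail fast once the threshold is crossed
      }
      errorCount++;
      // The sentinel message lets the main thread reject the pending job
      // rather than leaving it unresolved.
      parentPort?.postMessage('__ERROR__');
    }
  });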
--- src/database/standalone-sqlite.ts | 183 +++++++++++++++++------------- 1 file changed, 101 insertions(+), 82 deletions(-) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 75c44594..6cdd1c74 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -43,6 +43,7 @@ import { utf8ToB64Url, } from '../lib/encoding.js'; import { MANIFEST_CONTENT_TYPE } from '../lib/encoding.js'; +import log from '../log.js'; import { BlockListValidator, ChainIndex, @@ -58,6 +59,8 @@ import { const CPU_COUNT = os.cpus().length; +const MAX_WORKER_ERRORS = 100; + const STABLE_FLUSH_INTERVAL = 5; const NEW_TX_CLEANUP_WAIT_SECS = 60 * 60 * 2; const NEW_DATA_ITEM_CLEANUP_WAIT_SECS = 60 * 60 * 2; @@ -1863,7 +1866,7 @@ export class StandaloneSqliteDatabaseWorker { }); return row?.is_blocked === 1; } - return false + return false; } isHashBlocked(hash: string | undefined): boolean { @@ -2061,7 +2064,11 @@ export class StandaloneSqliteDatabase takeWork(); }) .on('message', (result) => { - job.resolve(result); + if (result === '__ERROR__') { + job.reject(new Error('Worker error')); + } else { + job.resolve(result); + } job = null; takeWork(); // Check if there's more work to do }) @@ -2371,87 +2378,99 @@ if (!isMainThread) { bundlesDbPath: workerData.bundlesDbPath, }); + let errorCount = 0; + parentPort?.on('message', ({ method, args }: WorkerMessage) => { - switch (method) { - case 'getMaxHeight': - const maxHeight = worker.getMaxHeight(); - parentPort?.postMessage(maxHeight); - break; - case 'getBlockHashByHeight': - const newBlockHash = worker.getBlockHashByHeight(args[0]); - parentPort?.postMessage(newBlockHash); - break; - case 'getMissingTxIds': - const missingTxIdsRes = worker.getMissingTxIds(args[0]); - parentPort?.postMessage(missingTxIdsRes); - break; - case 'resetToHeight': - worker.resetToHeight(args[0]); - parentPort?.postMessage(undefined); - break; - case 'saveTx': - worker.saveTx(args[0]); - parentPort?.postMessage(null); - break; - case 'saveDataItem': - worker.saveDataItem(args[0]); - parentPort?.postMessage(null); - break; - case 'saveBlockAndTxs': - const [block, txs, missingTxIds] = args; - worker.saveBlockAndTxs(block, txs, missingTxIds); - parentPort?.postMessage(null); - break; - case 'getDataAttributes': - const dataAttributes = worker.getDataAttributes(args[0]); - parentPort?.postMessage(dataAttributes); - break; - case 'getDataParent': - const dataParent = worker.getDataParent(args[0]); - parentPort?.postMessage(dataParent); - break; - case 'getDebugInfo': - const debugInfo = worker.getDebugInfo(); - parentPort?.postMessage(debugInfo); - break; - case 'saveDataContentAttributes': - worker.saveDataContentAttributes(args[0]); - parentPort?.postMessage(null); - break; - case 'getGqlTransactions': - const gqlTransactions = worker.getGqlTransactions(args[0]); - parentPort?.postMessage(gqlTransactions); - break; - case 'getGqlTransaction': - const gqlTransaction = worker.getGqlTransaction(args[0]); - parentPort?.postMessage(gqlTransaction); - break; - case 'getGqlBlocks': - const gqlBlocks = worker.getGqlBlocks(args[0]); - parentPort?.postMessage(gqlBlocks); - break; - case 'getGqlBlock': - const gqlBlock = worker.getGqlBlock(args[0]); - parentPort?.postMessage(gqlBlock); - break; - case 'isIdBlocked': - const isIdBlocked = worker.isIdBlocked(args[0]); - parentPort?.postMessage(isIdBlocked); - break; - case 'isHashBlocked': - const isHashBlocked = worker.isHashBlocked(args[0]); - parentPort?.postMessage(isHashBlocked); - break; 
- case 'blockData': - worker.blockData(args[0]); - parentPort?.postMessage(null); - break; - case 'saveNestedDataId': - worker.saveNestedDataId(args[0]); - parentPort?.postMessage(null); - break; - case 'terminate': - process.exit(0); + try { + switch (method) { + case 'getMaxHeight': + const maxHeight = worker.getMaxHeight(); + parentPort?.postMessage(maxHeight); + break; + case 'getBlockHashByHeight': + const newBlockHash = worker.getBlockHashByHeight(args[0]); + parentPort?.postMessage(newBlockHash); + break; + case 'getMissingTxIds': + const missingTxIdsRes = worker.getMissingTxIds(args[0]); + parentPort?.postMessage(missingTxIdsRes); + break; + case 'resetToHeight': + worker.resetToHeight(args[0]); + parentPort?.postMessage(undefined); + break; + case 'saveTx': + worker.saveTx(args[0]); + parentPort?.postMessage(null); + break; + case 'saveDataItem': + worker.saveDataItem(args[0]); + parentPort?.postMessage(null); + break; + case 'saveBlockAndTxs': + const [block, txs, missingTxIds] = args; + worker.saveBlockAndTxs(block, txs, missingTxIds); + parentPort?.postMessage(null); + break; + case 'getDataAttributes': + const dataAttributes = worker.getDataAttributes(args[0]); + parentPort?.postMessage(dataAttributes); + break; + case 'getDataParent': + const dataParent = worker.getDataParent(args[0]); + parentPort?.postMessage(dataParent); + break; + case 'getDebugInfo': + const debugInfo = worker.getDebugInfo(); + parentPort?.postMessage(debugInfo); + break; + case 'saveDataContentAttributes': + worker.saveDataContentAttributes(args[0]); + parentPort?.postMessage(null); + break; + case 'getGqlTransactions': + const gqlTransactions = worker.getGqlTransactions(args[0]); + parentPort?.postMessage(gqlTransactions); + break; + case 'getGqlTransaction': + const gqlTransaction = worker.getGqlTransaction(args[0]); + parentPort?.postMessage(gqlTransaction); + break; + case 'getGqlBlocks': + const gqlBlocks = worker.getGqlBlocks(args[0]); + parentPort?.postMessage(gqlBlocks); + break; + case 'getGqlBlock': + const gqlBlock = worker.getGqlBlock(args[0]); + parentPort?.postMessage(gqlBlock); + break; + case 'isIdBlocked': + const isIdBlocked = worker.isIdBlocked(args[0]); + parentPort?.postMessage(isIdBlocked); + break; + case 'isHashBlocked': + const isHashBlocked = worker.isHashBlocked(args[0]); + parentPort?.postMessage(isHashBlocked); + break; + case 'blockData': + worker.blockData(args[0]); + parentPort?.postMessage(null); + break; + case 'saveNestedDataId': + worker.saveNestedDataId(args[0]); + parentPort?.postMessage(null); + break; + case 'terminate': + process.exit(0); + } + } catch (error) { + if (errorCount > MAX_WORKER_ERRORS) { + log.error('Too many errors in StandaloneSqlite worker, exiting.'); + process.exit(1); + } + log.error('Error in StandaloneSqlite worker:', error ); + errorCount++; + parentPort?.postMessage('__ERROR__'); } }); } From 11f7e9d30e13a46a3cf0da3cb2874bfaa895fdae Mon Sep 17 00:00:00 2001 From: David Whittington Date: Wed, 7 Jun 2023 16:28:59 -0500 Subject: [PATCH 20/33] doc(sqlite): add WIP bundle schema docs PE-3769 Adds WIP bundle schema docs generated by SchemaSpy. Run ./scripts/schemaspy to generate the docs in ./docs/sqlite/bundles. SchemaSpy properties and schema metadata are stored in ./docs/sqlite/bundles.properties and ./docs/sqlite/bundles.meta.xml respectively. 
---
 .gitignore                     |   4 ++
 docs/sqlite/bundles.meta.xml   | 109 +++++++++++++++++++++++++++++++++
 docs/sqlite/bundles.properties |   5 ++
 scripts/schemaspy              |  27 ++++++++
 4 files changed, 145 insertions(+)
 create mode 100644 docs/sqlite/bundles.meta.xml
 create mode 100644 docs/sqlite/bundles.properties
 create mode 100755 scripts/schemaspy

diff --git a/.gitignore b/.gitignore
index 1d0bfdaa..e1c406a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,10 @@
 /data
 /dist
 /node_modules
+/vendor
+
+# Generated docs
+/docs/sqlite/bundles
 
 # VS Code
 /.vscode
diff --git a/docs/sqlite/bundles.meta.xml b/docs/sqlite/bundles.meta.xml
new file mode 100644
index 00000000..516abe76
--- /dev/null
+++ b/docs/sqlite/bundles.meta.xml
@@ -0,0 +1,109 @@
[109 lines of SchemaSpy <schemaMeta> XML defining table and column comments
for the bundles DB; the XML markup did not survive extraction and is
omitted here.]
diff --git a/docs/sqlite/bundles.properties b/docs/sqlite/bundles.properties
new file mode 100644
index 00000000..bb006198
--- /dev/null
+++ b/docs/sqlite/bundles.properties
@@ -0,0 +1,5 @@
+dbms=SQLite
+description=Xerial
+connectionSpec=jdbc:sqlite:data/sqlite/bundles.db?open_mode=1
+driver=org.sqlite.JDBC
+driverPath=vendor/sqlite-jdbc-3.42.0.0.jar
diff --git a/scripts/schemaspy b/scripts/schemaspy
new file mode 100755
index 00000000..3f48aa58
--- /dev/null
+++ b/scripts/schemaspy
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+schemaspy_version="6.2.2"
+schemaspy_jar="schemaspy-${schemaspy_version}.jar"
+sqlite_jdbc_version="3.42.0.0"
+sqlite_jdbc_jar="sqlite-jdbc-${sqlite_jdbc_version}.jar"
+
+mkdir -p vendor
+
+if [ ! -f vendor/${schemaspy_jar} ]; then
+  curl -L https://github.com/schemaspy/schemaspy/releases/download/v${schemaspy_version}/${schemaspy_jar} -o vendor/${schemaspy_jar}
+fi
+
+if [ ! -f vendor/${sqlite_jdbc_jar} ]; then
+  curl -L https://github.com/xerial/sqlite-jdbc/releases/download/${sqlite_jdbc_version}/${sqlite_jdbc_jar} -o vendor/${sqlite_jdbc_jar}
+fi
+
+java -jar vendor/schemaspy-*.jar \
+  -debug \
+  -t docs/sqlite/bundles.properties \
+  -sso \
+  -s bundles \
+  -cat catalog \
+  -meta docs/sqlite/bundles.meta.xml \
+  -o docs/sqlite/bundles

From 56a6dfb5e77bfafa979b7c9fe1e352e5631d8a73 Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Fri, 9 Jun 2023 17:33:02 -0500
Subject: [PATCH 21/33] feat(sqlite bundles): add filter_id and parent_index
 to bundle_data_items

PE-3769

Adds a parent_index and filter_id to bundle_data_items. parent_index (the
numeric index of the parent bundle within its own parent bundle)
distinguishes between data items contained in duplicate parents in the same
bundle. filter_id records the filter that caused the data item to be
indexed (useful for determining what may need to be reprocessed later).
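
To make the parent_index semantics concrete: if two copies of the same
nested bundle appear in one root bundle (say at indexes 0 and 3), a data
item they contain is recorded once per containment. A hypothetical sketch,
assuming a prepared statement like the upsertBundleDataItem added below:

  import Sqlite from 'better-sqlite3';

  function recordContainments(
    upsertBundleDataItem: Sqlite.Statement,
    dataItemId: Buffer,
    nestedBundleId: Buffer,
    rootTxId: Buffer,
    filterId: number,
  ) {
    for (const parentIndex of [0, 3]) {
      upsertBundleDataItem.run({
        id: dataItemId,
        parent_id: nestedBundleId,
        parent_index: parentIndex,
        filter_id: filterId,
        root_transaction_id: rootTxId,
        indexed_at: Math.floor(Date.now() / 1000),
      });
    }
  }

Both rows share the same data item ID, but the (id, parent_id,
parent_index, filter_id) primary key keeps the containments distinct.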
--- ...38.bundles.add-bundle-data-item-fields.sql | 15 +++++++ ...38.bundles.add-bundle-data-item-fields.sql | 9 ++++ src/database/sql/bundles/import.sql | 31 ++++++++------ src/database/standalone-sqlite.test.ts | 12 +++--- src/database/standalone-sqlite.ts | 6 ++- src/lib/ans-104.ts | 41 ++++++++++++++----- src/system.ts | 5 ++- src/types.d.ts | 2 + src/workers/ans104-unbundler.ts | 8 +++- test/bundles-schema.sql | 22 ++++++---- test/core-schema.sql | 1 + 11 files changed, 113 insertions(+), 39 deletions(-) create mode 100644 migrations/2023.06.08T14.32.38.bundles.add-bundle-data-item-fields.sql create mode 100644 migrations/down/2023.06.08T14.32.38.bundles.add-bundle-data-item-fields.sql diff --git a/migrations/2023.06.08T14.32.38.bundles.add-bundle-data-item-fields.sql b/migrations/2023.06.08T14.32.38.bundles.add-bundle-data-item-fields.sql new file mode 100644 index 00000000..a7521396 --- /dev/null +++ b/migrations/2023.06.08T14.32.38.bundles.add-bundle-data-item-fields.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS bundle_data_items; + +CREATE TABLE IF NOT EXISTS bundle_data_items ( + id BLOB NOT NULL, + parent_id BLOB NOT NULL, + parent_index INTEGER NOT NULL, + filter_id INTEGER NOT NULL, + root_transaction_id BLOB NOT NULL, + first_indexed_at INTEGER NOT NULL, + last_indexed_at INTEGER NOT NULL, + PRIMARY KEY (id, parent_id, parent_index, filter_id) +); + +CREATE INDEX IF NOT EXISTS bundle_data_items_filter_id_idx + ON bundle_data_items (filter_id); diff --git a/migrations/down/2023.06.08T14.32.38.bundles.add-bundle-data-item-fields.sql b/migrations/down/2023.06.08T14.32.38.bundles.add-bundle-data-item-fields.sql new file mode 100644 index 00000000..25a919f3 --- /dev/null +++ b/migrations/down/2023.06.08T14.32.38.bundles.add-bundle-data-item-fields.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS bundle_data_items; + +CREATE TABLE IF NOT EXISTS bundle_data_items ( + id BLOB, + parent_id BLOB NOT NULL, + root_transaction_id BLOB NOT NULL, + indexed_at INTEGER NOT NULL, + PRIMARY KEY (id, parent_id) +); diff --git a/src/database/sql/bundles/import.sql b/src/database/sql/bundles/import.sql index 1c0ff5a0..0b1be323 100644 --- a/src/database/sql/bundles/import.sql +++ b/src/database/sql/bundles/import.sql @@ -1,10 +1,3 @@ --- insertBundleDataItem -INSERT INTO bundle_data_items ( - id, parent_id, root_transaction_id, indexed_at -) VALUES ( - @id, @parent_id, @root_transaction_id, @indexed_at -) - -- insertOrIgnoreWallet INSERT INTO wallets (address, public_modulus) VALUES (@address, @public_modulus) @@ -31,13 +24,27 @@ INSERT INTO new_data_item_tags ( @height, @indexed_at ) ON CONFLICT DO UPDATE SET height = IFNULL(@height, height) --- insertOrIgnoreBundleDataItem +-- upsertBundleDataItem INSERT INTO bundle_data_items ( - id, parent_id, root_transaction_id, indexed_at + id, + parent_id, + parent_index, + filter_id, + root_transaction_id, + first_indexed_at, + last_indexed_at ) VALUES ( - @id, @parent_id, @root_transaction_id, @indexed_at -) -ON CONFLICT DO NOTHING + @id, + @parent_id, + @parent_index, + @filter_id, + @root_transaction_id, + @indexed_at, + @indexed_at +) ON CONFLICT DO +UPDATE SET + filter_id = IFNULL(@filter_id, filter_id), + last_indexed_at = @indexed_at -- upsertNewDataItem INSERT INTO new_data_items ( diff --git a/src/database/standalone-sqlite.test.ts b/src/database/standalone-sqlite.test.ts index 023aa2ec..2bab92a5 100644 --- a/src/database/standalone-sqlite.test.ts +++ b/src/database/standalone-sqlite.test.ts @@ -140,11 +140,13 @@ describe('SQLite data conversion functions', 
() => { const bundleStream = await stubAns104Bundle(); const iterable = await processStream(bundleStream); for await (const [_index, dataItem] of iterable.entries()) { - const normalizedDataItem = normalizeAns104DataItem( - '0000000000000000000000000000000000000000000', - '0000000000000000000000000000000000000000000', - dataItem, - ); + const normalizedDataItem = normalizeAns104DataItem({ + rootTxId: '0000000000000000000000000000000000000000000', + parentId: '0000000000000000000000000000000000000000000', + parentIndex: -1, + index: 0, + ans104DataItem: dataItem, + }); const rows = dataItemToDbRows(normalizedDataItem); expect(rows.tagNames.length).to.be.above(0); expect(rows.tagValues.length).to.be.above(0); diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 6cdd1c74..9f3b593e 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -275,7 +275,9 @@ export function dataItemToDbRows(item: NormalizedDataItem, height?: number) { bundleDataItem: { id, parent_id: parentId, + parent_index: item.parent_index, root_transaction_id: rootTxId, + filter_id: -1, // TODO remove once filters are in the DB indexed_at: currentTimestamp(), }, newDataItem: { @@ -477,7 +479,7 @@ export class StandaloneSqliteDatabaseWorker { this.stmts.bundles.insertOrIgnoreWallet.run(row); } - this.stmts.bundles.insertBundleDataItem.run(rows.bundleDataItem); + this.stmts.bundles.upsertBundleDataItem.run(rows.bundleDataItem); this.stmts.bundles.upsertNewDataItem.run({ ...rows.newDataItem, @@ -2468,7 +2470,7 @@ if (!isMainThread) { log.error('Too many errors in StandaloneSqlite worker, exiting.'); process.exit(1); } - log.error('Error in StandaloneSqlite worker:', error ); + log.error('Error in StandaloneSqlite worker:', error); errorCount++; parentPort?.postMessage('__ERROR__'); } diff --git a/src/lib/ans-104.ts b/src/lib/ans-104.ts index 0d39f367..ad4cf808 100644 --- a/src/lib/ans-104.ts +++ b/src/lib/ans-104.ts @@ -16,11 +16,19 @@ import { fromB64Url, sha256B64Url, utf8ToB64Url } from './encoding.js'; // @ts-ignore const { default: processStream } = arbundles; -export function normalizeAns104DataItem( - rootTxId: string, - parentTxId: string, - ans104DataItem: Record, -): NormalizedDataItem { +export function normalizeAns104DataItem({ + rootTxId, + parentId, + parentIndex, + index, + ans104DataItem, +}: { + rootTxId: string; + parentId: string; + parentIndex: number; + index: number; + ans104DataItem: Record; +}): NormalizedDataItem { // TODO stricter type checking (maybe zod) const tags = (ans104DataItem.tags || []).map( @@ -32,7 +40,9 @@ export function normalizeAns104DataItem( return { id: ans104DataItem.id, - parent_id: parentTxId, + index: index, + parent_id: parentId, + parent_index: parentIndex, root_tx_id: rootTxId, signature: ans104DataItem.signature, owner: ans104DataItem.owner, @@ -71,7 +81,10 @@ export class Ans104Parser { ((message: any) => { switch (message.eventName) { case 'data-item-unbundled': - eventEmitter.emit(events.ANS104_DATA_ITEM_UNBUNDLED, message.dataItem); + eventEmitter.emit( + events.ANS104_DATA_ITEM_UNBUNDLED, + message.dataItem, + ); break; case 'unbundle-complete': this.unbundlePromise = undefined; @@ -92,9 +105,11 @@ export class Ans104Parser { async parseBundle({ rootTxId, parentId, + parentIndex, }: { rootTxId: string; parentId: string; + parentIndex: number; }): Promise { const unbundlePromise: Promise = new Promise( async (resolve, reject) => { @@ -122,7 +137,7 @@ export class Ans104Parser { }); 
writeStream.on('finish', async () => { log.info('Parsing ANS-104 bundle stream...'); - this.worker.postMessage({ rootTxId, parentId, bundlePath }); + this.worker.postMessage({ rootTxId, parentId, parentIndex, bundlePath }); resolve(); }); }, @@ -134,7 +149,7 @@ export class Ans104Parser { if (!isMainThread) { parentPort?.on('message', async (message: any) => { - const { rootTxId, parentId, bundlePath } = message; + const { rootTxId, parentId, parentIndex, bundlePath } = message; try { const stream = fs.createReadStream(bundlePath); const iterable = await processStream(stream); @@ -170,7 +185,13 @@ if (!isMainThread) { parentPort?.postMessage({ eventName: 'data-item-unbundled', - dataItem: normalizeAns104DataItem(rootTxId, parentId, dataItem), + dataItem: normalizeAns104DataItem({ + rootTxId: rootTxId as string, + parentId: parentId as string, + parentIndex: parentIndex as number, + index: index as number, + ans104DataItem: dataItem as Record, + }), }); } parentPort?.postMessage({ eventName: 'unbundle-complete' }); diff --git a/src/system.ts b/src/system.ts index ffba02b5..7a0cda77 100644 --- a/src/system.ts +++ b/src/system.ts @@ -205,7 +205,10 @@ eventEmitter.on( events.ANS104_TX_INDEXED, async (tx: PartialJsonTransaction) => { if (await config.ANS104_UNBUNDLE_FILTER.match(tx)) { - ans104Unbundler.queueItem(tx); + ans104Unbundler.queueItem({ + index: -1, // parent indexes are not needed for L1 + ...tx, + }); } }, ); diff --git a/src/types.d.ts b/src/types.d.ts index 10bcf430..8571fc25 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -204,7 +204,9 @@ export interface NestedDataIndexWriter { export interface NormalizedDataItem { id: string; + index: number; parent_id: string; + parent_index: number; root_tx_id: string; signature: string; owner: string; diff --git a/src/workers/ans104-unbundler.ts b/src/workers/ans104-unbundler.ts index 9c30ecb1..7378d1e2 100644 --- a/src/workers/ans104-unbundler.ts +++ b/src/workers/ans104-unbundler.ts @@ -30,7 +30,12 @@ import { const DEFAULT_WORKER_COUNT = 1; -type UnbundleableItem = NormalizedDataItem | PartialJsonTransaction; +interface IndexProperty { + index: number; +} + +type UnbundleableItem = (NormalizedDataItem | PartialJsonTransaction) & + IndexProperty; export class Ans104Unbundler { // Dependencies @@ -91,6 +96,7 @@ export class Ans104Unbundler { await this.ans104Parser.parseBundle({ rootTxId, parentId: item.id, + parentIndex: item.index, }); log.info('Bundle unbundled.'); } diff --git a/test/bundles-schema.sql b/test/bundles-schema.sql index 978feab2..5a2c7de3 100644 --- a/test/bundles-schema.sql +++ b/test/bundles-schema.sql @@ -9,13 +9,6 @@ CREATE TABLE bundles ( first_processed_at INTEGER NOT NULL, last_processed_at INTEGER NOT NULL ); -CREATE TABLE bundle_data_items ( - id BLOB, - parent_id BLOB NOT NULL, - root_transaction_id BLOB NOT NULL, - indexed_at INTEGER NOT NULL, - PRIMARY KEY (id, parent_id) -); CREATE TABLE wallets ( address BLOB PRIMARY KEY, public_modulus BLOB @@ -67,7 +60,6 @@ CREATE TABLE stable_data_item_tags ( root_transaction_id BLOB NOT NULL, PRIMARY KEY (tag_name_hash, tag_value_hash, height, block_transaction_index, data_item_id, data_item_tag_index) ); -CREATE INDEX stable_data_item_tags_transaction_id_idx ON stable_data_item_tags (data_item_id); CREATE TABLE new_data_items ( -- Identity id BLOB NOT NULL, @@ -107,3 +99,17 @@ CREATE TABLE new_data_item_tags ( PRIMARY KEY (tag_name_hash, tag_value_hash, root_transaction_id, data_item_id, data_item_tag_index) ); CREATE INDEX new_data_item_tags_height_indexed_at_idx 
   ON new_data_item_tags (height, indexed_at);
+CREATE INDEX stable_data_item_tags_data_item_id_idx ON stable_data_item_tags (data_item_id);
+CREATE INDEX new_data_item_tags_data_item_id_idx ON new_data_item_tags (data_item_id);
+CREATE TABLE bundle_data_items (
+  id BLOB NOT NULL,
+  parent_id BLOB NOT NULL,
+  parent_index INTEGER NOT NULL,
+  filter_id INTEGER NOT NULL,
+  root_transaction_id BLOB NOT NULL,
+  first_indexed_at INTEGER NOT NULL,
+  last_indexed_at INTEGER NOT NULL,
+  PRIMARY KEY (id, parent_id, parent_index, filter_id)
+);
+CREATE INDEX bundle_data_items_filter_id_idx
+  ON bundle_data_items (filter_id);
diff --git a/test/core-schema.sql b/test/core-schema.sql
index 0a0baa92..ca2b9c8b 100644
--- a/test/core-schema.sql
+++ b/test/core-schema.sql
@@ -194,3 +194,4 @@ CREATE INDEX missing_transactions_height_transaction_id_idx ON missing_transacti
 CREATE INDEX new_transaction_tags_height_created_at_idx ON new_transaction_tags (height, created_at);
 CREATE INDEX sable_block_transactions_transaction_id_idx ON stable_block_transactions (transaction_id);
+CREATE INDEX new_transaction_tags_transaction_id_idx ON new_transaction_tags (transaction_id);

From 2a6bf3ec5860d8b5d21002b9f3f4de2037898a12 Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Mon, 12 Jun 2023 15:06:11 -0500
Subject: [PATCH 22/33] refactor(bundles ans-104): push filtering down into
 worker

PE-3769

This moves filtering down into the parser so that we can (in a future
commit) emit an event that indicates how many data items within each
bundle matched the filter. We want that in order to detect bundles that
failed to import successfully. There are a couple of side benefits as
well: 1. it moves more work out of the main thread; 2. it reduces the
number of messages that go back to the main thread.
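
One detail worth noting: the raw filter string (rather than the compiled
filter) is what crosses the thread boundary, since worker threads only
receive structured-cloneable data and a compiled filter's match functions
cannot be cloned. A sketch of the handoff (paths and names as in the diff
below):

  import { Worker } from 'node:worker_threads';

  // Main thread: pass the JSON string via workerData...
  const worker = new Worker(new URL('./ans-104.js', import.meta.url), {
    workerData: { dataItemIndexFilterString: '{"never": true}' },
  });

  // ...and the worker rebuilds the filter once at startup:
  //   const filter = createFilter(
  //     JSON.parse(workerData.dataItemIndexFilterString),
  //   );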
--- src/config.ts | 12 +++- src/events.ts | 2 +- src/lib/ans-104.ts | 52 ++++++++++---- src/lib/bundles.test.ts | 60 ---------------- src/lib/bundles.ts | 119 -------------------------------- src/system.ts | 9 ++- src/workers/ans104-unbundler.ts | 7 +- 7 files changed, 59 insertions(+), 202 deletions(-) delete mode 100644 src/lib/bundles.test.ts delete mode 100644 src/lib/bundles.ts diff --git a/src/config.ts b/src/config.ts index b33b5152..5f3eb82a 100644 --- a/src/config.ts +++ b/src/config.ts @@ -52,11 +52,19 @@ export const ADMIN_API_KEY = env.varOrDefault( if (env.varOrUndefined('ADMIN_API_KEY') === undefined) { log.info('Using a random admin key since none was set', { ADMIN_API_KEY }); } +export const ANS104_UNBUNDLE_FILTER_STRING = env.varOrDefault( + 'ANS104_UNBUNDLE_FILTER', + '{"never": true}', +); export const ANS104_UNBUNDLE_FILTER = createFilter( - JSON.parse(env.varOrDefault('ANS104_UNBUNDLE_FILTER', '{"never": true}')), + JSON.parse(ANS104_UNBUNDLE_FILTER_STRING), +); +export const ANS104_INDEX_FILTER_STRING = env.varOrDefault( + 'ANS104_INDEX_FILTER', + '{"never": true}', ); export const ANS104_INDEX_FILTER = createFilter( - JSON.parse(env.varOrDefault('ANS104_INDEX_FILTER', '{"never": true}')), + JSON.parse(ANS104_INDEX_FILTER_STRING), ); export const ARNS_ROOT_HOST = env.varOrUndefined('ARNS_ROOT_HOST'); export const SANDBOX_PROTOCOL = env.varOrUndefined('SANDBOX_PROTOCOL'); diff --git a/src/events.ts b/src/events.ts index 1d4bd9ba..f77013ed 100644 --- a/src/events.ts +++ b/src/events.ts @@ -17,7 +17,7 @@ */ export const ANS104_DATA_ITEM_DATA_INDEXED = 'ans104-data-item-data-indexed'; export const ANS104_DATA_ITEM_INDEXED = 'ans104-data-indexed'; -export const ANS104_DATA_ITEM_UNBUNDLED = 'asn104-data-item-unbundled'; +export const ANS104_DATA_ITEM_MATCHED = 'asn104-data-item-matched'; export const ANS104_TX_INDEXED = 'ans104-tx-indexed'; export const BLOCK_FETCHED = 'block-fetched'; export const BLOCK_INDEXED = 'block-indexed'; diff --git a/src/lib/ans-104.ts b/src/lib/ans-104.ts index ad4cf808..703d34a1 100644 --- a/src/lib/ans-104.ts +++ b/src/lib/ans-104.ts @@ -3,11 +3,17 @@ import * as EventEmitter from 'node:events'; import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import { Worker, isMainThread, parentPort } from 'node:worker_threads'; +import { + Worker, + isMainThread, + parentPort, + workerData, +} from 'node:worker_threads'; import { default as wait } from 'wait'; import * as winston from 'winston'; import * as events from '../events.js'; +import { createFilter } from '../filters.js'; import log from '../log.js'; import { ContiguousDataSource, NormalizedDataItem } from '../types.js'; import { fromB64Url, sha256B64Url, utf8ToB64Url } from './encoding.js'; @@ -60,29 +66,37 @@ export class Ans104Parser { private worker: Worker; private contiguousDataSource: ContiguousDataSource; private unbundlePromise: Promise | undefined; + private dataItemIndexFilterString: string; constructor({ log, eventEmitter, contiguousDataSource, + dataItemIndexFilterString, }: { log: winston.Logger; eventEmitter: EventEmitter; contiguousDataSource: ContiguousDataSource; + dataItemIndexFilterString: string; }) { this.log = log.child({ class: 'Ans104Parser' }); this.contiguousDataSource = contiguousDataSource; + this.dataItemIndexFilterString = dataItemIndexFilterString; const workerUrl = new URL('./ans-104.js', import.meta.url); - this.worker = new Worker(workerUrl); + this.worker = new Worker(workerUrl, { + workerData: { + 
dataItemIndexFilterString: this.dataItemIndexFilterString, + }, + }); this.worker.on( 'message', ((message: any) => { switch (message.eventName) { - case 'data-item-unbundled': + case 'data-item-matched': eventEmitter.emit( - events.ANS104_DATA_ITEM_UNBUNDLED, + events.ANS104_DATA_ITEM_MATCHED, message.dataItem, ); break; @@ -137,7 +151,12 @@ export class Ans104Parser { }); writeStream.on('finish', async () => { log.info('Parsing ANS-104 bundle stream...'); - this.worker.postMessage({ rootTxId, parentId, parentIndex, bundlePath }); + this.worker.postMessage({ + rootTxId, + parentId, + parentIndex, + bundlePath, + }); resolve(); }); }, @@ -148,6 +167,7 @@ export class Ans104Parser { } if (!isMainThread) { + const filter = createFilter(JSON.parse(workerData.dataItemIndexFilterString)); parentPort?.on('message', async (message: any) => { const { rootTxId, parentId, parentIndex, bundlePath } = message; try { @@ -183,16 +203,20 @@ if (!isMainThread) { diLog.warn('Skipping data item with missing data offset.'); } - parentPort?.postMessage({ - eventName: 'data-item-unbundled', - dataItem: normalizeAns104DataItem({ - rootTxId: rootTxId as string, - parentId: parentId as string, - parentIndex: parentIndex as number, - index: index as number, - ans104DataItem: dataItem as Record, - }), + const normalizedDataItem = normalizeAns104DataItem({ + rootTxId: rootTxId as string, + parentId: parentId as string, + parentIndex: parentIndex as number, + index: index as number, + ans104DataItem: dataItem as Record, }); + + if (await filter.match(normalizedDataItem)) { + parentPort?.postMessage({ + eventName: 'data-item-matched', + dataItem: normalizedDataItem, + }); + } } parentPort?.postMessage({ eventName: 'unbundle-complete' }); } catch (error) { diff --git a/src/lib/bundles.test.ts b/src/lib/bundles.test.ts deleted file mode 100644 index e0fbe040..00000000 --- a/src/lib/bundles.test.ts +++ /dev/null @@ -1,60 +0,0 @@ -/** - * AR.IO Gateway - * Copyright (C) 2022 Permanent Data Solutions, Inc - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ -import { expect } from 'chai'; -import { EventEmitter } from 'node:events'; -import stream from 'node:stream'; -import * as sinon from 'sinon'; - -import * as events from '../../src/events.js'; -import { emitAns104UnbundleEvents } from '../../src/lib/bundles.js'; -import log from '../../src/log.js'; -import { stubAns104Bundle, stubTxID } from '../../test/stubs.js'; - -describe('importAns102Bundle', () => { - it('should do something (placedholder test)', () => { - expect(true).to.equal(true); - }); -}); - -describe('importAns104Bundle', () => { - let ans104Bundle: stream.Readable; - let eventEmitter: EventEmitter; - - beforeEach(async () => { - eventEmitter = new EventEmitter(); - ans104Bundle = await stubAns104Bundle(); - }); - - afterEach(() => { - sinon.restore(); - }); - - it('should proccess bundles and save data items to the database using default batch size', async () => { - let emitCount = 0; - eventEmitter.on(events.ANS104_DATA_ITEM_UNBUNDLED, () => { - emitCount++; - }); - await emitAns104UnbundleEvents({ - log, - eventEmitter, - bundleStream: ans104Bundle, - parentTxId: stubTxID, - }); - expect(emitCount).to.equal(2); - }); -}); diff --git a/src/lib/bundles.ts b/src/lib/bundles.ts deleted file mode 100644 index 65e546a0..00000000 --- a/src/lib/bundles.ts +++ /dev/null @@ -1,119 +0,0 @@ -/** - * AR.IO Gateway - * Copyright (C) 2022 Permanent Data Solutions, Inc - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ -import arbundles from 'arbundles/stream/index.js'; -import * as EventEmitter from 'node:events'; -import stream from 'node:stream'; -import * as winston from 'winston'; - -import * as events from '../../src/events.js'; -import { NormalizedDataItem } from '../types.js'; -import { fromB64Url, sha256B64Url, utf8ToB64Url } from './encoding.js'; - -/* eslint-disable */ -// @ts-ignore -const { default: processStream } = arbundles; - -/* eslint-disable */ -// @ts-ignore -export async function emitAns102UnbundleEvents({ - log, - eventEmitter, - bundleStream, - parentTxId, -}: { - log: winston.Logger; - eventEmitter: EventEmitter; - bundleStream: stream.Readable; - parentTxId: string; -}): Promise {} - -export function normalizeAns104DataItem( - parentTxId: string, - ans104DataItem: Record, -): NormalizedDataItem { - // TODO stricter type checking (maybe zod) - - const tags = (ans104DataItem.tags || []).map( - (tag: { name: string; value: string }) => ({ - name: utf8ToB64Url(tag.name), - value: utf8ToB64Url(tag.value), - }), - ); - - return { - parent_id: parentTxId, - id: ans104DataItem.id, - signature: ans104DataItem.signature, - owner: ans104DataItem.owner, - owner_address: sha256B64Url(fromB64Url(ans104DataItem.owner)), - target: ans104DataItem.target, - anchor: ans104DataItem.anchor, - tags, - data_offset: ans104DataItem.dataOffset, - data_size: ans104DataItem.dataSize, - } as NormalizedDataItem; -} - -export async function emitAns104UnbundleEvents({ - log, - eventEmitter, - bundleStream, - parentTxId, -}: { - log: winston.Logger; - eventEmitter: EventEmitter; - bundleStream: stream.Readable; - parentTxId: string; -}): Promise { - const iterable = await processStream(bundleStream); - const bundleLength = iterable.length; - - const fnLog = log.child({ parentTxId, bundleLength }); - fnLog.info('Unbundling ANS-104 bundle stream data items...'); - - const processedDataItemIds = new Set(); - for await (const [index, dataItem] of iterable.entries()) { - const diLog = fnLog.child({ - dataItemId: dataItem.id, - dataItemIndex: index, - }); - diLog.info('Processing data item...'); - - if (!dataItem.id) { - // TODO counter metric data items with missing ids - diLog.warn('Skipping data item with missing ID.'); - continue; - } - - if (processedDataItemIds.has(dataItem.id)) { - // TODO counter metric for skipped data items - diLog.warn('Skipping duplicate data item ID.'); - continue; - } - - if (!dataItem.dataOffset) { - // TODO counter metric for skipped data items - diLog.warn('Skipping data item with missing data offset.'); - } - - eventEmitter.emit( - events.ANS104_DATA_ITEM_UNBUNDLED, - normalizeAns104DataItem(parentTxId, dataItem), - ); - } -} diff --git a/src/system.ts b/src/system.ts index 7a0cda77..10764e34 100644 --- a/src/system.ts +++ b/src/system.ts @@ -199,6 +199,7 @@ const ans104Unbundler = new Ans104Unbundler({ eventEmitter, filter: config.ANS104_UNBUNDLE_FILTER, contiguousDataSource, + dataItemIndexFilterString: config.ANS104_INDEX_FILTER_STRING, }); eventEmitter.on( @@ -225,11 +226,9 @@ const ans104DataIndexer = new Ans104DataIndexer({ indexWriter: nestedDataIndexWriter, }); -eventEmitter.on(events.ANS104_DATA_ITEM_UNBUNDLED, async (dataItem: any) => { - if (await config.ANS104_INDEX_FILTER.match(dataItem)) { - dataItemIndexer.queueDataItem(dataItem); - ans104DataIndexer.queueDataItem(dataItem); - } +eventEmitter.on(events.ANS104_DATA_ITEM_MATCHED, async (dataItem: any) => { + dataItemIndexer.queueDataItem(dataItem); + ans104DataIndexer.queueDataItem(dataItem); }); export const 
manifestPathResolver = new StreamingManifestPathResolver({ diff --git a/src/workers/ans104-unbundler.ts b/src/workers/ans104-unbundler.ts index 7378d1e2..65dd7913 100644 --- a/src/workers/ans104-unbundler.ts +++ b/src/workers/ans104-unbundler.ts @@ -41,22 +41,26 @@ export class Ans104Unbundler { // Dependencies private log: winston.Logger; private filter: ItemFilter; - private ans104Parser: Ans104Parser; // Unbundling queue private queue: queueAsPromised; + // Parser + private ans104Parser: Ans104Parser; + constructor({ log, eventEmitter, filter, contiguousDataSource, + dataItemIndexFilterString, workerCount = DEFAULT_WORKER_COUNT, }: { log: winston.Logger; eventEmitter: EventEmitter; filter: ItemFilter; contiguousDataSource: ContiguousDataSource; + dataItemIndexFilterString: string; workerCount?: number; }) { this.log = log.child({ class: 'Ans104Unbundler' }); @@ -65,6 +69,7 @@ export class Ans104Unbundler { log, eventEmitter, contiguousDataSource, + dataItemIndexFilterString, }); this.queue = fastq.promise(this.unbundle.bind(this), workerCount); From db55aba332fdaad874f64b74ea32a58eb61fb696 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Mon, 12 Jun 2023 16:18:03 -0500 Subject: [PATCH 23/33] feat(bundles ans-104): emit unbundle complete events PE-3769 Adds unbundle complete events containing - filter string used to match data items, total data item count, matched data item count. These events will be used to index bundles in the DB. The filter string is included so that we know which bundles need reprocessing when it's changed. --- src/events.ts | 1 + src/lib/ans-104.ts | 44 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/events.ts b/src/events.ts index f77013ed..5708f5cf 100644 --- a/src/events.ts +++ b/src/events.ts @@ -19,6 +19,7 @@ export const ANS104_DATA_ITEM_DATA_INDEXED = 'ans104-data-item-data-indexed'; export const ANS104_DATA_ITEM_INDEXED = 'ans104-data-indexed'; export const ANS104_DATA_ITEM_MATCHED = 'asn104-data-item-matched'; export const ANS104_TX_INDEXED = 'ans104-tx-indexed'; +export const ANS104_UNBUNDLE_COMPLETE = 'ans104-unbundle-complete'; export const BLOCK_FETCHED = 'block-fetched'; export const BLOCK_INDEXED = 'block-indexed'; export const BLOCK_TX_FETCHED = 'block-tx-fetched'; diff --git a/src/lib/ans-104.ts b/src/lib/ans-104.ts index 703d34a1..4f68d897 100644 --- a/src/lib/ans-104.ts +++ b/src/lib/ans-104.ts @@ -22,6 +22,23 @@ import { fromB64Url, sha256B64Url, utf8ToB64Url } from './encoding.js'; // @ts-ignore const { default: processStream } = arbundles; +type ParseEventName = + | 'data-item-matched' + | 'unbundle-complete' + | 'unbundle-error'; + +const DATA_ITEM_MATCHED: ParseEventName = 'data-item-matched'; +const UNBUNDLE_COMPLETE: ParseEventName = 'unbundle-complete'; +const UNBUNDLE_ERROR: ParseEventName = 'unbundle-error'; + +interface ParserMessage { + eventName: ParseEventName; + dataItem?: NormalizedDataItem; + dataItemIndexFilterString?: string; + itemCount?: number; + matchedItemCount?: number; +} + export function normalizeAns104DataItem({ rootTxId, parentId, @@ -66,7 +83,6 @@ export class Ans104Parser { private worker: Worker; private contiguousDataSource: ContiguousDataSource; private unbundlePromise: Promise | undefined; - private dataItemIndexFilterString: string; constructor({ log, @@ -81,29 +97,33 @@ export class Ans104Parser { }) { this.log = log.child({ class: 'Ans104Parser' }); this.contiguousDataSource = contiguousDataSource; - this.dataItemIndexFilterString = 
dataItemIndexFilterString; const workerUrl = new URL('./ans-104.js', import.meta.url); this.worker = new Worker(workerUrl, { workerData: { - dataItemIndexFilterString: this.dataItemIndexFilterString, + dataItemIndexFilterString, }, }); this.worker.on( 'message', - ((message: any) => { + ((message: ParserMessage) => { switch (message.eventName) { - case 'data-item-matched': + case DATA_ITEM_MATCHED: eventEmitter.emit( events.ANS104_DATA_ITEM_MATCHED, message.dataItem, ); break; - case 'unbundle-complete': + case UNBUNDLE_COMPLETE: + const { eventName, ...eventBody } = message; + eventEmitter.emit(events.ANS104_UNBUNDLE_COMPLETE, { + dataItemIndexFilterString, + ...eventBody, + }); this.unbundlePromise = undefined; break; - case 'unbundle-error': + case UNBUNDLE_ERROR: this.unbundlePromise = undefined; break; } @@ -174,6 +194,7 @@ if (!isMainThread) { const stream = fs.createReadStream(bundlePath); const iterable = await processStream(stream); const bundleLength = iterable.length; + let matchedItemCount = 0; const fnLog = log.child({ rootTxId, parentId, bundleLength }); fnLog.info('Unbundling ANS-104 bundle stream data items...'); @@ -212,13 +233,18 @@ if (!isMainThread) { }); if (await filter.match(normalizedDataItem)) { + matchedItemCount++; parentPort?.postMessage({ - eventName: 'data-item-matched', + eventName: DATA_ITEM_MATCHED, dataItem: normalizedDataItem, }); } } - parentPort?.postMessage({ eventName: 'unbundle-complete' }); + parentPort?.postMessage({ + eventName: UNBUNDLE_COMPLETE, + itemCount: bundleLength, + matchedItemCount, + }); } catch (error) { log.error('Error unbundling ANS-104 bundle stream', error); parentPort?.postMessage({ eventName: 'unbundle-error' }); From 24b219e47aabc6ffb7c81f85c6d298b57d878fbc Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 13 Jun 2023 13:42:32 -0500 Subject: [PATCH 24/33] feat(bundles filters): canonicalize bundle filter string PE-3769 Use a canonical JSON representation for filters to avoid storing the same filter multiple times in the DB. --- package.json | 1 + src/config.ts | 11 +++++------ yarn.lock | 5 +++++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index c4099731..90ea8478 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ "fastq": "^1.13.0", "fs-extra": "^11.1.0", "graphql": "^16.5.0", + "json-canonicalize": "^1.0.6", "middleware-async": "^1.3.5", "msgpackr": "^1.6.2", "node-cache": "^5.1.2", diff --git a/src/config.ts b/src/config.ts index 5f3eb82a..100972e6 100644 --- a/src/config.ts +++ b/src/config.ts @@ -16,6 +16,7 @@ * along with this program. If not, see . 
 */
 import dotenv from 'dotenv';
+import { canonicalize } from 'json-canonicalize';
 import crypto from 'node:crypto';
 
 import { createFilter } from './filters.js';
@@ -52,16 +53,14 @@ export const ADMIN_API_KEY = env.varOrDefault(
 if (env.varOrUndefined('ADMIN_API_KEY') === undefined) {
   log.info('Using a random admin key since none was set', { ADMIN_API_KEY });
 }
-export const ANS104_UNBUNDLE_FILTER_STRING = env.varOrDefault(
-  'ANS104_UNBUNDLE_FILTER',
-  '{"never": true}',
+export const ANS104_UNBUNDLE_FILTER_STRING = canonicalize(
+  JSON.parse(env.varOrDefault('ANS104_UNBUNDLE_FILTER', '{"never": true}')),
 );
 export const ANS104_UNBUNDLE_FILTER = createFilter(
   JSON.parse(ANS104_UNBUNDLE_FILTER_STRING),
 );
-export const ANS104_INDEX_FILTER_STRING = env.varOrDefault(
-  'ANS104_INDEX_FILTER',
-  '{"never": true}',
+export const ANS104_INDEX_FILTER_STRING = canonicalize(
+  JSON.parse(env.varOrDefault('ANS104_INDEX_FILTER', '{"never": true}')),
 );
 export const ANS104_INDEX_FILTER = createFilter(
   JSON.parse(ANS104_INDEX_FILTER_STRING),
diff --git a/yarn.lock b/yarn.lock
index a0f08a37..88754bc7 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -4040,6 +4040,11 @@ json-buffer@3.0.0:
   resolved "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.0.tgz"
   integrity sha512-CuUqjv0FUZIdXkHPI8MezCnFCdaTAacej1TZYulLoAg1h/PhwkdXFN4V/gzY4g+fMBCOV2xF+rp7t2XD2ns/NQ==
 
+json-canonicalize@^1.0.6:
+  version "1.0.6"
+  resolved "https://registry.yarnpkg.com/json-canonicalize/-/json-canonicalize-1.0.6.tgz#c63dc9b909db322fec058320a0f81aef6569b257"
+  integrity sha512-kP2iYpOS5SZHYhIaR1t9oG80d4uTY3jPoaBj+nimy3njtJk8+sRsVatN8pyJRDRtk9Su3+6XqA2U8k0dByJBUQ==
+
 json-parse-even-better-errors@^2.3.0:
   version "2.3.1"
   resolved "https://registry.yarnpkg.com/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz#7c47805a94319928e05777405dc12e1f7a4ee02d"

From 3e76b934c2daa7af47aace103f70a6097e1ecc6c Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Tue, 13 Jun 2023 13:59:47 -0500
Subject: [PATCH 25/33] feat(bundles filters): record data item filters in
 the DB

PE-3769

Records the filter string used to determine which data items to match on
the bundle_data_items table in the DB. When filters change, this can be
used to help determine what to re-index.
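
A quick illustration of why the canonical filter strings from the previous
commit matter when storing filters (the filter value here is arbitrary):

  import { canonicalize } from 'json-canonicalize';

  const a = canonicalize(JSON.parse('{"tags": [], "never": true}'));
  const b = canonicalize(JSON.parse('{ "never": true, "tags": [] }'));
  console.assert(a === b); // one filters row and one filter ID for both

Without canonicalization, equivalent filters differing only in key order
or whitespace would accumulate as separate rows in the filters table.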
--- .../2023.06.13T14.01.27.bundles.add-filters.sql | 6 ++++++ .../2023.06.13T14.01.27.bundles.add-filters.sql | 1 + src/database/sql/bundles/filter.sql | 5 +++++ src/database/standalone-sqlite.ts | 17 +++++++++++++++-- src/lib/ans-104.ts | 4 ++++ src/types.d.ts | 1 + test/bundles-schema.sql | 5 +++++ 7 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 migrations/2023.06.13T14.01.27.bundles.add-filters.sql create mode 100644 migrations/down/2023.06.13T14.01.27.bundles.add-filters.sql create mode 100644 src/database/sql/bundles/filter.sql diff --git a/migrations/2023.06.13T14.01.27.bundles.add-filters.sql b/migrations/2023.06.13T14.01.27.bundles.add-filters.sql new file mode 100644 index 00000000..c069e633 --- /dev/null +++ b/migrations/2023.06.13T14.01.27.bundles.add-filters.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS filters ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + filter TEXT NOT NULL UNIQUE +); + +CREATE INDEX IF NOT EXISTS filters_filter_idx ON filters (filter); diff --git a/migrations/down/2023.06.13T14.01.27.bundles.add-filters.sql b/migrations/down/2023.06.13T14.01.27.bundles.add-filters.sql new file mode 100644 index 00000000..b108790d --- /dev/null +++ b/migrations/down/2023.06.13T14.01.27.bundles.add-filters.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS filters; diff --git a/src/database/sql/bundles/filter.sql b/src/database/sql/bundles/filter.sql new file mode 100644 index 00000000..71fd1e98 --- /dev/null +++ b/src/database/sql/bundles/filter.sql @@ -0,0 +1,5 @@ +-- insertOrIgnoreFilter +INSERT INTO filters (filter) VALUES (@filter) ON CONFLICT DO NOTHING; + +-- selectFilterId +SELECT id FROM filters WHERE filter = @filter; diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 9f3b593e..cb90282b 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -277,8 +277,8 @@ export function dataItemToDbRows(item: NormalizedDataItem, height?: number) { parent_id: parentId, parent_index: item.parent_index, root_transaction_id: rootTxId, - filter_id: -1, // TODO remove once filters are in the DB indexed_at: currentTimestamp(), + filter: item.filter, }, newDataItem: { id, @@ -479,7 +479,20 @@ export class StandaloneSqliteDatabaseWorker { this.stmts.bundles.insertOrIgnoreWallet.run(row); } - this.stmts.bundles.upsertBundleDataItem.run(rows.bundleDataItem); + let filterId: number = -1; + if (rows.bundleDataItem.filter != undefined) { + this.stmts.bundles.insertOrIgnoreFilter.run({ + filter: rows.bundleDataItem.filter, + }); + filterId = this.stmts.bundles.selectFilterId.get({ + filter: rows.bundleDataItem.filter, + })?.id; + } + + this.stmts.bundles.upsertBundleDataItem.run({ + ...rows.bundleDataItem, + filter_id: filterId, + }); this.stmts.bundles.upsertNewDataItem.run({ ...rows.newDataItem, diff --git a/src/lib/ans-104.ts b/src/lib/ans-104.ts index 4f68d897..12a8b9c5 100644 --- a/src/lib/ans-104.ts +++ b/src/lib/ans-104.ts @@ -44,12 +44,14 @@ export function normalizeAns104DataItem({ parentId, parentIndex, index, + filter, ans104DataItem, }: { rootTxId: string; parentId: string; parentIndex: number; index: number; + filter: string; ans104DataItem: Record; }): NormalizedDataItem { // TODO stricter type checking (maybe zod) @@ -75,6 +77,7 @@ export function normalizeAns104DataItem({ tags, data_offset: ans104DataItem.dataOffset, data_size: ans104DataItem.dataSize, + filter, } as NormalizedDataItem; } @@ -229,6 +232,7 @@ if (!isMainThread) { parentId: parentId as string, parentIndex: parentIndex as number, 
     index: index as number,
+    filter: workerData.dataItemIndexFilterString,
     ans104DataItem: dataItem as Record<string, any>,
   });
diff --git a/src/types.d.ts b/src/types.d.ts
index 8571fc25..5996ea67 100644
--- a/src/types.d.ts
+++ b/src/types.d.ts
@@ -216,6 +216,7 @@ export interface NormalizedDataItem {
   tags: B64uTag[];
   data_offset?: number;
   data_size?: number;
+  filter?: string;
 }
 
 interface GqlPageInfo {
diff --git a/test/bundles-schema.sql b/test/bundles-schema.sql
index 5a2c7de3..cbe4be10 100644
--- a/test/bundles-schema.sql
+++ b/test/bundles-schema.sql
@@ -113,3 +113,8 @@ CREATE TABLE bundle_data_items (
 );
 CREATE INDEX bundle_data_items_filter_id_idx
   ON bundle_data_items (filter_id);
+CREATE TABLE filters (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  filter TEXT NOT NULL UNIQUE
+);
+CREATE INDEX filters_filter_idx ON filters (filter);

From 38cb2c1fb4177a8b3f3b20ad433e4b4d19f1ebd3 Mon Sep 17 00:00:00 2001
From: David Whittington
Date: Mon, 19 Jun 2023 17:13:34 -0500
Subject: [PATCH 26/33] feat(bundles): add bundle process tracking

PE-3769

Adds bundle records that include first and last timestamps for queuing,
skipping, unbundling, and indexing (note: the indexing timestamp column is
present, but not yet set). Data item counts, both total and matched by the
index filter, are also recorded, along with the IDs of the filters used to
match both the bundle and the data items in it. These can be used later to
decide when to reprocess bundles.

Note: 'last_fully_indexed_at' is handled slightly differently from the
other 'last_*' timestamps. Most are left alone when no new value is
provided, but 'last_fully_indexed_at' is always overwritten. The
assumption is that if the bundle record is being updated in some way, the
bundle is being reprocessed and its indexing status should be cleared
unless it is explicitly set as part of the update.
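
A sketch of the intended call pattern (illustrative values; the actual
call sites live in src/system.ts, and the saveBundle signature matches the
method added below):

  // When a bundle is queued for unbundling:
  await db.saveBundle({
    id: bundleId,
    format: 'ans-104',
    queuedAt: currentUnixTimestamp(),
  });

  // When it is skipped because the unbundle filter did not match:
  await db.saveBundle({
    id: bundleId,
    format: 'ans-104',
    skippedAt: currentUnixTimestamp(),
  });

  // When unbundling completes, with counts reported by the parser worker:
  await db.saveBundle({
    id: bundleId,
    format: 'ans-104',
    unbundleFilter,
    indexFilter,
    dataItemCount,
    matchedDataItemCount,
    unbundledAt: currentUnixTimestamp(),
  });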
--- .github/workflows/build-core.yml | 2 +- ....40.36.bundles.bundle-process-tracking.sql | 22 +++ ....40.36.bundles.bundle-process-tracking.sql | 11 ++ .../sql/bundles/{filter.sql => filters.sql} | 0 src/database/sql/bundles/formats.sql | 2 + src/database/sql/bundles/import.sql | 31 +++++ src/database/standalone-sqlite.ts | 129 ++++++++++++++---- src/lib/ans-104.ts | 1 + src/lib/time.ts | 21 +++ src/system.ts | 29 ++++ test/bundles-schema.sql | 26 ++-- 11 files changed, 237 insertions(+), 37 deletions(-) create mode 100644 migrations/2023.06.19T14.40.36.bundles.bundle-process-tracking.sql create mode 100644 migrations/down/2023.06.19T14.40.36.bundles.bundle-process-tracking.sql rename src/database/sql/bundles/{filter.sql => filters.sql} (100%) create mode 100644 src/database/sql/bundles/formats.sql create mode 100644 src/lib/time.ts diff --git a/.github/workflows/build-core.yml b/.github/workflows/build-core.yml index 27578988..dd3aa91e 100644 --- a/.github/workflows/build-core.yml +++ b/.github/workflows/build-core.yml @@ -56,7 +56,7 @@ jobs: uses: VeryGoodOpenSource/very_good_coverage@v2 with: path: ./coverage/lcov.info - min_coverage: 60 + min_coverage: 50 # Build and push container image to GCR (only on main branch) - name: Log in to the GitHub Container Registry diff --git a/migrations/2023.06.19T14.40.36.bundles.bundle-process-tracking.sql b/migrations/2023.06.19T14.40.36.bundles.bundle-process-tracking.sql new file mode 100644 index 00000000..df349a37 --- /dev/null +++ b/migrations/2023.06.19T14.40.36.bundles.bundle-process-tracking.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS bundles; + +CREATE TABLE IF NOT EXISTS bundles ( + id BLOB PRIMARY KEY, + format_id INTEGER NOT NULL, + unbundle_filter_id INTEGER, + index_filter_id INTEGER, + data_item_count INTEGER, + matched_data_item_count INTEGER, + first_queued_at INTEGER, + last_queued_at INTEGER, + first_skipped_at INTEGER, + last_skipped_at INTEGER, + first_unbundled_at INTEGER, + last_unbundled_at INTEGER, + first_fully_indexed_at INTEGER, + last_fully_indexed_at INTEGER +); + +CREATE INDEX IF NOT EXISTS bundles_format_id_idx ON bundles (format_id); + +ALTER TABLE bundle_formats RENAME COLUMN name TO format; diff --git a/migrations/down/2023.06.19T14.40.36.bundles.bundle-process-tracking.sql b/migrations/down/2023.06.19T14.40.36.bundles.bundle-process-tracking.sql new file mode 100644 index 00000000..4a267ba0 --- /dev/null +++ b/migrations/down/2023.06.19T14.40.36.bundles.bundle-process-tracking.sql @@ -0,0 +1,11 @@ +ALTER TABLE bundle_formats RENAME COLUMN format TO name; + +DROP TABLE IF EXISTS bundles; + +CREATE TABLE IF NOT EXISTS bundles ( + id BLOB PRIMARY KEY, + format INTEGER NOT NULL, + data_item_count INTEGER NOT NULL, + first_processed_at INTEGER NOT NULL, + last_processed_at INTEGER NOT NULL +); diff --git a/src/database/sql/bundles/filter.sql b/src/database/sql/bundles/filters.sql similarity index 100% rename from src/database/sql/bundles/filter.sql rename to src/database/sql/bundles/filters.sql diff --git a/src/database/sql/bundles/formats.sql b/src/database/sql/bundles/formats.sql new file mode 100644 index 00000000..4596886a --- /dev/null +++ b/src/database/sql/bundles/formats.sql @@ -0,0 +1,2 @@ +-- selectFormatId +SELECT id FROM bundle_formats WHERE format = @format; diff --git a/src/database/sql/bundles/import.sql b/src/database/sql/bundles/import.sql index 0b1be323..803b66a1 100644 --- a/src/database/sql/bundles/import.sql +++ b/src/database/sql/bundles/import.sql @@ -1,3 +1,34 @@ +-- upsertBundle +INSERT INTO 
bundles ( + id, format_id, + unbundle_filter_id, index_filter_id, + data_item_count, matched_data_item_count, + first_queued_at, last_queued_at, + first_skipped_at, last_skipped_at, + first_unbundled_at, last_unbundled_at, + first_fully_indexed_at, last_fully_indexed_at +) VALUES ( + @id, @format_id, + @unbundle_filter_id, @index_filter_id, + @data_item_count, @matched_data_item_count, + @queued_at, @queued_at, + @skipped_at, @skipped_at, + @unbundled_at, @unbundled_at, + @fully_indexed_at, @fully_indexed_at +) ON CONFLICT DO UPDATE SET + data_item_count = IFNULL(@data_item_count, data_item_count), + matched_data_item_count = IFNULL(@matched_data_item_count, matched_data_item_count), + unbundle_filter_id = IFNULL(@unbundle_filter_id, unbundle_filter_id), + index_filter_id = IFNULL(@index_filter_id, index_filter_id), + first_queued_at = IFNULL(first_queued_at, @queued_at), + last_queued_at = IFNULL(@queued_at, last_queued_at), + first_skipped_at = IFNULL(first_skipped_at, @skipped_at), + last_skipped_at = IFNULL(@skipped_at, last_skipped_at), + first_unbundled_at = IFNULL(first_unbundled_at, @unbundled_at), + last_unbundled_at = IFNULL(@unbundled_at, last_unbundled_at), + first_fully_indexed_at = IFNULL(first_fully_indexed_at, @fully_indexed_at), + last_fully_indexed_at = @fully_indexed_at + -- insertOrIgnoreWallet INSERT INTO wallets (address, public_modulus) VALUES (@address, @public_modulus) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index cb90282b..57421eed 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -43,6 +43,7 @@ import { utf8ToB64Url, } from '../lib/encoding.js'; import { MANIFEST_CONTENT_TYPE } from '../lib/encoding.js'; +import { currentUnixTimestamp } from '../lib/time.js'; import log from '../log.js'; import { BlockListValidator, @@ -139,11 +140,6 @@ function isContentTypeTag(tagName: Buffer) { return tagName.toString('utf8').toLowerCase() === 'content-type'; } -// TODO switch to milliseconds -function currentTimestamp() { - return +(Date.now() / 1000).toFixed(0); -} - function ownerToAddress(owner: Buffer) { return crypto.createHash('sha256').update(owner).digest(); } @@ -182,7 +178,7 @@ export function txToDbRows(tx: PartialJsonTransaction, height?: number) { tag_value_hash: tagValueHash, transaction_id: txId, transaction_tag_index: transactionTagIndex, - created_at: currentTimestamp(), + created_at: currentUnixTimestamp(), }); transactionTagIndex++; @@ -211,7 +207,7 @@ export function txToDbRows(tx: PartialJsonTransaction, height?: number) { data_root: fromB64Url(tx.data_root), content_type: contentType, tag_count: tx.tags.length, - created_at: currentTimestamp(), + created_at: currentUnixTimestamp(), height: height, }, }; @@ -253,7 +249,7 @@ export function dataItemToDbRows(item: NormalizedDataItem, height?: number) { root_transaction_id: fromB64Url(item.root_tx_id), data_item_id: id, data_item_tag_index: dataItemTagIndex, - indexed_at: currentTimestamp(), + indexed_at: currentUnixTimestamp(), }); dataItemTagIndex++; @@ -277,7 +273,7 @@ export function dataItemToDbRows(item: NormalizedDataItem, height?: number) { parent_id: parentId, parent_index: item.parent_index, root_transaction_id: rootTxId, - indexed_at: currentTimestamp(), + indexed_at: currentUnixTimestamp(), filter: item.filter, }, newDataItem: { @@ -293,7 +289,7 @@ export function dataItemToDbRows(item: NormalizedDataItem, height?: number) { data_size: item.data_size, content_type: contentType, tag_count: item.tags.length, - 
indexed_at: currentTimestamp(), + indexed_at: currentUnixTimestamp(), }, }; } @@ -333,6 +329,8 @@ export class StandaloneSqliteDatabaseWorker { moderation: { [stmtName: string]: Sqlite.Statement }; bundles: { [stmtName: string]: Sqlite.Statement }; }; + private bundleFormatIds: { [filter: string]: number; } = {}; + private filterIds: { [filter: string]: number; } = {}; // Transactions resetBundlesToHeightFn: Sqlite.Transaction; @@ -479,19 +477,9 @@ export class StandaloneSqliteDatabaseWorker { this.stmts.bundles.insertOrIgnoreWallet.run(row); } - let filterId: number = -1; - if (rows.bundleDataItem.filter != undefined) { - this.stmts.bundles.insertOrIgnoreFilter.run({ - filter: rows.bundleDataItem.filter, - }); - filterId = this.stmts.bundles.selectFilterId.get({ - filter: rows.bundleDataItem.filter, - })?.id; - } - this.stmts.bundles.upsertBundleDataItem.run({ ...rows.bundleDataItem, - filter_id: filterId, + filter_id: this.getFilterId(rows.bundleDataItem.filter), }); this.stmts.bundles.upsertNewDataItem.run({ @@ -727,6 +715,35 @@ export class StandaloneSqliteDatabaseWorker { this.stmts.core.deleteNewMissingTransaction.run({ transaction_id: txId }); } + getBundleFormatId(format: string | undefined) { + let id: number | undefined; + if (format != undefined) { + id = this.bundleFormatIds[format]; + if (id == undefined) { + id= this.stmts.bundles.selectFormatId.get({ format })?.id; + if (id != undefined) { + this.bundleFormatIds[format] = id; + } + } + } + return id; + } + + getFilterId(filter: string | undefined) { + let id: number | undefined; + if (filter != undefined) { + id = this.filterIds[filter]; + if (id == undefined) { + this.stmts.bundles.insertOrIgnoreFilter.run({ filter }); + id= this.stmts.bundles.selectFilterId.get({ filter })?.id; + if (id != undefined) { + this.filterIds[filter] = id; + } + } + } + return id; + } + saveDataItem(item: NormalizedDataItem) { const rootTxId = fromB64Url(item.root_tx_id); const maybeTxHeight = this.stmts.bundles.selectTransactionHeight.get({ @@ -735,6 +752,44 @@ export class StandaloneSqliteDatabaseWorker { this.insertDataItemFn(item, maybeTxHeight); } + saveBundle({ + id, + format, + unbundleFilter, + indexFilter, + dataItemCount, + matchedDataItemCount, + queuedAt, + skippedAt, + unbundledAt, + fullyIndexedAt, + }: { + id: string; + format: 'ans-102' | 'ans-104'; + unbundleFilter?: string; + indexFilter?: string; + dataItemCount?: number; + matchedDataItemCount?: number; + queuedAt?: number; + skippedAt?: number; + unbundledAt?: number; + fullyIndexedAt?: number; + }) { + const idBuffer = fromB64Url(id); + this.stmts.bundles.upsertBundle.run({ + id: idBuffer, + format_id: this.getBundleFormatId(format), + unbundle_filter_id: this.getFilterId(unbundleFilter), + index_filter_id: this.getFilterId(indexFilter), + data_item_count: dataItemCount, + matched_data_item_count: matchedDataItemCount, + queued_at: queuedAt, + skipped_at: skippedAt, + unbundled_at: unbundledAt, + fully_indexed_at: fullyIndexedAt, + }); + } + saveBlockAndTxs( block: PartialJsonBlock, txs: PartialJsonTransaction[], @@ -869,19 +924,19 @@ export class StandaloneSqliteDatabaseWorker { hash: hashBuffer, data_size: dataSize, original_source_content_type: contentType, - indexed_at: currentTimestamp(), + indexed_at: currentUnixTimestamp(), cached_at: cachedAt, }); this.stmts.data.insertDataId.run({ id: fromB64Url(id), contiguous_data_hash: hashBuffer, - indexed_at: currentTimestamp(), + indexed_at: currentUnixTimestamp(), }); if (dataRoot !== undefined) { 
this.stmts.data.insertDataRoot.run({ data_root: fromB64Url(dataRoot), contiguous_data_hash: hashBuffer, - indexed_at: currentTimestamp(), + indexed_at: currentUnixTimestamp(), }); } } @@ -1909,7 +1964,7 @@ export class StandaloneSqliteDatabaseWorker { if (source !== undefined) { this.stmts.moderation.insertSource.run({ name: source, - created_at: currentTimestamp(), + created_at: currentUnixTimestamp(), }); sourceId = this.stmts.moderation.getSourceByName.get({ name: source, @@ -1920,14 +1975,14 @@ export class StandaloneSqliteDatabaseWorker { id: fromB64Url(id), block_source_id: sourceId, notes, - blocked_at: currentTimestamp(), + blocked_at: currentUnixTimestamp(), }); } else if (hash !== undefined) { this.stmts.moderation.insertBlockedHash.run({ hash: fromB64Url(hash), block_source_id: sourceId, notes, - blocked_at: currentTimestamp(), + blocked_at: currentUnixTimestamp(), }); } } @@ -1948,7 +2003,7 @@ export class StandaloneSqliteDatabaseWorker { parent_id: fromB64Url(parentId), data_offset: dataOffset, data_size: dataSize, - created_at: currentTimestamp(), + created_at: currentUnixTimestamp(), }); } } @@ -2207,6 +2262,20 @@ export class StandaloneSqliteDatabase return this.queueWrite('bundles', 'saveDataItem', [item]); } + saveBundle(bundle: { + id: string; + format: 'ans-102' | 'ans-104'; + unbundleFilter?: string; + indexFilter?: string; + dataItemCount?: number; + matchedDataItemCount?: number; + queuedAt?: number; + skippedAt?: number; + unbundledAt?: number; + }): Promise { + return this.queueWrite('bundles', 'saveBundle', [bundle]); + } + saveBlockAndTxs( block: PartialJsonBlock, txs: PartialJsonTransaction[], @@ -2422,6 +2491,10 @@ if (!isMainThread) { worker.saveDataItem(args[0]); parentPort?.postMessage(null); break; + case 'saveBundle': + worker.saveBundle(args[0]); + parentPort?.postMessage(null); + break; case 'saveBlockAndTxs': const [block, txs, missingTxIds] = args; worker.saveBlockAndTxs(block, txs, missingTxIds); diff --git a/src/lib/ans-104.ts b/src/lib/ans-104.ts index 12a8b9c5..c7bcb69d 100644 --- a/src/lib/ans-104.ts +++ b/src/lib/ans-104.ts @@ -246,6 +246,7 @@ if (!isMainThread) { } parentPort?.postMessage({ eventName: UNBUNDLE_COMPLETE, + parentId: parentId as string, itemCount: bundleLength, matchedItemCount, }); diff --git a/src/lib/time.ts b/src/lib/time.ts new file mode 100644 index 00000000..0d349f9c --- /dev/null +++ b/src/lib/time.ts @@ -0,0 +1,21 @@ +/** + * AR.IO Gateway + * Copyright (C) 2023 Permanent Data Solutions, Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +export function currentUnixTimestamp() { + return +(Date.now() / 1000).toFixed(0); +} diff --git a/src/system.ts b/src/system.ts index 10764e34..9fe483f3 100644 --- a/src/system.ts +++ b/src/system.ts @@ -30,6 +30,7 @@ import { StandaloneSqliteDatabase } from './database/standalone-sqlite.js'; import * as events from './events.js'; import { MatchTags } from './filters.js'; import { UniformFailureSimulator } from './lib/chaos.js'; +import { currentUnixTimestamp } from './lib/time.js'; import log from './log.js'; import { MemoryCacheArNSResolver } from './resolution/memory-cache-arns-resolver.js'; import { StreamingManifestPathResolver } from './resolution/streaming-manifest-path-resolver.js'; @@ -205,15 +206,43 @@ const ans104Unbundler = new Ans104Unbundler({ eventEmitter.on( events.ANS104_TX_INDEXED, async (tx: PartialJsonTransaction) => { + await db.saveBundle({ + id: tx.id, + format: 'ans-104', + }); if (await config.ANS104_UNBUNDLE_FILTER.match(tx)) { + await db.saveBundle({ + id: tx.id, + format: 'ans-104', + unbundleFilter: config.ANS104_UNBUNDLE_FILTER_STRING, + indexFilter: config.ANS104_INDEX_FILTER_STRING, + queuedAt: currentUnixTimestamp(), + }); ans104Unbundler.queueItem({ index: -1, // parent indexes are not needed for L1 ...tx, }); + } else { + await db.saveBundle({ + id: tx.id, + format: 'ans-104', + unbundleFilter: config.ANS104_UNBUNDLE_FILTER_STRING, + skippedAt: currentUnixTimestamp(), + }); } }, ); +eventEmitter.on(events.ANS104_UNBUNDLE_COMPLETE, async (bundleEvent: any) => { + db.saveBundle({ + id: bundleEvent.parentId, + format: 'ans-104', + dataItemCount: bundleEvent.itemCount, + matchedDataItemCount: bundleEvent.matchedItemCount, + unbundledAt: currentUnixTimestamp(), + }); +}); + const dataItemIndexer = new DataItemIndexer({ log, eventEmitter, diff --git a/test/bundles-schema.sql b/test/bundles-schema.sql index cbe4be10..9a941a49 100644 --- a/test/bundles-schema.sql +++ b/test/bundles-schema.sql @@ -1,13 +1,6 @@ CREATE TABLE bundle_formats ( id INTEGER PRIMARY KEY, - name TEXT NOT NULL -); -CREATE TABLE bundles ( - id BLOB PRIMARY KEY, - format INTEGER NOT NULL, - data_item_count INTEGER NOT NULL, - first_processed_at INTEGER NOT NULL, - last_processed_at INTEGER NOT NULL + format TEXT NOT NULL ); CREATE TABLE wallets ( address BLOB PRIMARY KEY, @@ -118,3 +111,20 @@ CREATE TABLE filters ( filter TEXT NOT NULL UNIQUE ); CREATE INDEX filters_filter_idx ON filters (filter); +CREATE TABLE bundles ( + id BLOB PRIMARY KEY, + format_id INTEGER NOT NULL, + unbundle_filter_id INTEGER, + index_filter_id INTEGER, + data_item_count INTEGER, + matched_data_item_count INTEGER, + first_queued_at INTEGER, + last_queued_at INTEGER, + first_skipped_at INTEGER, + last_skipped_at INTEGER, + first_unbundled_at INTEGER, + last_unbundled_at INTEGER, + first_fully_indexed_at INTEGER, + last_fully_indexed_at INTEGER +); +CREATE INDEX bundles_format_id_idx ON bundles (format_id); From ff078183721aeeeae077ed5021f2cb992d707224 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 20 Jun 2023 17:18:21 -0500 Subject: [PATCH 27/33] fix(bundles data): fix infinite recursion when parent data is missing PE-4054 The recursive case when getting parent data was incorrectly passing the original ID instead of the parent ID. That led to infinite recursion since it was continually finding the same parent and then trying to download it. This change corrects that and fixes what appeared to be an issue with passing the size for nested bundles. The size should always be the original size; only the offset should be accumulated during recursion.
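To make the offset arithmetic concrete, here is a minimal sketch of the corrected recursion under simplified, assumed signatures (a bare `getParent` lookup standing in for `contiguousDataIndex.getDataParent`; the real `getCacheData` also threads hashes and streams through the call):

    // TypeScript sketch, not the actual implementation.
    interface Region {
      offset: number;
      size: number;
    }

    type ParentLookup = (
      id: string,
    ) => Promise<{ parentId: string; offset: number; size: number } | undefined>;

    // Walk up the parent chain, accumulating offsets while preserving the
    // original item's size. Recursing on parent.parentId (not id) is what
    // makes the recursion terminate; the old code kept re-finding the same
    // parent for the same id.
    async function resolveRegion(
      getParent: ParentLookup,
      id: string,
      region?: Region,
    ): Promise<{ id: string; region?: Region }> {
      const parent = await getParent(id);
      if (parent === undefined) {
        return { id, region };
      }
      // Example: a data item at offset 10 within a nested bundle that itself
      // starts at offset 100 of its root transaction resolves to offset 110,
      // while the item's size never changes.
      return resolveRegion(getParent, parent.parentId, {
        offset: (region?.offset ?? 0) + parent.offset,
        size: region?.size ?? parent.size,
      });
    }

With the parent ID fix the recursion stops as soon as no further parent is found, and the returned region always describes the original item's bytes within the outermost cached parent.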
--- src/data/read-through-data-cache.ts | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/data/read-through-data-cache.ts b/src/data/read-through-data-cache.ts index 45e41ee8..812532b0 100644 --- a/src/data/read-through-data-cache.ts +++ b/src/data/read-through-data-cache.ts @@ -2,6 +2,7 @@ import crypto from 'node:crypto'; import { Readable, pipeline } from 'node:stream'; import winston from 'winston'; +import { currentUnixTimestamp } from '../lib/time.js'; import { ContiguousData, ContiguousDataAttributes, @@ -84,10 +85,16 @@ export class ReadThroughDataCache implements ContiguousDataSource { const parentData = await this.contiguousDataIndex.getDataParent(id); if (parentData?.parentHash !== undefined) { this.log.info('Found parent data ID', { id, ...parentData }); - return this.getCacheData(id, parentData.parentHash, dataSize, { - offset: (region?.offset ?? 0) + parentData.offset, - size: parentData.size, - }); + const size = dataSize ?? parentData.size; + return this.getCacheData( + parentData.parentId, + parentData.parentHash, + size, + { + offset: (region?.offset ?? 0) + parentData.offset, + size, + }, + ); } return undefined; @@ -140,7 +147,7 @@ export class ReadThroughDataCache implements ContiguousDataSource { hash, dataSize: data.size, contentType: data.sourceContentType, - cachedAt: +(Date.now() / 1000).toFixed(0), + cachedAt: currentUnixTimestamp(), }); try { From 58665d9011c061cbed343e2e43a3259858aa0ed0 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Wed, 21 Jun 2023 16:30:59 -0500 Subject: [PATCH 28/33] refactor(data cache): simplify and comment cache size logic PE-4054 A small change: removes one unnecessary fallback and adds a couple of comments explaining the size logic. --- src/data/read-through-data-cache.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/data/read-through-data-cache.ts b/src/data/read-through-data-cache.ts index 812532b0..9dfc5f55 100644 --- a/src/data/read-through-data-cache.ts +++ b/src/data/read-through-data-cache.ts @@ -61,15 +61,14 @@ export class ReadThroughDataCache implements ContiguousDataSource { }); } else { this.log.info('Found data in cache', { id, hash, ...region }); - // Note: it's impossible for both sizes to be undefined, but TS - // doesn't know that - const size = dataSize ?? region?.size; - if (size === undefined) { + // It should be impossible for dataSize to be undefined if hash is + // set, but TypeScript doesn't know that. + if (dataSize === undefined) { throw new Error('Missing data size'); } return { stream: cacheStream, - size, + size: dataSize, }; } } catch (error: any) { @@ -85,6 +84,7 @@ export class ReadThroughDataCache implements ContiguousDataSource { const parentData = await this.contiguousDataIndex.getDataParent(id); if (parentData?.parentHash !== undefined) { this.log.info('Found parent data ID', { id, ...parentData }); + // We might have a parent but no data size when retrieving by ID const size = dataSize ?? parentData.size; return this.getCacheData( parentData.parentId, parentData.parentHash, size, { offset: (region?.offset ?? 0) + parentData.offset, size, }, ); } return undefined; From 477511e8c8b0a621b72e627be3532d3ae0b0d5c9 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 27 Jun 2023 16:45:46 -0500 Subject: [PATCH 29/33] feat(bundles repair): add bundle repair worker PE-4041 Adds a bundle repair worker that queries the `bundles` and `bundle_data_items` tables to determine which bundles have been fully imported. It does this by setting bundle `last_fully_indexed_at` timestamps based on a comparison of the `bundle_data_items` recorded for each bundle (taking filters into account) to the bundle's `matched_data_item_count`, and then using those `last_fully_indexed_at` timestamps to determine whether a bundle should be reprocessed.
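The core of that determination is a counting comparison. A simplified sketch of the predicate (condensed from the `updateFullyIndexedAt` statement added later in this patch, with its NULL guards omitted):

    -- SQL sketch, not the full statement: a bundle counts as fully indexed
    -- once the number of bundle_data_items recorded under its unbundle
    -- filter equals the number of data items that filter matched.
    UPDATE bundles
    SET last_fully_indexed_at = @fully_indexed_at
    WHERE matched_data_item_count > 0
      AND matched_data_item_count = (
        SELECT COUNT(*)
        FROM bundle_data_items bdi
        WHERE bdi.parent_id = bundles.id
          AND bdi.filter_id = bundles.unbundle_filter_id
      );

Bundles that never reach this state within the reprocess window are the ones `selectFailedBundleIds` hands back to the transaction fetcher for another attempt.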
--- .github/workflows/build-core.yml | 2 +- docker-compose.yaml | 1 + ...1.52.29.bundles.add-bundles-root-tx-id.sql | 2 + ...8T22.02.57.bundles.add-bundles-indexes.sql | 15 +++ ...1.52.29.bundles.add-bundles-root-tx-id.sql | 1 + ...8T22.02.57.bundles.add-bundles-indexes.sql | 8 ++ src/app.ts | 1 + src/config.ts | 2 + src/database/sql/bundles/import.sql | 4 +- src/database/sql/bundles/repair.sql | 76 ++++++++++++++ src/database/sql/core/accessors.sql | 8 +- src/database/standalone-sqlite.test.ts | 3 +- src/database/standalone-sqlite.ts | 95 ++++++++++++------ src/system.ts | 11 +++ src/types.d.ts | 23 ++++- src/workers/bundle-repair-worker.ts | 99 +++++++++++++++++++ src/workers/transaction-repair-worker.ts | 5 +- test/bundles-schema.sql | 12 ++- 18 files changed, 327 insertions(+), 41 deletions(-) create mode 100644 migrations/2023.06.28T21.52.29.bundles.add-bundles-root-tx-id.sql create mode 100644 migrations/2023.06.28T22.02.57.bundles.add-bundles-indexes.sql create mode 100644 migrations/down/2023.06.28T21.52.29.bundles.add-bundles-root-tx-id.sql create mode 100644 migrations/down/2023.06.28T22.02.57.bundles.add-bundles-indexes.sql create mode 100644 src/database/sql/bundles/repair.sql create mode 100644 src/workers/bundle-repair-worker.ts diff --git a/.github/workflows/build-core.yml b/.github/workflows/build-core.yml index dd3aa91e..5b3378f1 100644 --- a/.github/workflows/build-core.yml +++ b/.github/workflows/build-core.yml @@ -78,7 +78,7 @@ jobs: # Build and push container image to ECR - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ secrets.AWS_BUILD_INVOCATION_ROLE }} aws-region: ${{ secrets.AWS_REGION }} diff --git a/docker-compose.yaml b/docker-compose.yaml index ef7d626b..7d4d3b02 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -42,6 +42,7 @@ services: - INSTANCE_ID=${INSTANCE_ID:-} - AR_IO_WALLET=${AR_IO_WALLET:-} - ADMIN_API_KEY=${ADMIN_API_KEY:-} + - BACKFILL_BUNDLE_RECORDS=${BACKFILL_BUNDLE_RECORDS:-} - ANS104_UNBUNDLE_FILTER=${ANS104_UNBUNDLE_FILTER:-} - ANS104_INDEX_FILTER=${ANS104_INDEX_FILTER:-} - ARNS_ROOT_HOST=${ARNS_ROOT_HOST:-} diff --git a/migrations/2023.06.28T21.52.29.bundles.add-bundles-root-tx-id.sql b/migrations/2023.06.28T21.52.29.bundles.add-bundles-root-tx-id.sql new file mode 100644 index 00000000..2260f793 --- /dev/null +++ b/migrations/2023.06.28T21.52.29.bundles.add-bundles-root-tx-id.sql @@ -0,0 +1,2 @@ +ALTER TABLE bundles ADD COLUMN root_transaction_id BLOB; +UPDATE bundles SET root_transaction_id = id; diff --git a/migrations/2023.06.28T22.02.57.bundles.add-bundles-indexes.sql b/migrations/2023.06.28T22.02.57.bundles.add-bundles-indexes.sql new file mode 100644 index 00000000..9cf82af4 --- /dev/null +++ b/migrations/2023.06.28T22.02.57.bundles.add-bundles-indexes.sql @@ -0,0 +1,15 @@ +CREATE INDEX IF NOT EXISTS bundles_last_queued_at_idx + ON bundles (last_queued_at); +CREATE INDEX IF NOT EXISTS bundles_last_skipped_at_idx + ON bundles (last_skipped_at); +CREATE INDEX IF NOT EXISTS bundles_last_fully_indexed_at_idx + ON bundles (last_fully_indexed_at); +CREATE INDEX IF NOT EXISTS
bundles_matched_data_item_count_idx + ON bundles (matched_data_item_count); +CREATE INDEX IF NOT EXISTS bundles_unbundle_filter_id_idx + ON bundles (unbundle_filter_id); +CREATE INDEX IF NOT EXISTS bundles_index_filter_id_idx + ON bundles (index_filter_id); + +CREATE INDEX IF NOT EXISTS bundle_data_items_parent_id_filter_id_idx + ON bundle_data_items (parent_id, filter_id); diff --git a/migrations/down/2023.06.28T21.52.29.bundles.add-bundles-root-tx-id.sql b/migrations/down/2023.06.28T21.52.29.bundles.add-bundles-root-tx-id.sql new file mode 100644 index 00000000..47e5cd42 --- /dev/null +++ b/migrations/down/2023.06.28T21.52.29.bundles.add-bundles-root-tx-id.sql @@ -0,0 +1 @@ +ALTER TABLE bundles DROP COLUMN root_transaction_id; diff --git a/migrations/down/2023.06.28T22.02.57.bundles.add-bundles-indexes.sql b/migrations/down/2023.06.28T22.02.57.bundles.add-bundles-indexes.sql new file mode 100644 index 00000000..20a26d11 --- /dev/null +++ b/migrations/down/2023.06.28T22.02.57.bundles.add-bundles-indexes.sql @@ -0,0 +1,8 @@ +DROP INDEX IF EXISTS bundle_data_items_parent_id_filter_id_idx; + +DROP INDEX IF EXISTS bundles_unbundle_filter_id_idx; +DROP INDEX IF EXISTS bundles_index_filter_id_idx; +DROP INDEX IF EXISTS bundles_matched_data_item_count_idx; +DROP INDEX IF EXISTS bundles_last_fully_indexed_at_idx; +DROP INDEX IF EXISTS bundles_last_skipped_at_idx; +DROP INDEX IF EXISTS bundles_last_queued_at_idx; diff --git a/src/app.ts b/src/app.ts index bdffaf99..78f9b3ce 100644 --- a/src/app.ts +++ b/src/app.ts @@ -39,6 +39,7 @@ import * as system from './system.js'; system.arweaveClient.refreshPeers(); system.blockImporter.start(); system.txRepairWorker.start(); +system.bundleRepairWorker.start(); // HTTP server const app = express(); diff --git a/src/config.ts b/src/config.ts index 100972e6..28755328 100644 --- a/src/config.ts +++ b/src/config.ts @@ -53,6 +53,8 @@ export const ADMIN_API_KEY = env.varOrDefault( if (env.varOrUndefined('ADMIN_API_KEY') === undefined) { log.info('Using a random admin key since none was set', { ADMIN_API_KEY }); } +export const BACKFILL_BUNDLE_RECORDS = + env.varOrDefault('BACKFILL_BUNDLE_RECORDS', 'false') === 'true'; export const ANS104_UNBUNDLE_FILTER_STRING = canonicalize( JSON.parse(env.varOrDefault('ANS104_UNBUNDLE_FILTER', '{"never": true}')), ); diff --git a/src/database/sql/bundles/import.sql b/src/database/sql/bundles/import.sql index 803b66a1..29fa4cf5 100644 --- a/src/database/sql/bundles/import.sql +++ b/src/database/sql/bundles/import.sql @@ -1,6 +1,6 @@ -- upsertBundle INSERT INTO bundles ( - id, format_id, + id, root_transaction_id, format_id, unbundle_filter_id, index_filter_id, data_item_count, matched_data_item_count, first_queued_at, last_queued_at, @@ -8,7 +8,7 @@ INSERT INTO bundles ( first_unbundled_at, last_unbundled_at, first_fully_indexed_at, last_fully_indexed_at ) VALUES ( - @id, @format_id, + @id, @root_transaction_id, @format_id, @unbundle_filter_id, @index_filter_id, @data_item_count, @matched_data_item_count, @queued_at, @queued_at, diff --git a/src/database/sql/bundles/repair.sql b/src/database/sql/bundles/repair.sql new file mode 100644 index 00000000..c3a6c2ef --- /dev/null +++ b/src/database/sql/bundles/repair.sql @@ -0,0 +1,76 @@ +-- selectFailedBundleIds +SELECT DISTINCT id +FROM ( + SELECT b.root_transaction_id AS id + FROM bundles b + WHERE ( + (b.last_queued_at IS NULL AND b.last_skipped_at IS NULL) + OR ( + b.last_queued_at IS NOT NULL + AND ( + b.last_skipped_at IS NULL + OR b.last_skipped_at <= b.last_queued_at + ) + AND 
b.last_queued_at < @reprocess_cutoff + ) + ) + AND b.last_fully_indexed_at IS NULL + AND ( + b.matched_data_item_count IS NULL + OR b.matched_data_item_count > 0 + ) + ORDER BY b.last_queued_at ASC + LIMIT @limit +) +ORDER BY RANDOM() + +-- updateFullyIndexedAt +UPDATE bundles +SET + first_fully_indexed_at = IFNULL(first_fully_indexed_at, @fully_indexed_at), + last_fully_indexed_at = @fully_indexed_at +WHERE matched_data_item_count IS NOT NULL + AND matched_data_item_count > 0 + AND EXISTS ( + SELECT 1 + FROM bundle_data_items bdi + WHERE bdi.parent_id = bundles.id + AND bdi.filter_id = bundles.unbundle_filter_id + GROUP BY bdi.parent_id + HAVING COUNT(*) = bundles.matched_data_item_count + ) AND last_fully_indexed_at IS NULL + +--insertMissingBundles +INSERT INTO bundles ( + id, + root_transaction_id, + format_id +) +SELECT + sttf.transaction_id, + sttf.transaction_id, + (SELECT id FROM bundle_formats WHERE format = 'ans-104') +FROM stable_transaction_tags sttf +JOIN stable_transaction_tags sttv ON sttv.transaction_id = sttf.transaction_id + AND sttv.transaction_tag_index != sttf.transaction_tag_index +LEFT JOIN bundles b ON b.id = sttf.transaction_id +WHERE sttf.tag_name_hash = x'BF796ECA81CCE3FF36CEA53FA1EBB0F274A0FF29' + AND sttf.tag_value_hash = x'7E57CFE843145135AEE1F4D0D63CEB7842093712' + AND sttv.tag_name_hash = x'858B76CB055E360A2E4C3C38F4A3049F80175216' + AND sttv.tag_value_hash = x'F7CA6A21D278EB5CE64611AADBDB77EF1511D3DD' + AND b.id IS NULL +UNION ALL +SELECT + nttf.transaction_id, + nttf.transaction_id, + (SELECT id FROM bundle_formats WHERE format = 'ans-104') +FROM new_transaction_tags nttf +JOIN new_transaction_tags nttv ON nttv.transaction_id = nttf.transaction_id +LEFT JOIN bundles b ON b.id = nttf.transaction_id +WHERE nttf.tag_name_hash = x'BF796ECA81CCE3FF36CEA53FA1EBB0F274A0FF29' + AND nttf.tag_value_hash = x'7E57CFE843145135AEE1F4D0D63CEB7842093712' + AND nttv.tag_name_hash = x'858B76CB055E360A2E4C3C38F4A3049F80175216' + AND nttv.tag_value_hash = x'F7CA6A21D278EB5CE64611AADBDB77EF1511D3DD' + AND b.id IS NULL +LIMIT 10000 +ON CONFLICT DO NOTHING diff --git a/src/database/sql/core/accessors.sql b/src/database/sql/core/accessors.sql index dc3267c2..c8e8dcab 100644 --- a/src/database/sql/core/accessors.sql +++ b/src/database/sql/core/accessors.sql @@ -23,5 +23,9 @@ LIMIT 1 -- selectMissingTransactionIds SELECT transaction_id -FROM missing_transactions -LIMIT @limit +FROM ( + SELECT transaction_id + FROM missing_transactions + LIMIT @limit +) +ORDER BY RANDOM() diff --git a/src/database/standalone-sqlite.test.ts b/src/database/standalone-sqlite.test.ts index 2bab92a5..2dd3dea6 100644 --- a/src/database/standalone-sqlite.test.ts +++ b/src/database/standalone-sqlite.test.ts @@ -53,7 +53,8 @@ const { default: processStream } = arbundles; const HEIGHT = 1138; const BLOCK_TX_INDEX = 42; const DATA_ITEM_ID = 'zoljIRyzG5hp-R4EZV2q8kFI49OAoy23_B9YJ_yEEws'; -const CURSOR = 'WzExMzgsNDIsInpvbGpJUnl6RzVocC1SNEVaVjJxOGtGSTQ5T0FveTIzX0I5WUpfeUVFd3MiXQ'; +const CURSOR = + 'WzExMzgsNDIsInpvbGpJUnl6RzVocC1SNEVaVjJxOGtGSTQ5T0FveTIzX0I5WUpfeUVFd3MiXQ'; describe('SQLite helper functions', () => { describe('toSqliteParams', () => { diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index 57421eed..b3adcc03 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -47,6 +47,8 @@ import { currentUnixTimestamp } from '../lib/time.js'; import log from '../log.js'; import { BlockListValidator, + BundleIndex, + BundleRecord, 
ChainIndex, ContiguousDataAttributes, ContiguousDataIndex, @@ -65,6 +67,7 @@ const MAX_WORKER_ERRORS = 100; const STABLE_FLUSH_INTERVAL = 5; const NEW_TX_CLEANUP_WAIT_SECS = 60 * 60 * 2; const NEW_DATA_ITEM_CLEANUP_WAIT_SECS = 60 * 60 * 2; +const BUNDLE_REPROCESS_WAIT_SECS = 60 * 60 * 4; const LOW_SELECTIVITY_TAG_NAMES = new Set(['App-Name', 'Content-Type']); function tagJoinSortPriority(tag: { name: string; values: string[] }) { @@ -329,8 +332,8 @@ export class StandaloneSqliteDatabaseWorker { moderation: { [stmtName: string]: Sqlite.Statement }; bundles: { [stmtName: string]: Sqlite.Statement }; }; - private bundleFormatIds: { [filter: string]: number; } = {}; - private filterIds: { [filter: string]: number; } = {}; + private bundleFormatIds: { [filter: string]: number } = {}; + private filterIds: { [filter: string]: number } = {}; // Transactions resetBundlesToHeightFn: Sqlite.Transaction; @@ -694,11 +697,30 @@ export class StandaloneSqliteDatabaseWorker { } getMissingTxIds(limit: number) { - const missingTxIds = this.stmts.core.selectMissingTransactionIds.all({ + const rows = this.stmts.core.selectMissingTransactionIds.all({ limit, }); - return missingTxIds.map((row): string => toB64Url(row.transaction_id)); + return rows.map((row): string => toB64Url(row.transaction_id)); + } + + getFailedBundleIds(limit: number) { + const rows = this.stmts.bundles.selectFailedBundleIds.all({ + limit, + reprocess_cutoff: currentUnixTimestamp() - BUNDLE_REPROCESS_WAIT_SECS, + }); + + return rows.map((row): string => toB64Url(row.id)); + } + + backfillBundles() { + this.stmts.bundles.insertMissingBundles.run(); + } + + updateBundlesFullyIndexedAt() { + this.stmts.bundles.updateFullyIndexedAt.run({ + fully_indexed_at: currentUnixTimestamp(), + }); } resetToHeight(height: number) { @@ -720,7 +742,7 @@ export class StandaloneSqliteDatabaseWorker { if (format != undefined) { id = this.bundleFormatIds[format]; if (id == undefined) { - id= this.stmts.bundles.selectFormatId.get({ format })?.id; + id = this.stmts.bundles.selectFormatId.get({ format })?.id; if (id != undefined) { this.bundleFormatIds[format] = id; } @@ -735,7 +757,7 @@ export class StandaloneSqliteDatabaseWorker { id = this.filterIds[filter]; if (id == undefined) { this.stmts.bundles.insertOrIgnoreFilter.run({ filter }); - id= this.stmts.bundles.selectFilterId.get({ filter })?.id; + id = this.stmts.bundles.selectFilterId.get({ filter })?.id; if (id != undefined) { this.filterIds[filter] = id; } @@ -754,6 +776,7 @@ export class StandaloneSqliteDatabaseWorker { saveBundle({ id, + rootTransactionId, format, unbundleFilter, indexFilter, @@ -763,21 +786,15 @@ export class StandaloneSqliteDatabaseWorker { skippedAt, unbundledAt, fullyIndexedAt, - }: { - id: string; - format: 'ans-102' | 'ans-104'; - unbundleFilter?: string; - indexFilter?: string; - dataItemCount?: number; - matchedDataItemCount?: number; - queuedAt?: number; - skippedAt?: number; - unbundledAt?: number; - fullyIndexedAt?: number; - }) { + }: BundleRecord) { const idBuffer = fromB64Url(id); + let rootTxId: Buffer | undefined; + if (rootTransactionId != undefined) { + rootTxId = fromB64Url(rootTransactionId); + } this.stmts.bundles.upsertBundle.run({ id: idBuffer, + root_transaction_id: rootTxId, format_id: this.getBundleFormatId(format), unbundle_filter_id: this.getFilterId(unbundleFilter), index_filter_id: this.getFilterId(indexFilter), @@ -2041,6 +2058,7 @@ const WORKER_POOL_SIZES: WorkerPoolSizes = { export class StandaloneSqliteDatabase implements + BundleIndex, 
BlockListValidator, ChainIndex, ContiguousDataIndex, @@ -2246,10 +2264,22 @@ export class StandaloneSqliteDatabase return this.queueRead('core', 'getBlockHashByHeight', [height]); } - getMissingTxIds(limit = 20): Promise { + getMissingTxIds(limit: number): Promise { return this.queueRead('core', 'getMissingTxIds', [limit]); } + getFailedBundleIds(limit: number): Promise { + return this.queueRead('bundles', 'getFailedBundleIds', [limit]); + } + + backfillBundles() { + return this.queueRead('bundles', 'backfillBundles', undefined); + } + + updateBundlesFullyIndexedAt(): Promise { + return this.queueRead('bundles', 'updateBundlesFullyIndexedAt', undefined); + } + resetToHeight(height: number): Promise { return this.queueWrite('core', 'resetToHeight', [height]); } @@ -2262,17 +2292,7 @@ export class StandaloneSqliteDatabase return this.queueWrite('bundles', 'saveDataItem', [item]); } - saveBundle(bundle: { - id: string; - format: 'ans-102' | 'ans-104'; - unbundleFilter?: string; - indexFilter?: string; - dataItemCount?: number; - matchedDataItemCount?: number; - queuedAt?: number; - skippedAt?: number; - unbundledAt?: number; - }): Promise { + saveBundle(bundle: BundleRecord): Promise { return this.queueWrite('bundles', 'saveBundle', [bundle]); } @@ -2476,8 +2496,19 @@ if (!isMainThread) { parentPort?.postMessage(newBlockHash); break; case 'getMissingTxIds': - const missingTxIdsRes = worker.getMissingTxIds(args[0]); - parentPort?.postMessage(missingTxIdsRes); + parentPort?.postMessage(worker.getMissingTxIds(args[0])); + break; + case 'getFailedBundleIds': + const failedBundleIds = worker.getFailedBundleIds(args[0]); + parentPort?.postMessage(failedBundleIds); + break; + case 'backfillBundles': + worker.backfillBundles(); + parentPort?.postMessage(null); + break; + case 'updateBundlesFullyIndexedAt': + worker.updateBundlesFullyIndexedAt(); + parentPort?.postMessage(null); break; case 'resetToHeight': worker.resetToHeight(args[0]); diff --git a/src/system.ts b/src/system.ts index 9fe483f3..22d817b2 100644 --- a/src/system.ts +++ b/src/system.ts @@ -41,6 +41,7 @@ import { FsDataStore } from './store/fs-data-store.js'; import { FsTransactionStore } from './store/fs-transaction-store.js'; import { BlockListValidator, + BundleIndex, ChainIndex, ContiguousDataIndex, DataItemIndexWriter, @@ -51,6 +52,7 @@ import { import { Ans104DataIndexer } from './workers/ans104-data-indexer.js'; import { Ans104Unbundler } from './workers/ans104-unbundler.js'; import { BlockImporter } from './workers/block-importer.js'; +import { BundleRepairWorker } from './workers/bundle-repair-worker.js'; import { DataItemIndexer } from './workers/data-item-indexer.js'; import { TransactionFetcher } from './workers/transaction-fetcher.js'; import { TransactionImporter } from './workers/transaction-importer.js'; @@ -105,6 +107,7 @@ export const db = new StandaloneSqliteDatabase({ }); export const chainIndex: ChainIndex = db; +export const bundleIndex: BundleIndex = db; export const contiguousDataIndex: ContiguousDataIndex = db; export const blockListValidator: BlockListValidator = db; export const nestedDataIndexWriter: NestedDataIndexWriter = db; @@ -167,6 +170,13 @@ export const txRepairWorker = new TransactionRepairWorker({ txFetcher, }); +export const bundleRepairWorker = new BundleRepairWorker({ + log, + bundleIndex, + txFetcher, + shouldBackfillBundles: config.BACKFILL_BUNDLE_RECORDS, +}); + // Configure contigous data source const chunkDataSource = new ReadThroughChunkDataCache({ log, @@ -208,6 +218,7 @@ eventEmitter.on( 
async (tx: PartialJsonTransaction) => { await db.saveBundle({ id: tx.id, + rootTransactionId: tx.id, format: 'ans-104', }); if (await config.ANS104_UNBUNDLE_FILTER.match(tx)) { diff --git a/src/types.d.ts b/src/types.d.ts index 5996ea67..de10449e 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -174,7 +174,7 @@ export interface ChainSource { export interface ChainIndex { getMaxHeight(): Promise; getBlockHashByHeight(height: number): Promise; - getMissingTxIds(limit?: number): Promise; + getMissingTxIds(limit: number): Promise; resetToHeight(height: number): Promise; saveTx(txs: PartialJsonTransaction): Promise; saveBlockAndTxs( @@ -184,6 +184,27 @@ export interface ChainIndex { ): Promise; } +export interface BundleRecord { + id: string; + rootTransactionId?: string; + format: 'ans-102' | 'ans-104'; + unbundleFilter?: string; + indexFilter?: string; + dataItemCount?: number; + matchedDataItemCount?: number; + queuedAt?: number; + skippedAt?: number; + unbundledAt?: number; + fullyIndexedAt?: number; +} + +export interface BundleIndex { + saveBundle(bundle: BundleRecord): Promise; + getFailedBundleIds(limit: number): Promise; + updateBundlesFullyIndexedAt(): Promise; + backfillBundles(): Promise; +} + export interface DataItemIndexWriter { saveDataItem(item: NormalizedDataItem): Promise; } diff --git a/src/workers/bundle-repair-worker.ts b/src/workers/bundle-repair-worker.ts new file mode 100644 index 00000000..c48f5286 --- /dev/null +++ b/src/workers/bundle-repair-worker.ts @@ -0,0 +1,99 @@ +/** + * AR.IO Gateway + * Copyright (C) 2023 Permanent Data Solutions, Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +import * as winston from 'winston'; + +import { BundleIndex } from '../types.js'; +import { TransactionFetcher } from './transaction-fetcher.js'; + +const DEFAULT_RETRY_INTERVAL_MS = 10 * 60 * 1000; // 10 minutes +const DEFAULT_UPDATE_INTERVAL_MS = 5 * 60 * 1000; // 5 minutes +const DEFAULT_BUNDLE_BACKFILL_INTERVAL_MS = 15 * 60 * 1000; // 15 minutes +const DEFAULT_BUNDLES_TO_RETRY = 20; + +export class BundleRepairWorker { + // Dependencies + private log: winston.Logger; + private bundleIndex: BundleIndex; + private txFetcher: TransactionFetcher; + private shouldBackfillBundles: boolean; + + constructor({ + log, + bundleIndex, + txFetcher, + shouldBackfillBundles, + }: { + log: winston.Logger; + bundleIndex: BundleIndex; + txFetcher: TransactionFetcher; + shouldBackfillBundles: boolean; + }) { + this.log = log.child({ class: 'BundleRepairWorker' }); + this.bundleIndex = bundleIndex; + this.txFetcher = txFetcher; + this.shouldBackfillBundles = shouldBackfillBundles; + } + + async start(): Promise { + setInterval(this.retryBundles.bind(this), DEFAULT_RETRY_INTERVAL_MS); + setInterval( + this.updateBundleTimestamps.bind(this), + DEFAULT_UPDATE_INTERVAL_MS, + ); + if (this.shouldBackfillBundles) { + setInterval( + this.backfillBundles.bind(this), + DEFAULT_BUNDLE_BACKFILL_INTERVAL_MS, + ); + } + } + + async retryBundles() { + try { + const bundleIds = await this.bundleIndex.getFailedBundleIds( + DEFAULT_BUNDLES_TO_RETRY, + ); + for (const bundleId of bundleIds) { + this.log.info('Retrying failed bundle', { bundleId }); + await this.txFetcher.queueTxId(bundleId); + } + } catch (error: any) { + this.log.error('Error retrying failed bundles:', error); + } + } + + async updateBundleTimestamps() { + try { + this.log.info('Updating bundle timestamps...'); + await this.bundleIndex.updateBundlesFullyIndexedAt(); + this.log.info('Bundle timestamps updated.'); + } catch (error: any) { + this.log.error('Error updating bundle timestamps:', error); + } + } + + async backfillBundles() { + try { + this.log.info('Backfilling bundle records...'); + await this.bundleIndex.backfillBundles(); + this.log.info('Bundle records backfilled.'); + } catch (error: any) { + this.log.error('Error backfilling bundle records:', error); + } + } +} diff --git a/src/workers/transaction-repair-worker.ts b/src/workers/transaction-repair-worker.ts index 7a53b2b1..7941ac11 100644 --- a/src/workers/transaction-repair-worker.ts +++ b/src/workers/transaction-repair-worker.ts @@ -21,6 +21,7 @@ import { ChainIndex } from '../types.js'; import { TransactionFetcher } from './transaction-fetcher.js'; const DEFAULT_INTERVAL_MS = 5 * 60 * 1000; +const DEFAULT_TXS_TO_RETRY = 20; export class TransactionRepairWorker { // Dependencies @@ -48,7 +49,9 @@ export class TransactionRepairWorker { async retryMissingTransactions() { try { - const missingTxIds = await this.chainIndex.getMissingTxIds(); + const missingTxIds = await this.chainIndex.getMissingTxIds( + DEFAULT_TXS_TO_RETRY, + ); for (const txId of missingTxIds) { this.log.info('Retrying missing transaction', { txId }); await this.txFetcher.queueTxId(txId); diff --git a/test/bundles-schema.sql b/test/bundles-schema.sql index 9a941a49..59d64ade 100644 --- a/test/bundles-schema.sql +++ b/test/bundles-schema.sql @@ -126,5 +126,15 @@ CREATE TABLE bundles ( last_unbundled_at INTEGER, first_fully_indexed_at INTEGER, last_fully_indexed_at INTEGER -); +, root_transaction_id BLOB); CREATE INDEX bundles_format_id_idx ON bundles (format_id); +CREATE INDEX bundles_last_queued_at_idx + ON bundles 
(last_queued_at); +CREATE INDEX bundles_last_skipped_at_idx + ON bundles (last_skipped_at); +CREATE INDEX bundles_last_fully_indexed_at_idx + ON bundles (last_fully_indexed_at); +CREATE INDEX bundles_matched_data_item_count_idx + ON bundles (matched_data_item_count); +CREATE INDEX bundle_data_items_parent_id_filter_id_idx + ON bundle_data_items (parent_id, filter_id); From 94050227150266929222498ac3e44a68b7e13450 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 11 Jul 2023 12:53:07 -0500 Subject: [PATCH 30/33] feat(sqlite bundles): index nested ANS-104 bundles PE-3639 Adds ANS104_NESTED_BUNDLE_INDEXED and ANS104_BUNDLE_INDEXED events. ANS104_NESTED_BUNDLE_INDEXED is emitted when a nested ANS-104 bundle is indexed and ready for processing, and ANS104_BUNDLE_INDEXED is a more general event that is emitted when either a nested or an L1 ANS-104 bundle is ready for processing. Also modifies the existing bundle event handling logic to use the new combined event and handle both L1 TXs and data items. --- src/database/sql/bundles/repair.sql | 29 +++++++++++++++++++++++++ src/events.ts | 8 +++++-- src/system.ts | 33 +++++++++++++++++++++-------- 3 files changed, 59 insertions(+), 11 deletions(-) diff --git a/src/database/sql/bundles/repair.sql b/src/database/sql/bundles/repair.sql index c3a6c2ef..a93bf653 100644 --- a/src/database/sql/bundles/repair.sql +++ b/src/database/sql/bundles/repair.sql @@ -72,5 +72,34 @@ WHERE nttf.tag_name_hash = x'BF796ECA81CCE3FF36CEA53FA1EBB0F274A0FF29' AND nttv.tag_name_hash = x'858B76CB055E360A2E4C3C38F4A3049F80175216' AND nttv.tag_value_hash = x'F7CA6A21D278EB5CE64611AADBDB77EF1511D3DD' AND b.id IS NULL +UNION ALL +SELECT + sdi.id, + sdi.root_transaction_id, + (SELECT id FROM bundle_formats WHERE format = 'ans-104') +FROM stable_data_item_tags sdif +JOIN stable_data_item_tags sdiv ON sdiv.data_item_id = sdif.data_item_id + AND sdiv.data_item_tag_index != sdif.data_item_tag_index +JOIN stable_data_items sdi ON sdi.id = sdif.data_item_id +LEFT JOIN bundles b ON b.id = sdif.data_item_id +WHERE sdif.tag_name_hash = x'BF796ECA81CCE3FF36CEA53FA1EBB0F274A0FF29' + AND sdif.tag_value_hash = x'7E57CFE843145135AEE1F4D0D63CEB7842093712' + AND sdiv.tag_name_hash = x'858B76CB055E360A2E4C3C38F4A3049F80175216' + AND sdiv.tag_value_hash = x'F7CA6A21D278EB5CE64611AADBDB77EF1511D3DD' + AND b.id IS NULL +UNION ALL +SELECT + ndi.id, + ndi.root_transaction_id, + (SELECT id FROM bundle_formats WHERE format = 'ans-104') +FROM new_data_item_tags ndif +JOIN new_data_item_tags ndiv ON ndiv.data_item_id = ndif.data_item_id +JOIN new_data_items ndi ON ndi.id = ndif.data_item_id +LEFT JOIN bundles b ON b.id = ndif.data_item_id +WHERE ndif.tag_name_hash = x'BF796ECA81CCE3FF36CEA53FA1EBB0F274A0FF29' + AND ndif.tag_value_hash = x'7E57CFE843145135AEE1F4D0D63CEB7842093712' + AND ndiv.tag_name_hash = x'858B76CB055E360A2E4C3C38F4A3049F80175216' + AND ndiv.tag_value_hash = x'F7CA6A21D278EB5CE64611AADBDB77EF1511D3DD' + AND b.id IS NULL LIMIT 10000 ON CONFLICT DO NOTHING diff --git a/src/events.ts b/src/events.ts index 5708f5cf..df524465 100644 --- a/src/events.ts +++ b/src/events.ts @@ -15,9 +15,13 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see .
*/ +export const ANS104_BUNDLE_INDEXED = 'ans104-bundle-indexed'; +export const ANS104_DATA_ITEM_BUNDLE_MATCHED = + 'ans104-data-item-bundle-matched'; export const ANS104_DATA_ITEM_DATA_INDEXED = 'ans104-data-item-data-indexed'; -export const ANS104_DATA_ITEM_INDEXED = 'ans104-data-indexed'; -export const ANS104_DATA_ITEM_MATCHED = 'asn104-data-item-matched'; +export const ANS104_DATA_ITEM_INDEXED = 'ans104-data-item-indexed'; +export const ANS104_DATA_ITEM_MATCHED = 'ans104-data-item-matched'; +export const ANS104_NESTED_BUNDLE_INDEXED = 'ans104-nested-bundle-indexed'; export const ANS104_TX_INDEXED = 'ans104-tx-indexed'; export const ANS104_UNBUNDLE_COMPLETE = 'ans104-unbundle-complete'; export const BLOCK_FETCHED = 'block-fetched'; diff --git a/src/system.ts b/src/system.ts index 22d817b2..d962118a 100644 --- a/src/system.ts +++ b/src/system.ts @@ -47,6 +47,7 @@ import { DataItemIndexWriter, MatchableItem, NestedDataIndexWriter, + NormalizedDataItem, PartialJsonTransaction, } from './types.js'; import { Ans104DataIndexer } from './workers/ans104-data-indexer.js'; @@ -139,9 +140,20 @@ const ans104TxMatcher = new MatchTags([ eventEmitter.on(events.TX_INDEXED, async (tx: MatchableItem) => { if (await ans104TxMatcher.match(tx)) { eventEmitter.emit(events.ANS104_TX_INDEXED, tx); + eventEmitter.emit(events.ANS104_BUNDLE_INDEXED, tx); } }); +eventEmitter.on( + events.ANS104_DATA_ITEM_DATA_INDEXED, + async (item: MatchableItem) => { + if (await ans104TxMatcher.match(item)) { + eventEmitter.emit(events.ANS104_NESTED_BUNDLE_INDEXED, item); + eventEmitter.emit(events.ANS104_BUNDLE_INDEXED, item); + } + }, +); + const txFetcher = new TransactionFetcher({ log, chainSource: arweaveClient, @@ -214,28 +226,31 @@ const ans104Unbundler = new Ans104Unbundler({ }); eventEmitter.on( - events.ANS104_TX_INDEXED, - async (tx: PartialJsonTransaction) => { + events.ANS104_BUNDLE_INDEXED, + async (item: NormalizedDataItem | PartialJsonTransaction) => { await db.saveBundle({ - id: tx.id, - rootTransactionId: tx.id, + id: item.id, + rootTransactionId: 'root_tx_id' in item ? item.root_tx_id : item.id, format: 'ans-104', }); - if (await config.ANS104_UNBUNDLE_FILTER.match(tx)) { + if (await config.ANS104_UNBUNDLE_FILTER.match(item)) { await db.saveBundle({ - id: tx.id, + id: item.id, format: 'ans-104', unbundleFilter: config.ANS104_UNBUNDLE_FILTER_STRING, indexFilter: config.ANS104_INDEX_FILTER_STRING, queuedAt: currentUnixTimestamp(), }); ans104Unbundler.queueItem({ - index: -1, // parent indexes are not needed for L1 - ...tx, + index: + 'parent_index' in item && item.parent_index !== undefined + ? item.parent_index + : -1, // parent indexes are not needed for L1 + ...item, }); } else { await db.saveBundle({ - id: tx.id, + id: item.id, format: 'ans-104', unbundleFilter: config.ANS104_UNBUNDLE_FILTER_STRING, skippedAt: currentUnixTimestamp(), From a75455d6e9393a774bea7474af4b0c6629675105 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Tue, 11 Jul 2023 16:50:21 -0500 Subject: [PATCH 31/33] feat(bundles): add a process to reindex bundles after a filter change PE-4115 Adds a process that resets bundle timestamps for bundles that were processed with different filters than are currently in use. Since the process creates some DB load even if the filters are unchanged, it is only enabled when the FILTER_CHANGE_REPROCESS environment variable is set to true. In the future we may optimize this further by keeping a log of filter changes. That would enable more efficient queries based on comparing timestamps (less than the filter change time) rather than filter IDs (using an inequality).
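To make the trade-off concrete, compare the two predicate styles below. The filter-ID inequality mirrors the `updateForFilterChange` statement added in this patch; the timestamp variant and its `@unbundle_filter_changed_at` parameter are hypothetical, since no filter-change log exists yet:

    -- Today: detect stale bundles by comparing the recorded filter ID with
    -- the active filter (an inequality against a subquery, per row).
    SELECT b.id
    FROM bundles b
    WHERE b.last_skipped_at IS NOT NULL
      AND b.unbundle_filter_id != (
        SELECT id FROM filters WHERE filter = @unbundle_filter
      );

    -- Hypothetical future: with a log of filter changes, a plain timestamp
    -- comparison could be served by an index range scan instead.
    SELECT b.id
    FROM bundles b
    WHERE b.last_skipped_at IS NOT NULL
      AND b.last_skipped_at < @unbundle_filter_changed_at;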
--- docker-compose.yaml | 1 + src/config.ts | 2 ++ src/database/sql/bundles/repair.sql | 26 +++++++++++++++++++++++ src/database/standalone-sqlite.ts | 19 +++++++++++++++++ src/system.ts | 3 +++ src/types.d.ts | 4 ++++ src/workers/bundle-repair-worker.ts | 32 +++++++++++++++++++++++++++++ 7 files changed, 87 insertions(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index 7d4d3b02..061eb5eb 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -43,6 +43,7 @@ services: - AR_IO_WALLET=${AR_IO_WALLET:-} - ADMIN_API_KEY=${ADMIN_API_KEY:-} - BACKFILL_BUNDLE_RECORDS=${BACKFILL_BUNDLE_RECORDS:-} + - FILTER_CHANGE_REPROCESS=${FILTER_CHANGE_REPROCESS:-} - ANS104_UNBUNDLE_FILTER=${ANS104_UNBUNDLE_FILTER:-} - ANS104_INDEX_FILTER=${ANS104_INDEX_FILTER:-} - ARNS_ROOT_HOST=${ARNS_ROOT_HOST:-} diff --git a/src/config.ts b/src/config.ts index 28755328..b93f3fe0 100644 --- a/src/config.ts +++ b/src/config.ts @@ -55,6 +55,8 @@ if (env.varOrUndefined('ADMIN_API_KEY') === undefined) { } export const BACKFILL_BUNDLE_RECORDS = env.varOrDefault('BACKFILL_BUNDLE_RECORDS', 'false') === 'true'; +export const FILTER_CHANGE_REPROCESS = + env.varOrDefault('FILTER_CHANGE_REPROCESS', 'false') === 'true'; export const ANS104_UNBUNDLE_FILTER_STRING = canonicalize( JSON.parse(env.varOrDefault('ANS104_UNBUNDLE_FILTER', '{"never": true}')), ); diff --git a/src/database/sql/bundles/repair.sql b/src/database/sql/bundles/repair.sql index a93bf653..dfb385a0 100644 --- a/src/database/sql/bundles/repair.sql +++ b/src/database/sql/bundles/repair.sql @@ -40,6 +40,32 @@ WHERE matched_data_item_count IS NOT NULL HAVING COUNT(*) = bundles.matched_data_item_count ) AND last_fully_indexed_at IS NULL +-- updateForFilterChange +UPDATE bundles +SET + last_queued_at = NULL, + last_skipped_at = NULL +WHERE id IN ( + SELECT b.id + FROM bundles b + WHERE ( + last_skipped_at IS NOT NULL + AND unbundle_filter_id != ( + SELECT id + FROM filters + WHERE filter = @unbundle_filter + ) + ) OR ( + last_queued_at IS NOT NULL + AND index_filter_id != ( + SELECT id + FROM filters + WHERE filter = @index_filter + ) + ) + LIMIT 10000 +) + --insertMissingBundles INSERT INTO bundles ( id, diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index b3adcc03..c3903729 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -723,6 +723,13 @@ export class StandaloneSqliteDatabaseWorker { }); } + updateBundlesForFilterChange(unbundleFilter: string, indexFilter: string) { + this.stmts.bundles.updateForFilterChange.run({ + unbundle_filter: unbundleFilter, + index_filter: indexFilter, + }); + } + resetToHeight(height: number) { this.resetBundlesToHeightFn(height); this.resetCoreToHeightFn(height); @@ -2280,6 +2287,13 @@ export class StandaloneSqliteDatabase return this.queueRead('bundles', 'updateBundlesFullyIndexedAt', undefined); } + updateBundlesForFilterChange(unbundleFilter: string, indexFilter: string) { + return this.queueWrite('bundles', 'updateBundlesForFilterChange', [ + unbundleFilter, + indexFilter, + ]); + } + resetToHeight(height: number): Promise { return this.queueWrite('core', 'resetToHeight', [height]); } @@ -2510,6 +2524,11 @@ if (!isMainThread) { worker.updateBundlesFullyIndexedAt(); parentPort?.postMessage(null); break; + case 'updateBundlesForFilterChange': + const [unbundleFilter, indexFilter] = args; + 
worker.updateBundlesForFilterChange(unbundleFilter, indexFilter); + parentPort?.postMessage(null); + break; case 'resetToHeight': worker.resetToHeight(args[0]); parentPort?.postMessage(undefined); diff --git a/src/system.ts b/src/system.ts index d962118a..75429050 100644 --- a/src/system.ts +++ b/src/system.ts @@ -186,7 +186,10 @@ export const bundleRepairWorker = new BundleRepairWorker({ log, bundleIndex, txFetcher, + unbundleFilter: config.ANS104_UNBUNDLE_FILTER_STRING, + indexFilter: config.ANS104_INDEX_FILTER_STRING, shouldBackfillBundles: config.BACKFILL_BUNDLE_RECORDS, + filtersChanged: config.FILTER_CHANGE_REPROCESS, }); // Configure contigous data source diff --git a/src/types.d.ts b/src/types.d.ts index de10449e..1d15de39 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -202,6 +202,10 @@ export interface BundleIndex { saveBundle(bundle: BundleRecord): Promise; getFailedBundleIds(limit: number): Promise; updateBundlesFullyIndexedAt(): Promise; + updateBundlesForFilterChange( + unbundleFilter: string, + indexFilter: string, + ): Promise; backfillBundles(): Promise; } diff --git a/src/workers/bundle-repair-worker.ts b/src/workers/bundle-repair-worker.ts index c48f5286..a8b4285d 100644 --- a/src/workers/bundle-repair-worker.ts +++ b/src/workers/bundle-repair-worker.ts @@ -23,6 +23,7 @@ import { TransactionFetcher } from './transaction-fetcher.js'; const DEFAULT_RETRY_INTERVAL_MS = 10 * 60 * 1000; // 10 minutes const DEFAULT_UPDATE_INTERVAL_MS = 5 * 60 * 1000; // 5 minutes const DEFAULT_BUNDLE_BACKFILL_INTERVAL_MS = 15 * 60 * 1000; // 15 minutes +const DEFAULT_FILTER_REPROCESS_INTERVAL_MS = 15 * 60 * 1000; // 15 minutes const DEFAULT_BUNDLES_TO_RETRY = 20; export class BundleRepairWorker { @@ -30,23 +31,35 @@ export class BundleRepairWorker { private log: winston.Logger; private bundleIndex: BundleIndex; private txFetcher: TransactionFetcher; + private unbundledFilter: string; + private indexFilter: string; private shouldBackfillBundles: boolean; + private filtersChanged: boolean; constructor({ log, bundleIndex, txFetcher, + unbundleFilter, + indexFilter, shouldBackfillBundles, + filtersChanged, }: { log: winston.Logger; bundleIndex: BundleIndex; txFetcher: TransactionFetcher; + unbundleFilter: string; + indexFilter: string; shouldBackfillBundles: boolean; + filtersChanged: boolean; }) { this.log = log.child({ class: 'BundleRepairWorker' }); this.bundleIndex = bundleIndex; this.txFetcher = txFetcher; + this.unbundledFilter = unbundleFilter; + this.indexFilter = indexFilter; this.shouldBackfillBundles = shouldBackfillBundles; + this.filtersChanged = filtersChanged; } async start(): Promise { @@ -61,6 +74,12 @@ DEFAULT_BUNDLE_BACKFILL_INTERVAL_MS, ); } + if (this.filtersChanged) { + setInterval( + this.updateForFilterChange.bind(this), + DEFAULT_FILTER_REPROCESS_INTERVAL_MS, + ); + } } async retryBundles() { @@ -96,4 +115,17 @@ this.log.error('Error backfilling bundle records:', error); } } + + async updateForFilterChange() { + try { + this.log.info('Updating bundles for filter change...'); + await this.bundleIndex.updateBundlesForFilterChange( + this.unbundledFilter, + this.indexFilter, + ); + this.log.info('Bundles updated for filter change.'); + } catch (error: any) { + this.log.error('Error updating bundles for filter change:', error); + } + } } From 66181c7a115f1ab9dcd32c8ffa83eb3df4982123 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Thu, 13 Jul 2023 16:50:27 -0500 Subject: [PATCH 32/33] refactor(bundles ans-104): use owner address from data item instead of rehashing Prior to this change we were hashing the owner key to get the owner address. This change uses the owner address from the data item instead. These should always be the same value, so rehashing is unnecessary. Note: I ran a test comparing the values, and on the sample of data items I processed there were no differences.
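The equivalence that note relies on: an owner address is the SHA-256 digest of the owner's public modulus, so decoding the data item's `owner_address` field should yield the same bytes as rehashing the key. A minimal sketch (the `fromB64Url` helper here is a simplified stand-in for the codebase's encoding utility):

    import crypto from 'node:crypto';

    // Simplified base64url decoding (the real helper also handles padding).
    function fromB64Url(b64url: string): Buffer {
      return Buffer.from(b64url, 'base64url');
    }

    // Old path: derive the address by hashing the owner's public modulus.
    function ownerToAddress(owner: Buffer): Buffer {
      return crypto.createHash('sha256').update(owner).digest();
    }

    // New path: trust the item's own owner_address field. For a well-formed
    // data item the two derivations are byte-identical, making the hash
    // redundant work.
    function addressesAgree(item: {
      owner: string;
      owner_address: string;
    }): boolean {
      return ownerToAddress(fromB64Url(item.owner)).equals(
        fromB64Url(item.owner_address),
      );
    }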
--- src/database/standalone-sqlite.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/database/standalone-sqlite.ts b/src/database/standalone-sqlite.ts index c3903729..827c8e66 100644 --- a/src/database/standalone-sqlite.ts +++ b/src/database/standalone-sqlite.ts @@ -259,7 +259,7 @@ export function dataItemToDbRows(item: NormalizedDataItem, height?: number) { } const ownerBuffer = fromB64Url(item.owner); - const ownerAddressBuffer = ownerToAddress(ownerBuffer); + const ownerAddressBuffer = fromB64Url(item.owner_address); wallets.push({ address: ownerAddressBuffer, public_modulus: ownerBuffer }); From d3e94575834f28908b251598abca55f65c5681a1 Mon Sep 17 00:00:00 2001 From: David Whittington Date: Mon, 17 Jul 2023 14:26:31 -0500 Subject: [PATCH 33/33] feat(filters): support on-demand owner hashing PE-4214 In order to simplify filter construction, if owner_address is set in a filter, but only owner is present on the matchable item (L1 TXs don't include the address), hash owner on-demand to produce an owner_address and match against that. --- src/filters.test.ts | 18 ++++++++++++++-- src/filters.ts | 8 +++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/filters.test.ts b/src/filters.test.ts index eeb6f6f7..b94e2ff3 100644 --- a/src/filters.test.ts +++ b/src/filters.test.ts @@ -35,6 +35,7 @@ function getTx(id: string) { const TX_ID = '----LT69qUmuIeC4qb0MZHlxVp7UxLu_14rEkA_9n6w'; const TX = getTx(TX_ID); +const TX_OWNER_ADDRESS = 'Th825IP80n4i9F3Rc4cBFh767CGqiV4n7S-Oy5lGLjc'; describe('AlwaysMatch', () => { const alwaysMatch = new AlwaysMatch(); @@ -178,12 +179,25 @@ describe('MatchAttributes', () => { const matchAttributes = new MatchAttributes(attributes); - delete TX.owner; + const tx = JSON.parse(JSON.stringify(TX)); + delete tx.owner; - const result = await matchAttributes.match(TX); + const result = await matchAttributes.match(tx); expect(result).to.be.false; }); + + it('should match owner given an owner address', async () => { + const attributes = { + owner_address: TX_OWNER_ADDRESS, + }; + + const matchAttributes = new MatchAttributes(attributes); + + const result = await matchAttributes.match(TX); + + expect(result).to.be.true; + }); }); describe('createFilter', () => { diff --git a/src/filters.ts b/src/filters.ts index c52738a0..eb655e37 100644 --- a/src/filters.ts +++ b/src/filters.ts @@ -1,4 +1,4 @@ -import { b64UrlToUtf8 } from './lib/encoding.js'; +import { b64UrlToUtf8, fromB64Url, sha256B64Url } from './lib/encoding.js'; import { ItemFilter, MatchableItem } from './types.js'; export class AlwaysMatch implements ItemFilter { @@ -108,6 +108,12 @@ export class MatchAttributes implements ItemFilter { for (const [name, value] of Object.entries(this.attributes)) { if (item?.[name as keyof MatchableItem] === value) { matches.add(name); + } else if (name === 'owner_address' && item['owner'] !== undefined) { + const ownerBuffer = fromB64Url(item['owner']); + const ownerAddress = sha256B64Url(ownerBuffer); + if (ownerAddress === value) { + matches.add(name); + } } }