From 62b78ccae1bcd93027657bb9178275593b1189d6 Mon Sep 17 00:00:00 2001 From: Scott Twiname Date: Thu, 21 Nov 2024 15:16:22 +1300 Subject: [PATCH] Instructions for optimising repetitive datasources in project manifest (#572) * Instructions for optimising repetitive datasources in project manifest * Add references --------- Co-authored-by: James Bayly --- docs/indexer/build/manifest/algorand.md | 36 ++++++++++ docs/indexer/build/manifest/arbitrum.md | 36 ++++++++++ docs/indexer/build/manifest/avalanche.md | 36 ++++++++++ docs/indexer/build/manifest/bsc.md | 36 ++++++++++ docs/indexer/build/manifest/concordium.md | 36 ++++++++++ docs/indexer/build/manifest/cosmos.md | 36 ++++++++++ docs/indexer/build/manifest/ethereum.md | 36 ++++++++++ docs/indexer/build/manifest/flare.md | 36 ++++++++++ docs/indexer/build/manifest/gnosis.md | 36 ++++++++++ docs/indexer/build/manifest/near.md | 36 ++++++++++ docs/indexer/build/manifest/optimism.md | 36 ++++++++++ docs/indexer/build/manifest/polkadot.md | 8 +++ docs/indexer/build/manifest/polygon.md | 36 ++++++++++ docs/indexer/build/manifest/stellar.md | 43 ++++++++++++ docs/indexer/build/optimisation.md | 86 +++++++++++++++++++++++ 15 files changed, 569 insertions(+) diff --git a/docs/indexer/build/manifest/algorand.md b/docs/indexer/build/manifest/algorand.md index acde879614e..d23d1815d1e 100644 --- a/docs/indexer/build/manifest/algorand.md +++ b/docs/indexer/build/manifest/algorand.md @@ -279,6 +279,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on a hourly/daily basis. It can be also used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. 
Times are considered on UTC dates and times. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least 5 minutes interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. e.g you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there is also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Bypass Blocks Bypass Blocks allows you to skip the stated blocks, this is useful when there are erroneous blocks in the chain or when a chain skips a block after an outage or a hard fork. It accepts both a `range` or single `integer` entry in the array. 
diff --git a/docs/indexer/build/manifest/arbitrum.md b/docs/indexer/build/manifest/arbitrum.md index 1063f4dcfd5..28f7e1d91a5 100644 --- a/docs/indexer/build/manifest/arbitrum.md +++ b/docs/indexer/build/manifest/arbitrum.md @@ -315,6 +315,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on a hourly/daily basis. It can be also used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. Times are considered on UTC dates and times. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least 5 minutes interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. 
For example, if you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there are also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Real-time indexing (Block Confirmations) As indexers are an additional layer in your data processing pipeline, they can introduce a massive delay between when an on-chain event occurs and when the data is processed and able to be queried from the indexer. diff --git a/docs/indexer/build/manifest/avalanche.md b/docs/indexer/build/manifest/avalanche.md index 38b6615007b..dec93a2ea69 100644 --- a/docs/indexer/build/manifest/avalanche.md +++ b/docs/indexer/build/manifest/avalanche.md @@ -324,6 +324,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on an hourly/daily basis. It can also be used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. All times are interpreted as UTC. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least a 5-minute interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate Unix timestamps for iterations out of the given cron expression. 
So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. e.g you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there is also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Real-time indexing (Block Confirmations) As indexers are an additional layer in your data processing pipeline, they can introduce a massive delay between when an on-chain event occurs and when the data is processed and able to be queried from the indexer. diff --git a/docs/indexer/build/manifest/bsc.md b/docs/indexer/build/manifest/bsc.md index 4fc0bb63785..2f9e865fcf0 100644 --- a/docs/indexer/build/manifest/bsc.md +++ b/docs/indexer/build/manifest/bsc.md @@ -313,6 +313,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on a hourly/daily basis. It can be also used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. Times are considered on UTC dates and times. 
The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least 5 minutes interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. e.g you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there is also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Real-time indexing (Block Confirmations) As indexers are an additional layer in your data processing pipeline, they can introduce a massive delay between when an on-chain event occurs and when the data is processed and able to be queried from the indexer. 
diff --git a/docs/indexer/build/manifest/concordium.md b/docs/indexer/build/manifest/concordium.md index 26f39c6ae67..f076d4e34b5 100644 --- a/docs/indexer/build/manifest/concordium.md +++ b/docs/indexer/build/manifest/concordium.md @@ -290,6 +290,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on a hourly/daily basis. It can be also used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. Times are considered on UTC dates and times. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least 5 minutes interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. 
For example, if you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there are also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Bypass Blocks Bypass Blocks allows you to skip the stated blocks, this is useful when there are erroneous blocks in the chain or when a chain skips a block after an outage or a hard fork. It accepts both a `range` or single `integer` entry in the array. diff --git a/docs/indexer/build/manifest/cosmos.md b/docs/indexer/build/manifest/cosmos.md index aff78757ca6..cd9f0141b26 100644 --- a/docs/indexer/build/manifest/cosmos.md +++ b/docs/indexer/build/manifest/cosmos.md @@ -320,6 +320,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on an hourly/daily basis. It can also be used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. All times are interpreted as UTC. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least a 5-minute interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate Unix timestamps for iterations out of the given cron expression. 
So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. e.g you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there is also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Chain Types We can load protobuf message definitions to allow support for specific Cosmos zones under `network.chaintypes`. Any protobuf files that are required for the network (these end in `.proto`) should be imported. For example, you can find Osmosis' protobuf definitions [here](https://buf.build/osmosis-labs/osmosis/tree/main:osmosis) diff --git a/docs/indexer/build/manifest/ethereum.md b/docs/indexer/build/manifest/ethereum.md index 76ab8e59301..9d3ced0ba9d 100644 --- a/docs/indexer/build/manifest/ethereum.md +++ b/docs/indexer/build/manifest/ethereum.md @@ -311,6 +311,34 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on a hourly/daily basis. It can be also used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. 
+ +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. Times are considered on UTC dates and times. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least 5 minutes interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + ::: info Note When executing `subql codegen`, it will check if topics and functions are valid. ::: @@ -337,6 +365,14 @@ When declaring a `range` use an string in the format of `"start - end"`. Both st } ``` +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. e.g you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there is also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Endpoint Config This allows you to set specific options relevant to each specific RPC endpoint that you are indexing from. 
This is very useful when endpoints have unique authentication requirements, or they operate with different rate limits. diff --git a/docs/indexer/build/manifest/flare.md b/docs/indexer/build/manifest/flare.md index d2399b6d561..466212ec386 100644 --- a/docs/indexer/build/manifest/flare.md +++ b/docs/indexer/build/manifest/flare.md @@ -310,6 +310,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on a hourly/daily basis. It can be also used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. Times are considered on UTC dates and times. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least 5 minutes interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. 
For example, if you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there are also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Real-time indexing (Block Confirmations) As indexers are an additional layer in your data processing pipeline, they can introduce a massive delay between when an on-chain event occurs and when the data is processed and able to be queried from the indexer. diff --git a/docs/indexer/build/manifest/gnosis.md b/docs/indexer/build/manifest/gnosis.md index ab87526a2e0..25a4c402d6a 100644 --- a/docs/indexer/build/manifest/gnosis.md +++ b/docs/indexer/build/manifest/gnosis.md @@ -324,6 +324,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on an hourly/daily basis. It can also be used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. All times are interpreted as UTC. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least a 5-minute interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate Unix timestamps for iterations out of the given cron expression. 
So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. e.g you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there is also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Real-time indexing (Block Confirmations) As indexers are an additional layer in your data processing pipeline, they can introduce a massive delay between when an on-chain event occurs and when the data is processed and able to be queried from the indexer. diff --git a/docs/indexer/build/manifest/near.md b/docs/indexer/build/manifest/near.md index 473f4b0962e..353d0462c40 100644 --- a/docs/indexer/build/manifest/near.md +++ b/docs/indexer/build/manifest/near.md @@ -331,6 +331,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on a hourly/daily basis. It can be also used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. 
Times are considered on UTC dates and times. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least 5 minutes interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. e.g you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there is also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ### Action Types There are several types of actions as defined [here](https://github.com/subquery/subql-near/blob/main/packages/types/src/interfaces.ts#L91) diff --git a/docs/indexer/build/manifest/optimism.md b/docs/indexer/build/manifest/optimism.md index be012eaa12c..46d3460208e 100644 --- a/docs/indexer/build/manifest/optimism.md +++ b/docs/indexer/build/manifest/optimism.md @@ -323,6 +323,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... 
``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on a hourly/daily basis. It can be also used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. Times are considered on UTC dates and times. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least 5 minutes interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. e.g you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there is also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). 
+ +::: + ## Real-time indexing (Block Confirmations) As indexers are an additional layer in your data processing pipeline, they can introduce a massive delay between when an on-chain event occurs and when the data is processed and able to be queried from the indexer. diff --git a/docs/indexer/build/manifest/polkadot.md b/docs/indexer/build/manifest/polkadot.md index cb50f8da8bd..d0e300a6fd1 100644 --- a/docs/indexer/build/manifest/polkadot.md +++ b/docs/indexer/build/manifest/polkadot.md @@ -320,6 +320,14 @@ Some common examples timestamp: "0 0 * * 0" ``` +::: info Simplifying your Project Manifest for a large number of contract addresses + +If your project has the same handlers for multiple versions of the same type of contract, your project manifest can get quite repetitive. For example, if you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there are also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Custom Chains You can index data from custom Substrate chains by also including chain types in the manifest. diff --git a/docs/indexer/build/manifest/polygon.md b/docs/indexer/build/manifest/polygon.md index 11bcbd931a0..a4a0b8bb54a 100644 --- a/docs/indexer/build/manifest/polygon.md +++ b/docs/indexer/build/manifest/polygon.md @@ -314,6 +314,42 @@ filter: modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... ``` +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on an hourly/daily basis. It can also be used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. 
+ +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. Times are considered on UTC dates and times. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least 5 minutes interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package. +::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number contract addresses + +If your project has the same handlers for multiple versions of the same type of contract your project manifest can get quite repetitive. e.g you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there is also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Real-time indexing (Block Confirmations) As indexers are an additional layer in your data processing pipeline, they can introduce a massive delay between when an on-chain event occurs and when the data is processed and able to be queried from the indexer. 
diff --git a/docs/indexer/build/manifest/stellar.md b/docs/indexer/build/manifest/stellar.md index 0028ae943ff..3132c7a3486 100644 --- a/docs/indexer/build/manifest/stellar.md +++ b/docs/indexer/build/manifest/stellar.md @@ -324,6 +324,49 @@ Default runtime mapping filters are an extremely useful feature to decide what e Only incoming data that satisfies the filter conditions will be processed by the mapping functions. Mapping filters are optional but are highly recommended as they significantly reduce the amount of data processed by your SubQuery project and will improve indexing performance. +The `modulo` filter allows handling every N blocks, which is useful if you want to group or calculate data at a set interval. The following example shows how to use this filter. + +```yml +filter: + modulo: 50 # Index every 50 blocks: 0, 50, 100, 150.... +``` + +The `timestamp` filter is very useful when indexing block data with specific time intervals between them. It can be used in cases where you are aggregating data on an hourly/daily basis. It can also be used to set a delay between calls to `blockHandler` functions to reduce the computational costs of this handler. + +The `timestamp` filter accepts a valid cron expression and runs on schedule against the timestamps of the blocks being indexed. Times are interpreted in UTC. The block handler will run on the first block that is after the next iteration of the cron expression. + +```yml +filter: + # This cron expression will index blocks with at least a 5-minute interval + # between their timestamps starting at startBlock given under the datasource. + timestamp: "*/5 * * * *" +``` + +::: tip Note +We use the [cron-converter](https://github.com/roccivic/cron-converter) package to generate unix timestamps for iterations out of the given cron expression. So, make sure the format of the cron expression given in the `timestamp` filter is compatible with the package.
+::: + +Some common examples + +```yml + # Every minute + timestamp: "* * * * *" + # Every hour on the hour (UTC) + timestamp: "0 * * * *" + # Every day at 1am UTC + timestamp: "0 1 * * *" + # Every Sunday (weekly) at 0:00 UTC + timestamp: "0 0 * * 0" +``` + +::: info Simplifying your Project Manifest for a large number of contract addresses + +If your project has the same handlers for multiple versions of the same type of contract, your project manifest can get quite repetitive. If, for example, you want to index the transfers for many similar ERC20 contracts, there are [ways to better handle a large static list of contract addresses](../optimisation.md#simplifying-the-project-manifest). + +Note that there are also [dynamic datasources](../dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +::: + ## Real-time indexing (Block Confirmations) As indexers are an additional layer in your data processing pipeline, they can introduce a massive delay between when an on-chain event occurs and when the data is processed and able to be queried from the indexer. diff --git a/docs/indexer/build/optimisation.md b/docs/indexer/build/optimisation.md index 3eddccc0b83..06f01ec2dd1 100644 --- a/docs/indexer/build/optimisation.md +++ b/docs/indexer/build/optimisation.md @@ -95,3 +95,89 @@ If your project requires indexing all the blocks, transactions alongside more sp We recommend this approach, because it takes time to index all the blocks and it can slow down your project significantly. If you want to apply some changes to your filters or entities shape you may need to remove your database and reindex the whole project from the beginning. A common example is creating a large project that indexes everything so you can perform internal analysis on your contracts, and then much smaller and optimised project for indexing the key data for your dApp.
The larger project that indexes everything might never change and so you can avoid costly reindexing, while the smaller optimised project will change as your dApp matures and can be reindexed much faster. + +## Simplifying the Project Manifest + +If your project has the same handlers for multiple versions of the same type of contract, your project manifest can get quite repetitive, e.g. when you want to index the transfers for many ERC20 contracts. + +Note that there are also [dynamic datasources](./dynamicdatasources.md) for when your list of addresses is dynamic (e.g. you use a factory contract). + +In cases where there is a large number of contract addresses, but the list is static, you can simplify the manifest in a couple of ways depending on whether you're using TypeScript or YAML. With TypeScript you can use functions as you would in any other TypeScript file. With YAML you can use [anchors](https://www.howtogeek.com/devops/how-to-simplify-docker-compose-files-with-yaml-anchors-and-extensions/).
+ +::: code-tabs +@tab project.ts + +```ts + +const erc20Addresses = [ + "0x09395a2a58db45db0da254c7eaa5ac469d8bdc85", + // Other contract addresses go here +]; + +const project: EthereumProject = { + // ...The rest of your project manifest + dataSources: [ + ...erc20Addresses.map((address) => ( + { + kind: EthereumDatasourceKind.Runtime, + startBlock: 1, + + options: { + abi: "erc20", + address, + }, + assets: new Map([["erc20", { file: "./abis/erc20.abi.json" }]]), + mapping: { + file: "./dist/index.js", + handlers: [ + { + kind: EthereumHandlerKind.Event, + handler: "handleLog", + filter: { + topics: [ + "Transfer(address indexed from, address indexed to, uint256 amount)", + ], + }, + }, + ], + }, + } + )), + // Other data sources here + ], +}; + +``` + +@tab project.yml + +```yml +# The rest of your project yaml + +x-erc20: &erc20 + kind: ethereum/Runtime + startBlock: 10512216 + assets: + erc20: + file: ./abis/erc20.abi.json + options: + abi: erc20 + mapping: + file: ./dist/index.js + handlers: + - handler: handleLog + kind: ethereum/LogHandler + filter: + topics: + - Transfer(address indexed from, address indexed to, uint256 amount) + +dataSources: + # Repeat this with different addresses + - <<: *erc20 + options: + abi: erc20 + address: "0x09395a2a58db45db0da254c7eaa5ac469d8bdc85" + # Other datasources here +``` + +:::