From 60673aa29fabf0293b428608840511fef6c86a20 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Tue, 20 Aug 2024 13:55:18 -0400 Subject: [PATCH 01/22] MultiNode integration setup --- pkg/solana/chain.go | 7 +- pkg/solana/chain_multinode.go | 268 +++++++ pkg/solana/chain_test.go | 3 +- pkg/solana/client/client.go | 8 +- pkg/solana/client/client_test.go | 4 +- pkg/solana/client/multinode/models.go | 121 +++ pkg/solana/client/multinode/multi_node.go | 379 +++++++++ pkg/solana/client/multinode/node.go | 331 ++++++++ pkg/solana/client/multinode/node_fsm.go | 370 +++++++++ pkg/solana/client/multinode/node_lifecycle.go | 732 ++++++++++++++++++ pkg/solana/client/multinode/node_selector.go | 43 + pkg/solana/client/multinode/poller.go | 99 +++ pkg/solana/client/multinode/poller_test.go | 187 +++++ pkg/solana/client/multinode/send_only_node.go | 183 +++++ .../multinode/send_only_node_lifecycle.go | 67 ++ .../client/multinode/send_only_node_test.go | 139 ++++ .../client/multinode/transaction_sender.go | 277 +++++++ .../multinode/transaction_sender_test.go | 360 +++++++++ pkg/solana/client/multinode/types.go | 124 +++ pkg/solana/client/rpc_client.go | 318 ++++++++ pkg/solana/config/multinode.go | 86 ++ pkg/solana/config/toml.go | 8 +- 22 files changed, 4103 insertions(+), 11 deletions(-) create mode 100644 pkg/solana/chain_multinode.go create mode 100644 pkg/solana/client/multinode/models.go create mode 100644 pkg/solana/client/multinode/multi_node.go create mode 100644 pkg/solana/client/multinode/node.go create mode 100644 pkg/solana/client/multinode/node_fsm.go create mode 100644 pkg/solana/client/multinode/node_lifecycle.go create mode 100644 pkg/solana/client/multinode/node_selector.go create mode 100644 pkg/solana/client/multinode/poller.go create mode 100644 pkg/solana/client/multinode/poller_test.go create mode 100644 pkg/solana/client/multinode/send_only_node.go create mode 100644 pkg/solana/client/multinode/send_only_node_lifecycle.go create mode 100644 pkg/solana/client/multinode/send_only_node_test.go create mode 100644 pkg/solana/client/multinode/transaction_sender.go create mode 100644 pkg/solana/client/multinode/transaction_sender_test.go create mode 100644 pkg/solana/client/multinode/types.go create mode 100644 pkg/solana/client/rpc_client.go create mode 100644 pkg/solana/config/multinode.go diff --git a/pkg/solana/chain.go b/pkg/solana/chain.go index 4e03fd425..20f6322d5 100644 --- a/pkg/solana/chain.go +++ b/pkg/solana/chain.go @@ -114,7 +114,8 @@ func (v *verifiedCachedClient) verifyChainID() (bool, error) { v.chainIDVerifiedLock.Lock() defer v.chainIDVerifiedLock.Unlock() - v.chainID, err = v.ReaderWriter.ChainID() + strID, err := v.ReaderWriter.ChainID(context.Background()) + v.chainID = strID.String() if err != nil { v.chainIDVerified = false return v.chainIDVerified, fmt.Errorf("failed to fetch ChainID in verifiedCachedClient: %w", err) @@ -186,13 +187,13 @@ func (v *verifiedCachedClient) LatestBlockhash() (*rpc.GetLatestBlockhashResult, return v.ReaderWriter.LatestBlockhash() } -func (v *verifiedCachedClient) ChainID() (string, error) { +func (v *verifiedCachedClient) ChainID(ctx context.Context) (client.StringID, error) { verified, err := v.verifyChainID() if !verified { return "", err } - return v.chainID, nil + return client.StringID(v.chainID), nil } func (v *verifiedCachedClient) GetFeeForMessage(msg string) (uint64, error) { diff --git a/pkg/solana/chain_multinode.go b/pkg/solana/chain_multinode.go new file mode 100644 index 000000000..82fb5b23f --- /dev/null +++ 
b/pkg/solana/chain_multinode.go @@ -0,0 +1,268 @@ +package solana + +import ( + "context" + "errors" + "fmt" + "math/big" + "sync" + "time" + + solanago "github.com/gagliardetto/solana-go" + "github.com/gagliardetto/solana-go/programs/system" + "github.com/smartcontractkit/chainlink-common/pkg/chains" + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/loop" + "github.com/smartcontractkit/chainlink-common/pkg/services" + relaytypes "github.com/smartcontractkit/chainlink-common/pkg/types" + "github.com/smartcontractkit/chainlink-solana/pkg/solana/client" + mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" + "github.com/smartcontractkit/chainlink-solana/pkg/solana/config" + "github.com/smartcontractkit/chainlink-solana/pkg/solana/monitor" + "github.com/smartcontractkit/chainlink-solana/pkg/solana/txm" +) + +func NewMultiNodeChain(cfg *config.TOMLConfig, opts ChainOpts) (Chain, error) { + if !cfg.IsEnabled() { + return nil, fmt.Errorf("cannot create new chain with ID %s: chain is disabled", *cfg.ChainID) + } + c, err := newMultiNodeChain(*cfg.ChainID, cfg, opts.KeyStore, opts.Logger) + if err != nil { + return nil, err + } + return c, nil +} + +var _ Chain = (*multiNodeChain)(nil) + +type multiNodeChain struct { + services.StateMachine + id string + cfg *config.TOMLConfig + multiNode *mn.MultiNode[client.StringID, *client.RpcClient] + txSender *mn.TransactionSender[*solanago.Transaction, client.StringID, *client.RpcClient] + txm *txm.Txm + balanceMonitor services.Service + lggr logger.Logger + + clientLock sync.RWMutex +} + +func newMultiNodeChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.Logger) (*multiNodeChain, error) { + lggr = logger.With(lggr, "chainID", id, "chain", "solana") + + chainFamily := "solana" + + cfg.BlockHistoryPollPeriod() + + mnCfg := cfg.MultiNodeConfig() + + var nodes []mn.Node[client.StringID, *client.RpcClient] + + for i, nodeInfo := range cfg.ListNodes() { + // create client and check + rpcClient, err := client.NewRpcClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) + if err != nil { + lggr.Warnw("failed to create client", "name", *nodeInfo.Name, "solana-url", nodeInfo.URL.String(), "err", err.Error()) + continue + } + + newNode := mn.NewNode[client.StringID, *client.Head, *client.RpcClient]( + mnCfg, mnCfg, lggr, *nodeInfo.URL.URL(), nil, *nodeInfo.Name, + int32(i), client.StringID(id), 0, rpcClient, chainFamily) + + nodes = append(nodes, newNode) + } + + multiNode := mn.NewMultiNode[client.StringID, *client.RpcClient]( + lggr, + mn.NodeSelectionModeRoundRobin, + time.Duration(0), // TODO: set lease duration + nodes, + []mn.SendOnlyNode[client.StringID, *client.RpcClient]{}, // TODO: no send only nodes? 
+ client.StringID(id), + chainFamily, + time.Duration(0), // TODO: set deathDeclarationDelay + ) + + classifySendError := func(tx *solanago.Transaction, err error) mn.SendTxReturnCode { + return 0 // TODO ClassifySendError(err, clientErrors, logger.Sugared(logger.Nop()), tx, common.Address{}, false) + } + + txSender := mn.NewTransactionSender[*solanago.Transaction, client.StringID, *client.RpcClient]( + lggr, + client.StringID(id), + chainFamily, + multiNode, + classifySendError, + 0, // use the default value provided by the implementation + ) + + var ch = multiNodeChain{ + id: id, + cfg: cfg, + multiNode: multiNode, + txSender: txSender, + lggr: logger.Named(lggr, "Chain"), + } + + tc := func() (client.ReaderWriter, error) { + return ch.multiNode.SelectRPC() + } + + ch.txm = txm.NewTxm(ch.id, tc, cfg, ks, lggr) + bc := func() (monitor.BalanceClient, error) { + return ch.multiNode.SelectRPC() + } + ch.balanceMonitor = monitor.NewBalanceMonitor(ch.id, cfg, lggr, ks, bc) + return &ch, nil +} + +// ChainService interface +func (c *multiNodeChain) GetChainStatus(ctx context.Context) (relaytypes.ChainStatus, error) { + toml, err := c.cfg.TOMLString() + if err != nil { + return relaytypes.ChainStatus{}, err + } + return relaytypes.ChainStatus{ + ID: c.id, + Enabled: c.cfg.IsEnabled(), + Config: toml, + }, nil +} + +func (c *multiNodeChain) ListNodeStatuses(ctx context.Context, pageSize int32, pageToken string) (stats []relaytypes.NodeStatus, nextPageToken string, total int, err error) { + return chains.ListNodeStatuses(int(pageSize), pageToken, c.listNodeStatuses) +} + +func (c *multiNodeChain) Transact(ctx context.Context, from, to string, amount *big.Int, balanceCheck bool) error { + return c.sendTx(ctx, from, to, amount, balanceCheck) +} + +func (c *multiNodeChain) listNodeStatuses(start, end int) ([]relaytypes.NodeStatus, int, error) { + stats := make([]relaytypes.NodeStatus, 0) + total := len(c.cfg.Nodes) + if start >= total { + return stats, total, chains.ErrOutOfRange + } + if end > total { + end = total + } + nodes := c.cfg.Nodes[start:end] + for _, node := range nodes { + stat, err := config.NodeStatus(node, c.ChainID()) + if err != nil { + return stats, total, err + } + stats = append(stats, stat) + } + return stats, total, nil +} + +func (c *multiNodeChain) Name() string { + return c.lggr.Name() +} + +func (c *multiNodeChain) ID() string { + return c.id +} + +func (c *multiNodeChain) Config() config.Config { + return c.cfg +} + +func (c *multiNodeChain) TxManager() TxManager { + return c.txm +} + +func (c *multiNodeChain) Reader() (client.Reader, error) { + return c.multiNode.SelectRPC() +} + +func (c *multiNodeChain) ChainID() string { + return c.id +} + +func (c *multiNodeChain) Start(ctx context.Context) error { + return c.StartOnce("Chain", func() error { + c.lggr.Debug("Starting") + c.lggr.Debug("Starting txm") + c.lggr.Debug("Starting balance monitor") + var ms services.MultiStart + return ms.Start(ctx, c.txm, c.balanceMonitor) + }) +} + +func (c *multiNodeChain) Close() error { + return c.StopOnce("Chain", func() error { + c.lggr.Debug("Stopping") + c.lggr.Debug("Stopping txm") + c.lggr.Debug("Stopping balance monitor") + return services.CloseAll(c.txm, c.balanceMonitor) + }) +} + +func (c *multiNodeChain) Ready() error { + return errors.Join( + c.StateMachine.Ready(), + c.txm.Ready(), + ) +} + +func (c *multiNodeChain) HealthReport() map[string]error { + report := map[string]error{c.Name(): c.Healthy()} + services.CopyHealth(report, c.txm.HealthReport()) + return report +} + 
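+
+// Example (illustrative sketch only): callers of this chain hold a getter closure, like the
+// `tc` closure passed to txm.NewTxm above, instead of a fixed client, so each call re-selects
+// a currently-alive RPC via MultiNode.SelectRPC. The helper below is hypothetical (its name
+// and error wrapping are not part of this change) and only uses types already defined in this
+// package.
+func latestBlockhashExample(tc func() (client.ReaderWriter, error)) (string, error) {
+	// Re-select a healthy node for this call; this fails if no live nodes are available.
+	rw, err := tc()
+	if err != nil {
+		return "", fmt.Errorf("no healthy RPC available: %w", err)
+	}
+	// Any Reader method can now be used against the selected node.
+	bh, err := rw.LatestBlockhash()
+	if err != nil {
+		return "", err
+	}
+	return bh.Value.Blockhash.String(), nil
+}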
+func (c *multiNodeChain) sendTx(ctx context.Context, from, to string, amount *big.Int, balanceCheck bool) error { + reader, err := c.Reader() + if err != nil { + return fmt.Errorf("chain unreachable: %w", err) + } + + fromKey, err := solanago.PublicKeyFromBase58(from) + if err != nil { + return fmt.Errorf("failed to parse from key: %w", err) + } + toKey, err := solanago.PublicKeyFromBase58(to) + if err != nil { + return fmt.Errorf("failed to parse to key: %w", err) + } + if !amount.IsUint64() { + return fmt.Errorf("amount %s overflows uint64", amount) + } + amountI := amount.Uint64() + + blockhash, err := reader.LatestBlockhash() + if err != nil { + return fmt.Errorf("failed to get latest block hash: %w", err) + } + tx, err := solanago.NewTransaction( + []solanago.Instruction{ + system.NewTransferInstruction( + amountI, + fromKey, + toKey, + ).Build(), + }, + blockhash.Value.Blockhash, + solanago.TransactionPayer(fromKey), + ) + if err != nil { + return fmt.Errorf("failed to create tx: %w", err) + } + + if balanceCheck { + if err = solanaValidateBalance(reader, fromKey, amountI, tx.Message.ToBase64()); err != nil { + return fmt.Errorf("failed to validate balance: %w", err) + } + } + + txm := c.TxManager() + err = txm.Enqueue("", tx) + if err != nil { + return fmt.Errorf("transaction failed: %w", err) + } + return nil +} diff --git a/pkg/solana/chain_test.go b/pkg/solana/chain_test.go index aa52b8b4d..3f1fdaf23 100644 --- a/pkg/solana/chain_test.go +++ b/pkg/solana/chain_test.go @@ -1,6 +1,7 @@ package solana import ( + "context" "errors" "fmt" "io" @@ -174,7 +175,7 @@ func TestSolanaChain_VerifiedClient(t *testing.T) { testChain.id = "incorrect" c, err = testChain.verifiedClient(node) assert.NoError(t, err) - _, err = c.ChainID() + _, err = c.ChainID(context.Background()) // expect error from id mismatch (even if using a cached client) when performing RPC calls assert.Error(t, err) assert.Equal(t, fmt.Sprintf("client returned mismatched chain id (expected: %s, got: %s): %s", "incorrect", "devnet", node.URL), err.Error()) diff --git a/pkg/solana/client/client.go b/pkg/solana/client/client.go index e51c93837..d007e3c4c 100644 --- a/pkg/solana/client/client.go +++ b/pkg/solana/client/client.go @@ -33,7 +33,7 @@ type Reader interface { Balance(addr solana.PublicKey) (uint64, error) SlotHeight() (uint64, error) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) - ChainID() (string, error) + ChainID(ctx context.Context) (StringID, error) GetFeeForMessage(msg string) (uint64, error) GetLatestBlock() (*rpc.GetBlockResult, error) } @@ -142,11 +142,11 @@ func (c *Client) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) { return v.(*rpc.GetLatestBlockhashResult), err } -func (c *Client) ChainID() (string, error) { +func (c *Client) ChainID(ctx context.Context) (StringID, error) { done := c.latency("chain_id") defer done() - ctx, cancel := context.WithTimeout(context.Background(), c.contextDuration) + ctx, cancel := context.WithTimeout(ctx, c.contextDuration) defer cancel() v, err, _ := c.requestGroup.Do("GetGenesisHash", func() (interface{}, error) { return c.rpc.GetGenesisHash(ctx) @@ -168,7 +168,7 @@ func (c *Client) ChainID() (string, error) { c.log.Warnf("unknown genesis hash - assuming solana chain is 'localnet'") network = "localnet" } - return network, nil + return StringID(network), nil } func (c *Client) GetFeeForMessage(msg string) (uint64, error) { diff --git a/pkg/solana/client/client_test.go b/pkg/solana/client/client_test.go index ab9dba263..6f2276bd3 100644 --- 
a/pkg/solana/client/client_test.go +++ b/pkg/solana/client/client_test.go @@ -76,7 +76,7 @@ func TestClient_Reader_Integration(t *testing.T) { assert.Equal(t, uint64(5000), fee) // get chain ID based on gensis hash - network, err := c.ChainID() + network, err := c.ChainID(context.Background()) assert.NoError(t, err) assert.Equal(t, "localnet", network) @@ -120,7 +120,7 @@ func TestClient_Reader_ChainID(t *testing.T) { // get chain ID based on gensis hash for _, n := range networks { - network, err := c.ChainID() + network, err := c.ChainID(context.Background()) assert.NoError(t, err) assert.Equal(t, n, network) } diff --git a/pkg/solana/client/multinode/models.go b/pkg/solana/client/multinode/models.go new file mode 100644 index 000000000..526bb25c8 --- /dev/null +++ b/pkg/solana/client/multinode/models.go @@ -0,0 +1,121 @@ +package client + +import ( + "bytes" + "fmt" +) + +type SendTxReturnCode int + +// SendTxReturnCode is a generalized client error that dictates what should be the next action, depending on the RPC error response. +const ( + Successful SendTxReturnCode = iota + 1 + Fatal // Unrecoverable error. Most likely the attempt should be thrown away. + Retryable // The error returned by the RPC indicates that if we retry with the same attempt, the tx will eventually go through. + Underpriced // Attempt was underpriced. New estimation is needed with bumped gas price. + Unknown // Tx failed with an error response that is not recognized by the client. + Unsupported // Attempt failed with an error response that is not supported by the client for the given chain. + TransactionAlreadyKnown // The transaction that was sent has already been received by the RPC. + InsufficientFunds // Tx was rejected due to insufficient funds. + ExceedsMaxFee // Attempt's fee was higher than the node's limit and got rejected. + FeeOutOfValidRange // This error is returned when we use a fee price suggested from an RPC, but the network rejects the attempt due to an invalid range(mostly used by L2 chains). Retry by requesting a new suggested fee price. + TerminallyStuck // The error returned when a transaction is or could get terminally stuck in the mempool without any chance of inclusion. + sendTxReturnCodeLen // tracks the number of errors. 
Must always be last +) + +// sendTxSevereErrors - error codes which signal that transaction would never be accepted in its current form by the node +var sendTxSevereErrors = []SendTxReturnCode{Fatal, Underpriced, Unsupported, ExceedsMaxFee, FeeOutOfValidRange, Unknown} + +// sendTxSuccessfulCodes - error codes which signal that transaction was accepted by the node +var sendTxSuccessfulCodes = []SendTxReturnCode{Successful, TransactionAlreadyKnown} + +func (c SendTxReturnCode) String() string { + switch c { + case Successful: + return "Successful" + case Fatal: + return "Fatal" + case Retryable: + return "Retryable" + case Underpriced: + return "Underpriced" + case Unknown: + return "Unknown" + case Unsupported: + return "Unsupported" + case TransactionAlreadyKnown: + return "TransactionAlreadyKnown" + case InsufficientFunds: + return "InsufficientFunds" + case ExceedsMaxFee: + return "ExceedsMaxFee" + case FeeOutOfValidRange: + return "FeeOutOfValidRange" + case TerminallyStuck: + return "TerminallyStuck" + default: + return fmt.Sprintf("SendTxReturnCode(%d)", c) + } +} + +type NodeTier int + +const ( + Primary = NodeTier(iota) + Secondary +) + +func (n NodeTier) String() string { + switch n { + case Primary: + return "primary" + case Secondary: + return "secondary" + default: + return fmt.Sprintf("NodeTier(%d)", n) + } +} + +// syncStatus - defines problems related to RPC's state synchronization. Can be used as a bitmask to define multiple issues +type syncStatus int + +const ( + // syncStatusSynced - RPC is fully synced + syncStatusSynced = 0 + // syncStatusNotInSyncWithPool - RPC is lagging behind the highest block observed within the pool of RPCs + syncStatusNotInSyncWithPool syncStatus = 1 << iota + // syncStatusNoNewHead - RPC failed to produce a new head for too long + syncStatusNoNewHead + // syncStatusNoNewFinalizedHead - RPC failed to produce a new finalized head for too long + syncStatusNoNewFinalizedHead + syncStatusLen +) + +func (s syncStatus) String() string { + if s == syncStatusSynced { + return "Synced" + } + var result bytes.Buffer + for i := syncStatusNotInSyncWithPool; i < syncStatusLen; i = i << 1 { + if i&s == 0 { + continue + } + result.WriteString(i.string()) + result.WriteString(",") + } + result.Truncate(result.Len() - 1) + return result.String() +} + +func (s syncStatus) string() string { + switch s { + case syncStatusNotInSyncWithPool: + return "NotInSyncWithRPCPool" + case syncStatusNoNewHead: + return "NoNewHead" + case syncStatusNoNewFinalizedHead: + return "NoNewFinalizedHead" + default: + return fmt.Sprintf("syncStatus(%d)", s) + } +} diff --git a/pkg/solana/client/multinode/multi_node.go b/pkg/solana/client/multinode/multi_node.go new file mode 100644 index 000000000..386e09554 --- /dev/null +++ b/pkg/solana/client/multinode/multi_node.go @@ -0,0 +1,379 @@ +package client + +import ( + "context" + "errors" + "fmt" + "math/big" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/services" +) + +var ( + // PromMultiNodeRPCNodeStates reports current RPC node state + PromMultiNodeRPCNodeStates = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "multi_node_states", + Help: "The number of RPC nodes currently in the given state for the given chain", + }, []string{"network", "chainId", "state"}) + ErroringNodeError = fmt.Errorf("no live nodes available") +) + +// 
MultiNode is a generalized multi node client interface that includes methods to interact with different chains. +// It also handles multiple node RPC connections simultaneously. +type MultiNode[ + CHAIN_ID ID, + RPC any, +] struct { + services.StateMachine + primaryNodes []Node[CHAIN_ID, RPC] + sendOnlyNodes []SendOnlyNode[CHAIN_ID, RPC] + chainID CHAIN_ID + lggr logger.SugaredLogger + selectionMode string + nodeSelector NodeSelector[CHAIN_ID, RPC] + leaseDuration time.Duration + leaseTicker *time.Ticker + chainFamily string + reportInterval time.Duration + deathDeclarationDelay time.Duration + + activeMu sync.RWMutex + activeNode Node[CHAIN_ID, RPC] + + chStop services.StopChan + wg sync.WaitGroup +} + +func NewMultiNode[ + CHAIN_ID ID, + RPC any, +]( + lggr logger.Logger, + selectionMode string, // type of the "best" RPC selector (e.g HighestHead, RoundRobin, etc.) + leaseDuration time.Duration, // defines interval on which new "best" RPC should be selected + primaryNodes []Node[CHAIN_ID, RPC], + sendOnlyNodes []SendOnlyNode[CHAIN_ID, RPC], + chainID CHAIN_ID, // configured chain ID (used to verify that passed primaryNodes belong to the same chain) + chainFamily string, // name of the chain family - used in the metrics + deathDeclarationDelay time.Duration, +) *MultiNode[CHAIN_ID, RPC] { + nodeSelector := newNodeSelector(selectionMode, primaryNodes) + // Prometheus' default interval is 15s, set this to under 7.5s to avoid + // aliasing (see: https://en.wikipedia.org/wiki/Nyquist_frequency) + const reportInterval = 6500 * time.Millisecond + c := &MultiNode[CHAIN_ID, RPC]{ + primaryNodes: primaryNodes, + sendOnlyNodes: sendOnlyNodes, + chainID: chainID, + lggr: logger.Sugared(lggr).Named("MultiNode").With("chainID", chainID.String()), + selectionMode: selectionMode, + nodeSelector: nodeSelector, + chStop: make(services.StopChan), + leaseDuration: leaseDuration, + chainFamily: chainFamily, + reportInterval: reportInterval, + deathDeclarationDelay: deathDeclarationDelay, + } + + c.lggr.Debugf("The MultiNode is configured to use NodeSelectionMode: %s", selectionMode) + + return c +} + +func (c *MultiNode[CHAIN_ID, RPC]) ChainID() CHAIN_ID { + return c.chainID +} + +func (c *MultiNode[CHAIN_ID, RPC]) DoAll(ctx context.Context, do func(ctx context.Context, rpc RPC, isSendOnly bool)) error { + var err error + ok := c.IfNotStopped(func() { + ctx, _ = c.chStop.Ctx(ctx) + + callsCompleted := 0 + for _, n := range c.primaryNodes { + select { + case <-ctx.Done(): + err = ctx.Err() + return + default: + if n.State() != NodeStateAlive { + continue + } + do(ctx, n.RPC(), false) + callsCompleted++ + } + } + if callsCompleted == 0 { + err = ErroringNodeError + } + + for _, n := range c.sendOnlyNodes { + select { + case <-ctx.Done(): + err = ctx.Err() + return + default: + if n.State() != NodeStateAlive { + continue + } + do(ctx, n.RPC(), true) + } + } + }) + if !ok { + return errors.New("MultiNode is stopped") + } + return err +} + +func (c *MultiNode[CHAIN_ID, RPC]) NodeStates() map[string]NodeState { + states := map[string]NodeState{} + for _, n := range c.primaryNodes { + states[n.String()] = n.State() + } + for _, n := range c.sendOnlyNodes { + states[n.String()] = n.State() + } + return states +} + +// Start starts every node in the pool +// +// Nodes handle their own redialing and runloops, so this function does not +// return any error if the nodes aren't available +func (c *MultiNode[CHAIN_ID, RPC]) Start(ctx context.Context) error { + return c.StartOnce("MultiNode", func() (merr error) { + if 
len(c.primaryNodes) == 0 { + return fmt.Errorf("no available nodes for chain %s", c.chainID.String()) + } + var ms services.MultiStart + for _, n := range c.primaryNodes { + if n.ConfiguredChainID().String() != c.chainID.String() { + return ms.CloseBecause(fmt.Errorf("node %s has configured chain ID %s which does not match multinode configured chain ID of %s", n.String(), n.ConfiguredChainID().String(), c.chainID.String())) + } + n.SetPoolChainInfoProvider(c) + // node will handle its own redialing and automatic recovery + if err := ms.Start(ctx, n); err != nil { + return err + } + } + for _, s := range c.sendOnlyNodes { + if s.ConfiguredChainID().String() != c.chainID.String() { + return ms.CloseBecause(fmt.Errorf("sendonly node %s has configured chain ID %s which does not match multinode configured chain ID of %s", s.String(), s.ConfiguredChainID().String(), c.chainID.String())) + } + if err := ms.Start(ctx, s); err != nil { + return err + } + } + c.wg.Add(1) + go c.runLoop() + + if c.leaseDuration.Seconds() > 0 && c.selectionMode != NodeSelectionModeRoundRobin { + c.lggr.Infof("The MultiNode will switch to best node every %s", c.leaseDuration.String()) + c.wg.Add(1) + go c.checkLeaseLoop() + } else { + c.lggr.Info("Best node switching is disabled") + } + + return nil + }) +} + +// Close tears down the MultiNode and closes all nodes +func (c *MultiNode[CHAIN_ID, RPC]) Close() error { + return c.StopOnce("MultiNode", func() error { + close(c.chStop) + c.wg.Wait() + + return services.CloseAll(services.MultiCloser(c.primaryNodes), services.MultiCloser(c.sendOnlyNodes)) + }) +} + +// SelectRPC returns an RPC of an active node. If there are no active nodes it returns an error. +// Call this method from your chain-specific client implementation to access any chain-specific rpc calls. +func (c *MultiNode[CHAIN_ID, RPC]) SelectRPC() (rpc RPC, err error) { + n, err := c.selectNode() + if err != nil { + return rpc, err + } + return n.RPC(), nil +} + +// selectNode returns the active Node, if it is still NodeStateAlive, otherwise it selects a new one from the NodeSelector. +func (c *MultiNode[CHAIN_ID, RPC]) selectNode() (node Node[CHAIN_ID, RPC], err error) { + c.activeMu.RLock() + node = c.activeNode + c.activeMu.RUnlock() + if node != nil && node.State() == NodeStateAlive { + return // still alive + } + + // select a new one + c.activeMu.Lock() + defer c.activeMu.Unlock() + node = c.activeNode + if node != nil && node.State() == NodeStateAlive { + return // another goroutine beat us here + } + + if c.activeNode != nil { + c.activeNode.UnsubscribeAllExceptAliveLoop() + } + c.activeNode = c.nodeSelector.Select() + + if c.activeNode == nil { + c.lggr.Criticalw("No live RPC nodes available", "NodeSelectionMode", c.nodeSelector.Name()) + errmsg := fmt.Errorf("no live nodes available for chain %s", c.chainID.String()) + c.SvcErrBuffer.Append(errmsg) + err = ErroringNodeError + } + + return c.activeNode, err +} + +// LatestChainInfo - returns number of live nodes available in the pool, so we can prevent the last alive node in a pool from being marked as out-of-sync. +// Return highest ChainInfo most recently received by the alive nodes. +// E.g. If Node A's the most recent block is 10 and highest 15 and for Node B it's - 12 and 14. This method will return 12. 
+func (c *MultiNode[CHAIN_ID, RPC]) LatestChainInfo() (int, ChainInfo) { + var nLiveNodes int + ch := ChainInfo{ + TotalDifficulty: big.NewInt(0), + } + for _, n := range c.primaryNodes { + if s, nodeChainInfo := n.StateAndLatest(); s == NodeStateAlive { + nLiveNodes++ + ch.BlockNumber = max(ch.BlockNumber, nodeChainInfo.BlockNumber) + ch.FinalizedBlockNumber = max(ch.FinalizedBlockNumber, nodeChainInfo.FinalizedBlockNumber) + ch.TotalDifficulty = MaxTotalDifficulty(ch.TotalDifficulty, nodeChainInfo.TotalDifficulty) + } + } + return nLiveNodes, ch +} + +// HighestUserObservations - returns highest ChainInfo ever observed by any user of the MultiNode +func (c *MultiNode[CHAIN_ID, RPC]) HighestUserObservations() ChainInfo { + ch := ChainInfo{ + TotalDifficulty: big.NewInt(0), + } + for _, n := range c.primaryNodes { + nodeChainInfo := n.HighestUserObservations() + ch.BlockNumber = max(ch.BlockNumber, nodeChainInfo.BlockNumber) + ch.FinalizedBlockNumber = max(ch.FinalizedBlockNumber, nodeChainInfo.FinalizedBlockNumber) + ch.TotalDifficulty = MaxTotalDifficulty(ch.TotalDifficulty, nodeChainInfo.TotalDifficulty) + } + return ch +} + +func (c *MultiNode[CHAIN_ID, RPC]) checkLease() { + bestNode := c.nodeSelector.Select() + for _, n := range c.primaryNodes { + // Terminate client subscriptions. Services are responsible for reconnecting, which will be routed to the new + // best node. Only terminate connections with more than 1 subscription to account for the aliveLoop subscription + if n.State() == NodeStateAlive && n != bestNode { + c.lggr.Infof("Switching to best node from %q to %q", n.String(), bestNode.String()) + n.UnsubscribeAllExceptAliveLoop() + } + } + + c.activeMu.Lock() + defer c.activeMu.Unlock() + if bestNode != c.activeNode { + if c.activeNode != nil { + c.activeNode.UnsubscribeAllExceptAliveLoop() + } + c.activeNode = bestNode + } +} + +func (c *MultiNode[CHAIN_ID, RPC]) checkLeaseLoop() { + defer c.wg.Done() + c.leaseTicker = time.NewTicker(c.leaseDuration) + defer c.leaseTicker.Stop() + + for { + select { + case <-c.leaseTicker.C: + c.checkLease() + case <-c.chStop: + return + } + } +} + +func (c *MultiNode[CHAIN_ID, RPC]) runLoop() { + defer c.wg.Done() + + nodeStates := make([]nodeWithState, len(c.primaryNodes)) + for i, n := range c.primaryNodes { + nodeStates[i] = nodeWithState{ + Node: n.String(), + State: n.State().String(), + DeadSince: nil, + } + } + + c.report(nodeStates) + + monitor := services.NewTicker(c.reportInterval) + defer monitor.Stop() + + for { + select { + case <-monitor.C: + c.report(nodeStates) + case <-c.chStop: + return + } + } +} + +type nodeWithState struct { + Node string + State string + DeadSince *time.Time +} + +func (c *MultiNode[CHAIN_ID, RPC]) report(nodesStateInfo []nodeWithState) { + start := time.Now() + var dead int + counts := make(map[NodeState]int) + for i, n := range c.primaryNodes { + state := n.State() + counts[state]++ + nodesStateInfo[i].State = state.String() + if state == NodeStateAlive { + nodesStateInfo[i].DeadSince = nil + continue + } + + if nodesStateInfo[i].DeadSince == nil { + nodesStateInfo[i].DeadSince = &start + } + + if start.Sub(*nodesStateInfo[i].DeadSince) >= c.deathDeclarationDelay { + dead++ + } + } + for _, state := range allNodeStates { + count := counts[state] + PromMultiNodeRPCNodeStates.WithLabelValues(c.chainFamily, c.chainID.String(), state.String()).Set(float64(count)) + } + + total := len(c.primaryNodes) + live := total - dead + c.lggr.Tracew(fmt.Sprintf("MultiNode state: %d/%d nodes are alive", live, 
total), "nodeStates", nodesStateInfo) + if total == dead { + rerr := fmt.Errorf("no primary nodes available: 0/%d nodes are alive", total) + c.lggr.Criticalw(rerr.Error(), "nodeStates", nodesStateInfo) + c.SvcErrBuffer.Append(rerr) + } else if dead > 0 { + c.lggr.Errorw(fmt.Sprintf("At least one primary node is dead: %d/%d nodes are alive", live, total), "nodeStates", nodesStateInfo) + } +} diff --git a/pkg/solana/client/multinode/node.go b/pkg/solana/client/multinode/node.go new file mode 100644 index 000000000..c3532b1a1 --- /dev/null +++ b/pkg/solana/client/multinode/node.go @@ -0,0 +1,331 @@ +package client + +import ( + "context" + "errors" + "fmt" + "net/url" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/services" +) + +const QueryTimeout = 10 * time.Second + +var errInvalidChainID = errors.New("invalid chain id") + +var ( + promPoolRPCNodeVerifies = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_verifies", + Help: "The total number of chain ID verifications for the given RPC node", + }, []string{"network", "chainID", "nodeName"}) + promPoolRPCNodeVerifiesFailed = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_verifies_failed", + Help: "The total number of failed chain ID verifications for the given RPC node", + }, []string{"network", "chainID", "nodeName"}) + promPoolRPCNodeVerifiesSuccess = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_verifies_success", + Help: "The total number of successful chain ID verifications for the given RPC node", + }, []string{"network", "chainID", "nodeName"}) +) + +type NodeConfig interface { + PollFailureThreshold() uint32 + PollInterval() time.Duration + SelectionMode() string + SyncThreshold() uint32 + NodeIsSyncingEnabled() bool + FinalizedBlockPollInterval() time.Duration + EnforceRepeatableRead() bool + DeathDeclarationDelay() time.Duration +} + +type ChainConfig interface { + NodeNoNewHeadsThreshold() time.Duration + NoNewFinalizedHeadsThreshold() time.Duration + FinalityDepth() uint32 + FinalityTagEnabled() bool + FinalizedBlockOffset() uint32 +} + +type Node[ + CHAIN_ID ID, + RPC any, +] interface { + // State returns most accurate state of the Node on the moment of call. + // While some of the checks may be performed in the background and State may return cached value, critical, like + // `FinalizedBlockOutOfSync`, must be executed upon every call. + State() NodeState + // StateAndLatest returns nodeState with the latest ChainInfo observed by Node during current lifecycle. + StateAndLatest() (NodeState, ChainInfo) + // HighestUserObservations - returns highest ChainInfo ever observed by underlying RPC excluding results of health check requests + HighestUserObservations() ChainInfo + SetPoolChainInfoProvider(PoolChainInfoProvider) + // Name is a unique identifier for this node. 
+ Name() string + // String - returns string representation of the node, useful for debugging (name + URLS used to connect to the RPC) + String() string + RPC() RPC + // UnsubscribeAllExceptAliveLoop - closes all subscriptions except the aliveLoop subscription + UnsubscribeAllExceptAliveLoop() + ConfiguredChainID() CHAIN_ID + // Order - returns priority order configured for the RPC + Order() int32 + // Start - starts health checks + Start(context.Context) error + Close() error +} + +type node[ + CHAIN_ID ID, + HEAD Head, + RPC RPCClient[CHAIN_ID, HEAD], +] struct { + services.StateMachine + lfcLog logger.Logger + name string + id int32 + chainID CHAIN_ID + nodePoolCfg NodeConfig + chainCfg ChainConfig + order int32 + chainFamily string + + ws url.URL + http *url.URL + + rpc RPC + + stateMu sync.RWMutex // protects state* fields + state NodeState + + poolInfoProvider PoolChainInfoProvider + + stopCh services.StopChan + // wg waits for subsidiary goroutines + wg sync.WaitGroup + + aliveLoopSub Subscription + finalizedBlockSub Subscription +} + +func NewNode[ + CHAIN_ID ID, + HEAD Head, + RPC RPCClient[CHAIN_ID, HEAD], +]( + nodeCfg NodeConfig, + chainCfg ChainConfig, + lggr logger.Logger, + wsuri url.URL, + httpuri *url.URL, + name string, + id int32, + chainID CHAIN_ID, + nodeOrder int32, + rpc RPC, + chainFamily string, +) Node[CHAIN_ID, RPC] { + n := new(node[CHAIN_ID, HEAD, RPC]) + n.name = name + n.id = id + n.chainID = chainID + n.nodePoolCfg = nodeCfg + n.chainCfg = chainCfg + n.ws = wsuri + n.order = nodeOrder + if httpuri != nil { + n.http = httpuri + } + n.stopCh = make(services.StopChan) + lggr = logger.Named(lggr, "Node") + lggr = logger.With(lggr, + "nodeTier", Primary.String(), + "nodeName", name, + "node", n.String(), + "chainID", chainID, + "nodeOrder", n.order, + ) + n.lfcLog = logger.Named(lggr, "Lifecycle") + n.rpc = rpc + n.chainFamily = chainFamily + return n +} + +func (n *node[CHAIN_ID, HEAD, RPC]) String() string { + s := fmt.Sprintf("(%s)%s:%s", Primary.String(), n.name, n.ws.String()) + if n.http != nil { + s = s + fmt.Sprintf(":%s", n.http.String()) + } + return s +} + +func (n *node[CHAIN_ID, HEAD, RPC]) ConfiguredChainID() (chainID CHAIN_ID) { + return n.chainID +} + +func (n *node[CHAIN_ID, HEAD, RPC]) Name() string { + return n.name +} + +func (n *node[CHAIN_ID, HEAD, RPC]) RPC() RPC { + return n.rpc +} + +// unsubscribeAllExceptAliveLoop is not thread-safe; it should only be called +// while holding the stateMu lock. +func (n *node[CHAIN_ID, HEAD, RPC]) unsubscribeAllExceptAliveLoop() { + aliveLoopSub := n.aliveLoopSub + finalizedBlockSub := n.finalizedBlockSub + n.rpc.UnsubscribeAllExcept(aliveLoopSub, finalizedBlockSub) +} + +func (n *node[CHAIN_ID, HEAD, RPC]) UnsubscribeAllExceptAliveLoop() { + n.stateMu.Lock() + defer n.stateMu.Unlock() + n.unsubscribeAllExceptAliveLoop() +} + +func (n *node[CHAIN_ID, HEAD, RPC]) Close() error { + return n.StopOnce(n.name, n.close) +} + +func (n *node[CHAIN_ID, HEAD, RPC]) close() error { + defer func() { + n.wg.Wait() + n.rpc.Close() + }() + + n.stateMu.Lock() + defer n.stateMu.Unlock() + + close(n.stopCh) + n.state = NodeStateClosed + return nil +} + +// Start dials and verifies the node +// Should only be called once in a node's lifecycle +// Return value is necessary to conform to interface but this will never +// actually return an error. 
+func (n *node[CHAIN_ID, HEAD, RPC]) Start(startCtx context.Context) error { + return n.StartOnce(n.name, func() error { + n.start(startCtx) + return nil + }) +} + +// start initially dials the node and verifies chain ID +// This spins off lifecycle goroutines. +// Not thread-safe. +// Node lifecycle is synchronous: only one goroutine should be running at a +// time. +func (n *node[CHAIN_ID, HEAD, RPC]) start(startCtx context.Context) { + if n.state != NodeStateUndialed { + panic(fmt.Sprintf("cannot dial node with state %v", n.state)) + } + + if err := n.rpc.Dial(startCtx); err != nil { + n.lfcLog.Errorw("Dial failed: Node is unreachable", "err", err) + n.declareUnreachable() + return + } + n.setState(NodeStateDialed) + + state := n.verifyConn(startCtx, n.lfcLog) + n.declareState(state) +} + +// verifyChainID checks that connection to the node matches the given chain ID +// Not thread-safe +// Pure verifyChainID: does not mutate node "state" field. +func (n *node[CHAIN_ID, HEAD, RPC]) verifyChainID(callerCtx context.Context, lggr logger.Logger) NodeState { + promPoolRPCNodeVerifies.WithLabelValues(n.chainFamily, n.chainID.String(), n.name).Inc() + promFailed := func() { + promPoolRPCNodeVerifiesFailed.WithLabelValues(n.chainFamily, n.chainID.String(), n.name).Inc() + } + + st := n.getCachedState() + switch st { + case NodeStateClosed: + // The node is already closed, and any subsequent transition is invalid. + // To make spotting such transitions a bit easier, return the invalid node state. + return NodeStateLen + case NodeStateDialed, NodeStateOutOfSync, NodeStateInvalidChainID, NodeStateSyncing: + default: + panic(fmt.Sprintf("cannot verify node in state %v", st)) + } + + var chainID CHAIN_ID + var err error + if chainID, err = n.rpc.ChainID(callerCtx); err != nil { + promFailed() + lggr.Errorw("Failed to verify chain ID for node", "err", err, "nodeState", n.getCachedState()) + return NodeStateUnreachable + } else if chainID.String() != n.chainID.String() { + promFailed() + err = fmt.Errorf( + "rpc ChainID doesn't match local chain ID: RPC ID=%s, local ID=%s, node name=%s: %w", + chainID.String(), + n.chainID.String(), + n.name, + errInvalidChainID, + ) + lggr.Errorw("Failed to verify RPC node; remote endpoint returned the wrong chain ID", "err", err, "nodeState", n.getCachedState()) + return NodeStateInvalidChainID + } + + promPoolRPCNodeVerifiesSuccess.WithLabelValues(n.chainFamily, n.chainID.String(), n.name).Inc() + + return NodeStateAlive +} + +// createVerifiedConn - establishes new connection with the RPC and verifies that it's valid: chainID matches, and it's not syncing. +// Returns desired state if one of the verifications fails. Otherwise, returns NodeStateAlive. +func (n *node[CHAIN_ID, HEAD, RPC]) createVerifiedConn(ctx context.Context, lggr logger.Logger) NodeState { + if err := n.rpc.Dial(ctx); err != nil { + n.lfcLog.Errorw("Dial failed: Node is unreachable", "err", err, "nodeState", n.getCachedState()) + return NodeStateUnreachable + } + + return n.verifyConn(ctx, lggr) +} + +// verifyConn - verifies that current connection is valid: chainID matches, and it's not syncing. +// Returns desired state if one of the verifications fails. Otherwise, returns NodeStateAlive. 
+func (n *node[CHAIN_ID, HEAD, RPC]) verifyConn(ctx context.Context, lggr logger.Logger) NodeState { + state := n.verifyChainID(ctx, lggr) + if state != NodeStateAlive { + return state + } + + if n.nodePoolCfg.NodeIsSyncingEnabled() { + isSyncing, err := n.rpc.IsSyncing(ctx) + if err != nil { + lggr.Errorw("Unexpected error while verifying RPC node synchronization status", "err", err, "nodeState", n.getCachedState()) + return NodeStateUnreachable + } + + if isSyncing { + lggr.Errorw("Verification failed: Node is syncing", "nodeState", n.getCachedState()) + return NodeStateSyncing + } + } + + return NodeStateAlive +} + +func (n *node[CHAIN_ID, HEAD, RPC]) Order() int32 { + return n.order +} + +func (n *node[CHAIN_ID, HEAD, RPC]) newCtx() (context.Context, context.CancelFunc) { + ctx, cancel := n.stopCh.NewCtx() + ctx = CtxAddHealthCheckFlag(ctx) + return ctx, cancel +} diff --git a/pkg/solana/client/multinode/node_fsm.go b/pkg/solana/client/multinode/node_fsm.go new file mode 100644 index 000000000..1111210c4 --- /dev/null +++ b/pkg/solana/client/multinode/node_fsm.go @@ -0,0 +1,370 @@ +package client + +import ( + "fmt" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + promPoolRPCNodeTransitionsToAlive = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_num_transitions_to_alive", + Help: transitionString(NodeStateAlive), + }, []string{"chainID", "nodeName"}) + promPoolRPCNodeTransitionsToInSync = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_num_transitions_to_in_sync", + Help: fmt.Sprintf("%s to %s", transitionString(NodeStateOutOfSync), NodeStateAlive), + }, []string{"chainID", "nodeName"}) + promPoolRPCNodeTransitionsToOutOfSync = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_num_transitions_to_out_of_sync", + Help: transitionString(NodeStateOutOfSync), + }, []string{"chainID", "nodeName"}) + promPoolRPCNodeTransitionsToUnreachable = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_num_transitions_to_unreachable", + Help: transitionString(NodeStateUnreachable), + }, []string{"chainID", "nodeName"}) + promPoolRPCNodeTransitionsToInvalidChainID = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_num_transitions_to_invalid_chain_id", + Help: transitionString(NodeStateInvalidChainID), + }, []string{"chainID", "nodeName"}) + promPoolRPCNodeTransitionsToUnusable = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_num_transitions_to_unusable", + Help: transitionString(NodeStateUnusable), + }, []string{"chainID", "nodeName"}) + promPoolRPCNodeTransitionsToSyncing = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_num_transitions_to_syncing", + Help: transitionString(NodeStateSyncing), + }, []string{"chainID", "nodeName"}) +) + +// NodeState represents the current state of the node +// Node is a FSM (finite state machine) +type NodeState int + +func (n NodeState) String() string { + switch n { + case NodeStateUndialed: + return "Undialed" + case NodeStateDialed: + return "Dialed" + case NodeStateInvalidChainID: + return "InvalidChainID" + case NodeStateAlive: + return "Alive" + case NodeStateUnreachable: + return "Unreachable" + case NodeStateUnusable: + return "Unusable" + case NodeStateOutOfSync: + return "OutOfSync" + case NodeStateClosed: + return "Closed" + case NodeStateSyncing: + return "Syncing" + case NodeStateFinalizedBlockOutOfSync: + return 
"FinalizedBlockOutOfSync" + default: + return fmt.Sprintf("NodeState(%d)", n) + } +} + +// GoString prints a prettier state +func (n NodeState) GoString() string { + return fmt.Sprintf("NodeState%s(%d)", n.String(), n) +} + +const ( + // NodeStateUndialed is the first state of a virgin node + NodeStateUndialed = NodeState(iota) + // NodeStateDialed is after a node has successfully dialed but before it has verified the correct chain ID + NodeStateDialed + // NodeStateInvalidChainID is after chain ID verification failed + NodeStateInvalidChainID + // NodeStateAlive is a healthy node after chain ID verification succeeded + NodeStateAlive + // NodeStateUnreachable is a node that cannot be dialed or has disconnected + NodeStateUnreachable + // NodeStateOutOfSync is a node that is accepting connections but exceeded + // the failure threshold without sending any new heads. It will be + // disconnected, then put into a revive loop and re-awakened after redial + // if a new head arrives + NodeStateOutOfSync + // NodeStateUnusable is a sendonly node that has an invalid URL that can never be reached + NodeStateUnusable + // NodeStateClosed is after the connection has been closed and the node is at the end of its lifecycle + NodeStateClosed + // NodeStateSyncing is a node that is actively back-filling blockchain. Usually, it's a newly set up node that is + // still syncing the chain. The main difference from `NodeStateOutOfSync` is that it represents state relative + // to other primary nodes configured in the MultiNode. In contrast, `NodeStateSyncing` represents the internal state of + // the node (RPC). + NodeStateSyncing + // nodeStateFinalizedBlockOutOfSync - node is lagging behind on latest finalized block + NodeStateFinalizedBlockOutOfSync + // nodeStateLen tracks the number of states + NodeStateLen +) + +// allNodeStates represents all possible states a node can be in +var allNodeStates []NodeState + +func init() { + for s := NodeState(0); s < NodeStateLen; s++ { + allNodeStates = append(allNodeStates, s) + } +} + +// FSM methods + +// State allows reading the current state of the node. +func (n *node[CHAIN_ID, HEAD, RPC]) State() NodeState { + n.stateMu.RLock() + defer n.stateMu.RUnlock() + return n.recalculateState() +} + +func (n *node[CHAIN_ID, HEAD, RPC]) getCachedState() NodeState { + n.stateMu.RLock() + defer n.stateMu.RUnlock() + return n.state +} + +func (n *node[CHAIN_ID, HEAD, RPC]) recalculateState() NodeState { + if n.state != NodeStateAlive { + return n.state + } + + // double check that node is not lagging on finalized block + if n.nodePoolCfg.EnforceRepeatableRead() && n.isFinalizedBlockOutOfSync() { + return NodeStateFinalizedBlockOutOfSync + } + + return NodeStateAlive +} + +func (n *node[CHAIN_ID, HEAD, RPC]) isFinalizedBlockOutOfSync() bool { + if n.poolInfoProvider == nil { + return false + } + + highestObservedByCaller := n.poolInfoProvider.HighestUserObservations() + latest, _ := n.rpc.GetInterceptedChainInfo() + if n.chainCfg.FinalityTagEnabled() { + return latest.FinalizedBlockNumber < highestObservedByCaller.FinalizedBlockNumber-int64(n.chainCfg.FinalizedBlockOffset()) + } + + return latest.BlockNumber < highestObservedByCaller.BlockNumber-int64(n.chainCfg.FinalizedBlockOffset()) +} + +// StateAndLatest returns nodeState with the latest ChainInfo observed by Node during current lifecycle. 
+func (n *node[CHAIN_ID, HEAD, RPC]) StateAndLatest() (NodeState, ChainInfo) { + n.stateMu.RLock() + defer n.stateMu.RUnlock() + latest, _ := n.rpc.GetInterceptedChainInfo() + return n.recalculateState(), latest +} + +// HighestUserObservations - returns highest ChainInfo ever observed by external user of the Node +func (n *node[CHAIN_ID, HEAD, RPC]) HighestUserObservations() ChainInfo { + _, highestUserObservations := n.rpc.GetInterceptedChainInfo() + return highestUserObservations +} +func (n *node[CHAIN_ID, HEAD, RPC]) SetPoolChainInfoProvider(poolInfoProvider PoolChainInfoProvider) { + n.poolInfoProvider = poolInfoProvider +} + +// setState is only used by internal state management methods. +// This is low-level; care should be taken by the caller to ensure the new state is a valid transition. +// State changes should always be synchronous: only one goroutine at a time should change state. +// n.stateMu should not be locked for long periods of time because external clients expect a timely response from n.State() +func (n *node[CHAIN_ID, HEAD, RPC]) setState(s NodeState) { + n.stateMu.Lock() + defer n.stateMu.Unlock() + n.state = s +} + +// declareXXX methods change the state and pass conrol off the new state +// management goroutine + +func (n *node[CHAIN_ID, HEAD, RPC]) declareAlive() { + n.transitionToAlive(func() { + n.lfcLog.Infow("RPC Node is online", "nodeState", n.state) + n.wg.Add(1) + go n.aliveLoop() + }) +} + +func (n *node[CHAIN_ID, HEAD, RPC]) transitionToAlive(fn func()) { + promPoolRPCNodeTransitionsToAlive.WithLabelValues(n.chainID.String(), n.name).Inc() + n.stateMu.Lock() + defer n.stateMu.Unlock() + if n.state == NodeStateClosed { + return + } + switch n.state { + case NodeStateDialed, NodeStateInvalidChainID, NodeStateSyncing: + n.state = NodeStateAlive + default: + panic(transitionFail(n.state, NodeStateAlive)) + } + fn() +} + +// declareInSync puts a node back into Alive state, allowing it to be used by +// pool consumers again +func (n *node[CHAIN_ID, HEAD, RPC]) declareInSync() { + n.transitionToInSync(func() { + n.lfcLog.Infow("RPC Node is back in sync", "nodeState", n.state) + n.wg.Add(1) + go n.aliveLoop() + }) +} + +func (n *node[CHAIN_ID, HEAD, RPC]) transitionToInSync(fn func()) { + promPoolRPCNodeTransitionsToAlive.WithLabelValues(n.chainID.String(), n.name).Inc() + promPoolRPCNodeTransitionsToInSync.WithLabelValues(n.chainID.String(), n.name).Inc() + n.stateMu.Lock() + defer n.stateMu.Unlock() + if n.state == NodeStateClosed { + return + } + switch n.state { + case NodeStateOutOfSync, NodeStateSyncing: + n.state = NodeStateAlive + default: + panic(transitionFail(n.state, NodeStateAlive)) + } + fn() +} + +// declareOutOfSync puts a node into OutOfSync state, disconnecting all current +// clients and making it unavailable for use until back in-sync. 
+func (n *node[CHAIN_ID, HEAD, RPC]) declareOutOfSync(syncIssues syncStatus) { + n.transitionToOutOfSync(func() { + n.lfcLog.Errorw("RPC Node is out of sync", "nodeState", n.state, "syncIssues", syncIssues) + n.wg.Add(1) + go n.outOfSyncLoop(syncIssues) + }) +} + +func (n *node[CHAIN_ID, HEAD, RPC]) transitionToOutOfSync(fn func()) { + promPoolRPCNodeTransitionsToOutOfSync.WithLabelValues(n.chainID.String(), n.name).Inc() + n.stateMu.Lock() + defer n.stateMu.Unlock() + if n.state == NodeStateClosed { + return + } + switch n.state { + case NodeStateAlive: + n.unsubscribeAllExceptAliveLoop() + n.state = NodeStateOutOfSync + default: + panic(transitionFail(n.state, NodeStateOutOfSync)) + } + fn() +} + +func (n *node[CHAIN_ID, HEAD, RPC]) declareUnreachable() { + n.transitionToUnreachable(func() { + n.lfcLog.Errorw("RPC Node is unreachable", "nodeState", n.state) + n.wg.Add(1) + go n.unreachableLoop() + }) +} + +func (n *node[CHAIN_ID, HEAD, RPC]) transitionToUnreachable(fn func()) { + promPoolRPCNodeTransitionsToUnreachable.WithLabelValues(n.chainID.String(), n.name).Inc() + n.stateMu.Lock() + defer n.stateMu.Unlock() + if n.state == NodeStateClosed { + return + } + switch n.state { + case NodeStateUndialed, NodeStateDialed, NodeStateAlive, NodeStateOutOfSync, NodeStateInvalidChainID, NodeStateSyncing: + n.unsubscribeAllExceptAliveLoop() + n.state = NodeStateUnreachable + default: + panic(transitionFail(n.state, NodeStateUnreachable)) + } + fn() +} + +func (n *node[CHAIN_ID, HEAD, RPC]) declareState(state NodeState) { + if n.getCachedState() == NodeStateClosed { + return + } + switch state { + case NodeStateInvalidChainID: + n.declareInvalidChainID() + case NodeStateUnreachable: + n.declareUnreachable() + case NodeStateSyncing: + n.declareSyncing() + case NodeStateAlive: + n.declareAlive() + default: + panic(fmt.Sprintf("%#v state declaration is not implemented", state)) + } +} + +func (n *node[CHAIN_ID, HEAD, RPC]) declareInvalidChainID() { + n.transitionToInvalidChainID(func() { + n.lfcLog.Errorw("RPC Node has the wrong chain ID", "nodeState", n.state) + n.wg.Add(1) + go n.invalidChainIDLoop() + }) +} + +func (n *node[CHAIN_ID, HEAD, RPC]) transitionToInvalidChainID(fn func()) { + promPoolRPCNodeTransitionsToInvalidChainID.WithLabelValues(n.chainID.String(), n.name).Inc() + n.stateMu.Lock() + defer n.stateMu.Unlock() + if n.state == NodeStateClosed { + return + } + switch n.state { + case NodeStateDialed, NodeStateOutOfSync, NodeStateSyncing: + n.unsubscribeAllExceptAliveLoop() + n.state = NodeStateInvalidChainID + default: + panic(transitionFail(n.state, NodeStateInvalidChainID)) + } + fn() +} + +func (n *node[CHAIN_ID, HEAD, RPC]) declareSyncing() { + n.transitionToSyncing(func() { + n.lfcLog.Errorw("RPC Node is syncing", "nodeState", n.state) + n.wg.Add(1) + go n.syncingLoop() + }) +} + +func (n *node[CHAIN_ID, HEAD, RPC]) transitionToSyncing(fn func()) { + promPoolRPCNodeTransitionsToSyncing.WithLabelValues(n.chainID.String(), n.name).Inc() + n.stateMu.Lock() + defer n.stateMu.Unlock() + if n.state == NodeStateClosed { + return + } + switch n.state { + case NodeStateDialed, NodeStateOutOfSync, NodeStateInvalidChainID: + n.unsubscribeAllExceptAliveLoop() + n.state = NodeStateSyncing + default: + panic(transitionFail(n.state, NodeStateSyncing)) + } + + if !n.nodePoolCfg.NodeIsSyncingEnabled() { + panic("unexpected transition to NodeStateSyncing, while it's disabled") + } + fn() +} + +func transitionString(state NodeState) string { + return fmt.Sprintf("Total number of times node has 
transitioned to %s", state) +} + +func transitionFail(from NodeState, to NodeState) string { + return fmt.Sprintf("cannot transition from %#v to %#v", from, to) +} diff --git a/pkg/solana/client/multinode/node_lifecycle.go b/pkg/solana/client/multinode/node_lifecycle.go new file mode 100644 index 000000000..823a1abc3 --- /dev/null +++ b/pkg/solana/client/multinode/node_lifecycle.go @@ -0,0 +1,732 @@ +package client + +import ( + "context" + "fmt" + "math" + "math/big" + "time" + + "github.com/smartcontractkit/chainlink/v2/common/types" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/utils" + bigmath "github.com/smartcontractkit/chainlink-common/pkg/utils/big_math" + + iutils "github.com/smartcontractkit/chainlink/v2/common/internal/utils" +) + +var ( + promPoolRPCNodeHighestSeenBlock = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "pool_rpc_node_highest_seen_block", + Help: "The highest seen block for the given RPC node", + }, []string{"chainID", "nodeName"}) + promPoolRPCNodeHighestFinalizedBlock = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "pool_rpc_node_highest_finalized_block", + Help: "The highest seen finalized block for the given RPC node", + }, []string{"chainID", "nodeName"}) + promPoolRPCNodeNumSeenBlocks = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_num_seen_blocks", + Help: "The total number of new blocks seen by the given RPC node", + }, []string{"chainID", "nodeName"}) + promPoolRPCNodePolls = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_polls_total", + Help: "The total number of poll checks for the given RPC node", + }, []string{"chainID", "nodeName"}) + promPoolRPCNodePollsFailed = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_polls_failed", + Help: "The total number of failed poll checks for the given RPC node", + }, []string{"chainID", "nodeName"}) + promPoolRPCNodePollsSuccess = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pool_rpc_node_polls_success", + Help: "The total number of successful poll checks for the given RPC node", + }, []string{"chainID", "nodeName"}) +) + +// zombieNodeCheckInterval controls how often to re-check to see if we need to +// state change in case we have to force a state transition due to no available +// nodes. +// NOTE: This only applies to out-of-sync nodes if they are the last available node +func zombieNodeCheckInterval(noNewHeadsThreshold time.Duration) time.Duration { + interval := noNewHeadsThreshold + if interval <= 0 || interval > QueryTimeout { + interval = QueryTimeout + } + return utils.WithJitter(interval) +} + +const ( + msgCannotDisable = "but cannot disable this connection because there are no other RPC endpoints, or all other RPC endpoints are dead." + msgDegradedState = "Chainlink is now operating in a degraded state and urgent action is required to resolve the issue" +) + +// Node is a FSM +// Each state has a loop that goes with it, which monitors the node and moves it into another state as necessary. +// Only one loop must run at a time. +// Each loop passes control onto the next loop as it exits, except when the node is Closed which terminates the loop permanently. 
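+
+// Example (illustrative sketch only): the syncStatus values defined in models.go are bit
+// flags, so a lifecycle loop can report more than one sync problem at once. The variables
+// below are hypothetical and exist purely to show how the flags compose; declareOutOfSync
+// accepts the combined value just like a single flag.
+var exampleSyncIssues = syncStatusNotInSyncWithPool | syncStatusNoNewHead
+
+// String() renders each set flag, e.g. "NotInSyncWithRPCPool,NoNewHead" for the value above.
+var _ = exampleSyncIssues.String()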
+ +// This handles node lifecycle for the ALIVE state +// Should only be run ONCE per node, after a successful Dial +func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() { + defer n.wg.Done() + ctx, cancel := n.newCtx() + defer cancel() + + { + // sanity check + state := n.getCachedState() + switch state { + case NodeStateAlive: + case NodeStateClosed: + return + default: + panic(fmt.Sprintf("aliveLoop can only run for node in Alive state, got: %s", state)) + } + } + + noNewHeadsTimeoutThreshold := n.chainCfg.NodeNoNewHeadsThreshold() + noNewFinalizedBlocksTimeoutThreshold := n.chainCfg.NoNewFinalizedHeadsThreshold() + pollFailureThreshold := n.nodePoolCfg.PollFailureThreshold() + pollInterval := n.nodePoolCfg.PollInterval() + + lggr := logger.Sugared(n.lfcLog).Named("Alive").With("noNewHeadsTimeoutThreshold", noNewHeadsTimeoutThreshold, "pollInterval", pollInterval, "pollFailureThreshold", pollFailureThreshold) + lggr.Tracew("Alive loop starting", "nodeState", n.getCachedState()) + + headsSub, err := n.registerNewSubscription(ctx, lggr.With("subscriptionType", "heads"), + n.chainCfg.NodeNoNewHeadsThreshold(), n.rpc.SubscribeToHeads) + if err != nil { + lggr.Errorw("Initial subscribe for heads failed", "nodeState", n.getCachedState(), "err", err) + n.declareUnreachable() + return + } + + n.stateMu.Lock() + n.aliveLoopSub = headsSub.sub + n.stateMu.Unlock() + defer func() { + defer headsSub.sub.Unsubscribe() + n.stateMu.Lock() + n.aliveLoopSub = nil + n.stateMu.Unlock() + }() + + var pollCh <-chan time.Time + if pollInterval > 0 { + lggr.Debug("Polling enabled") + pollT := time.NewTicker(pollInterval) + defer pollT.Stop() + pollCh = pollT.C + if pollFailureThreshold > 0 { + // polling can be enabled with no threshold to enable polling but + // the node will not be marked offline regardless of the number of + // poll failures + lggr.Debug("Polling liveness checking enabled") + } + } else { + lggr.Debug("Polling disabled") + } + + var finalizedHeadsSub headSubscription[HEAD] + if n.chainCfg.FinalityTagEnabled() { + finalizedHeadsSub, err = n.registerNewSubscription(ctx, lggr.With("subscriptionType", "finalizedHeads"), + n.chainCfg.NoNewFinalizedHeadsThreshold(), n.rpc.SubscribeToFinalizedHeads) + if err != nil { + lggr.Errorw("Failed to subscribe to finalized heads", "err", err) + n.declareUnreachable() + return + } + + n.stateMu.Lock() + n.finalizedBlockSub = finalizedHeadsSub.sub + n.stateMu.Unlock() + defer func() { + finalizedHeadsSub.Unsubscribe() + n.stateMu.Lock() + n.finalizedBlockSub = nil + n.stateMu.Unlock() + }() + } + + localHighestChainInfo, _ := n.rpc.GetInterceptedChainInfo() + var pollFailures uint32 + + for { + select { + case <-ctx.Done(): + return + case <-pollCh: + promPoolRPCNodePolls.WithLabelValues(n.chainID.String(), n.name).Inc() + lggr.Tracew("Pinging RPC", "nodeState", n.State(), "pollFailures", pollFailures) + pollCtx, cancel := context.WithTimeout(ctx, pollInterval) + err = n.RPC().Ping(pollCtx) + cancel() + if err != nil { + // prevent overflow + if pollFailures < math.MaxUint32 { + promPoolRPCNodePollsFailed.WithLabelValues(n.chainID.String(), n.name).Inc() + pollFailures++ + } + lggr.Warnw(fmt.Sprintf("Poll failure, RPC endpoint %s failed to respond properly", n.String()), "err", err, "pollFailures", pollFailures, "nodeState", n.getCachedState()) + } else { + lggr.Debugw("Ping successful", "nodeState", n.State()) + promPoolRPCNodePollsSuccess.WithLabelValues(n.chainID.String(), n.name).Inc() + pollFailures = 0 + } + if pollFailureThreshold > 0 && pollFailures >= 
pollFailureThreshold { + lggr.Errorw(fmt.Sprintf("RPC endpoint failed to respond to %d consecutive polls", pollFailures), "pollFailures", pollFailures, "nodeState", n.getCachedState()) + if n.poolInfoProvider != nil { + if l, _ := n.poolInfoProvider.LatestChainInfo(); l < 2 { + lggr.Criticalf("RPC endpoint failed to respond to polls; %s %s", msgCannotDisable, msgDegradedState) + continue + } + } + n.declareUnreachable() + return + } + _, latestChainInfo := n.StateAndLatest() + if outOfSync, liveNodes := n.syncStatus(latestChainInfo.BlockNumber, latestChainInfo.TotalDifficulty); outOfSync { + // note: there must be another live node for us to be out of sync + lggr.Errorw("RPC endpoint has fallen behind", "blockNumber", latestChainInfo.BlockNumber, "totalDifficulty", latestChainInfo.TotalDifficulty, "nodeState", n.getCachedState()) + if liveNodes < 2 { + lggr.Criticalf("RPC endpoint has fallen behind; %s %s", msgCannotDisable, msgDegradedState) + continue + } + n.declareOutOfSync(syncStatusNotInSyncWithPool) + return + } + case bh, open := <-headsSub.Heads: + if !open { + lggr.Errorw("Subscription channel unexpectedly closed", "nodeState", n.getCachedState()) + n.declareUnreachable() + return + } + receivedNewHead := n.onNewHead(lggr, &localHighestChainInfo, bh) + if receivedNewHead && noNewHeadsTimeoutThreshold > 0 { + headsSub.ResetTimer(noNewHeadsTimeoutThreshold) + } + case err = <-headsSub.Errors: + lggr.Errorw("Subscription was terminated", "err", err, "nodeState", n.getCachedState()) + n.declareUnreachable() + return + case <-headsSub.NoNewHeads: + // We haven't received a head on the channel for at least the + // threshold amount of time, mark it broken + lggr.Errorw(fmt.Sprintf("RPC endpoint detected out of sync; no new heads received for %s (last head received was %v)", noNewHeadsTimeoutThreshold, localHighestChainInfo.BlockNumber), "nodeState", n.getCachedState(), "latestReceivedBlockNumber", localHighestChainInfo.BlockNumber, "noNewHeadsTimeoutThreshold", noNewHeadsTimeoutThreshold) + if n.poolInfoProvider != nil { + if l, _ := n.poolInfoProvider.LatestChainInfo(); l < 2 { + lggr.Criticalf("RPC endpoint detected out of sync; %s %s", msgCannotDisable, msgDegradedState) + // We don't necessarily want to wait the full timeout to check again, we should + // check regularly and log noisily in this state + headsSub.ResetTimer(zombieNodeCheckInterval(noNewHeadsTimeoutThreshold)) + continue + } + } + n.declareOutOfSync(syncStatusNoNewHead) + return + case latestFinalized, open := <-finalizedHeadsSub.Heads: + if !open { + lggr.Errorw("Finalized heads subscription channel unexpectedly closed") + n.declareUnreachable() + return + } + if !latestFinalized.IsValid() { + lggr.Warn("Latest finalized block is not valid") + continue + } + + latestFinalizedBN := latestFinalized.BlockNumber() + if latestFinalizedBN > localHighestChainInfo.FinalizedBlockNumber { + promPoolRPCNodeHighestFinalizedBlock.WithLabelValues(n.chainID.String(), n.name).Set(float64(latestFinalizedBN)) + localHighestChainInfo.FinalizedBlockNumber = latestFinalizedBN + } + + case <-finalizedHeadsSub.NoNewHeads: + // We haven't received a finalized head on the channel for at least the + // threshold amount of time, mark it broken + lggr.Errorw(fmt.Sprintf("RPC's finalized state is out of sync; no new finalized heads received for %s (last finalized head received was %v)", noNewFinalizedBlocksTimeoutThreshold, localHighestChainInfo.FinalizedBlockNumber), "latestReceivedBlockNumber", localHighestChainInfo.BlockNumber) + if 
n.poolInfoProvider != nil { + if l, _ := n.poolInfoProvider.LatestChainInfo(); l < 2 { + lggr.Criticalf("RPC's finalized state is out of sync; %s %s", msgCannotDisable, msgDegradedState) + // We don't necessarily want to wait the full timeout to check again, we should + // check regularly and log noisily in this state + finalizedHeadsSub.ResetTimer(zombieNodeCheckInterval(noNewFinalizedBlocksTimeoutThreshold)) + continue + } + } + n.declareOutOfSync(syncStatusNoNewFinalizedHead) + return + case <-finalizedHeadsSub.Errors: + lggr.Errorw("Finalized heads subscription was terminated", "err", err) + n.declareUnreachable() + return + } + } +} + +type headSubscription[HEAD any] struct { + Heads <-chan HEAD + Errors <-chan error + NoNewHeads <-chan time.Time + + noNewHeadsTicker *time.Ticker + sub types.Subscription + cleanUpTasks []func() +} + +func (sub *headSubscription[HEAD]) ResetTimer(duration time.Duration) { + sub.noNewHeadsTicker.Reset(duration) +} + +func (sub *headSubscription[HEAD]) Unsubscribe() { + for _, doCleanUp := range sub.cleanUpTasks { + doCleanUp() + } +} + +func (n *node[CHAIN_ID, HEAD, PRC]) registerNewSubscription(ctx context.Context, lggr logger.SugaredLogger, + noNewDataThreshold time.Duration, newSub func(ctx context.Context) (<-chan HEAD, types.Subscription, error)) (headSubscription[HEAD], error) { + result := headSubscription[HEAD]{} + var err error + var sub types.Subscription + result.Heads, sub, err = newSub(ctx) + if err != nil { + return result, err + } + + result.Errors = sub.Err() + lggr.Debug("Successfully subscribed") + + // TODO: will be removed as part of merging effort with BCI-2875 + result.sub = sub + //n.stateMu.Lock() + //n.healthCheckSubs = append(n.healthCheckSubs, sub) + //n.stateMu.Unlock() + + result.cleanUpTasks = append(result.cleanUpTasks, sub.Unsubscribe) + + if noNewDataThreshold > 0 { + lggr.Debugw("Subscription liveness checking enabled") + result.noNewHeadsTicker = time.NewTicker(noNewDataThreshold) + result.NoNewHeads = result.noNewHeadsTicker.C + result.cleanUpTasks = append(result.cleanUpTasks, result.noNewHeadsTicker.Stop) + } else { + lggr.Debug("Subscription liveness checking disabled") + } + + return result, nil +} + +func (n *node[CHAIN_ID, HEAD, RPC]) onNewFinalizedHead(lggr logger.SugaredLogger, chainInfo *ChainInfo, latestFinalized HEAD) bool { + if !latestFinalized.IsValid() { + lggr.Warn("Latest finalized block is not valid") + return false + } + + latestFinalizedBN := latestFinalized.BlockNumber() + lggr.Tracew("Got latest finalized head", "latestFinalized", latestFinalized) + if latestFinalizedBN <= chainInfo.FinalizedBlockNumber { + lggr.Tracew("Ignoring previously seen finalized block number") + return false + } + + promPoolRPCNodeHighestFinalizedBlock.WithLabelValues(n.chainID.String(), n.name).Set(float64(latestFinalizedBN)) + chainInfo.FinalizedBlockNumber = latestFinalizedBN + return true +} + +func (n *node[CHAIN_ID, HEAD, RPC]) onNewHead(lggr logger.SugaredLogger, chainInfo *ChainInfo, head HEAD) bool { + if !head.IsValid() { + lggr.Warn("Latest head is not valid") + return false + } + + promPoolRPCNodeNumSeenBlocks.WithLabelValues(n.chainID.String(), n.name).Inc() + lggr.Tracew("Got head", "head", head) + lggr = lggr.With("latestReceivedBlockNumber", chainInfo.BlockNumber, "blockNumber", head.BlockNumber(), "nodeState", n.getCachedState()) + if head.BlockNumber() <= chainInfo.BlockNumber { + lggr.Tracew("Ignoring previously seen block number") + return false + } + + 
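+ // New highest head: update the highest-seen block metric and, when finality tags are disabled, approximate the finalized block as head.BlockNumber()-FinalityDepth().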
promPoolRPCNodeHighestSeenBlock.WithLabelValues(n.chainID.String(), n.name).Set(float64(head.BlockNumber())) + chainInfo.BlockNumber = head.BlockNumber() + + if !n.chainCfg.FinalityTagEnabled() { + latestFinalizedBN := max(head.BlockNumber()-int64(n.chainCfg.FinalityDepth()), 0) + if latestFinalizedBN > chainInfo.FinalizedBlockNumber { + promPoolRPCNodeHighestFinalizedBlock.WithLabelValues(n.chainID.String(), n.name).Set(float64(latestFinalizedBN)) + chainInfo.FinalizedBlockNumber = latestFinalizedBN + } + } + + return true +} + +// syncStatus returns outOfSync true if num or td is more than SyncThreshold behind the best node. +// Always returns outOfSync false for SyncThreshold 0. +// liveNodes is only included when outOfSync is true. +func (n *node[CHAIN_ID, HEAD, RPC]) syncStatus(num int64, td *big.Int) (outOfSync bool, liveNodes int) { + if n.poolInfoProvider == nil { + return // skip for tests + } + threshold := n.nodePoolCfg.SyncThreshold() + if threshold == 0 { + return // disabled + } + // Check against best node + ln, ci := n.poolInfoProvider.LatestChainInfo() + mode := n.nodePoolCfg.SelectionMode() + switch mode { + case NodeSelectionModeHighestHead, NodeSelectionModeRoundRobin, NodeSelectionModePriorityLevel: + return num < ci.BlockNumber-int64(threshold), ln + case NodeSelectionModeTotalDifficulty: + bigThreshold := big.NewInt(int64(threshold)) + return td.Cmp(bigmath.Sub(ci.TotalDifficulty, bigThreshold)) < 0, ln + default: + panic("unrecognized NodeSelectionMode: " + mode) + } +} + +const ( + msgReceivedBlock = "Received block for RPC node, waiting until back in-sync to mark as live again" + msgReceivedFinalizedBlock = "Received new finalized block for RPC node, waiting until back in-sync to mark as live again" + msgInSync = "RPC node back in sync" +) + +// isOutOfSyncWithPool returns outOfSync true if the node's local block number or total difficulty is more than SyncThreshold behind the best node. +// Always returns outOfSync false for SyncThreshold 0. +// liveNodes is only included when outOfSync is true. 
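+// Example: with SyncThreshold=5, HighestHead selection, and the best node at block 1000, a node whose latest block is 994 is out of sync (994 < 1000-5), while a node at 995 is not.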
+func (n *node[CHAIN_ID, HEAD, RPC]) isOutOfSyncWithPool(localState ChainInfo) (outOfSync bool, liveNodes int) { + if n.poolInfoProvider == nil { + n.lfcLog.Warn("skipping sync state against the pool - should only occur in tests") + return // skip for tests + } + threshold := n.nodePoolCfg.SyncThreshold() + if threshold == 0 { + return // disabled + } + // Check against best node + ln, ci := n.poolInfoProvider.LatestChainInfo() + mode := n.nodePoolCfg.SelectionMode() + switch mode { + case NodeSelectionModeHighestHead, NodeSelectionModeRoundRobin, NodeSelectionModePriorityLevel: + return localState.BlockNumber < ci.BlockNumber-int64(threshold), ln + case NodeSelectionModeTotalDifficulty: + bigThreshold := big.NewInt(int64(threshold)) + return localState.TotalDifficulty.Cmp(bigmath.Sub(ci.TotalDifficulty, bigThreshold)) < 0, ln + default: + panic("unrecognized NodeSelectionMode: " + mode) + } +} + +// outOfSyncLoop takes an OutOfSync node and waits until isOutOfSync returns false to go back to live status +func (n *node[CHAIN_ID, HEAD, RPC]) outOfSyncLoop(syncIssues syncStatus) { + defer n.wg.Done() + ctx, cancel := n.newCtx() + defer cancel() + + { + // sanity check + state := n.getCachedState() + switch state { + case NodeStateOutOfSync: + case NodeStateClosed: + return + default: + panic(fmt.Sprintf("outOfSyncLoop can only run for node in OutOfSync state, got: %s", state)) + } + } + + outOfSyncAt := time.Now() + + // set logger name to OutOfSync or FinalizedBlockOutOfSync + lggr := logger.Sugared(logger.Named(n.lfcLog, n.getCachedState().String())).With("nodeState", n.getCachedState()) + lggr.Debugw("Trying to revive out-of-sync RPC node") + + // Need to redial since out-of-sync nodes are automatically disconnected + state := n.createVerifiedConn(ctx, lggr) + if state != NodeStateAlive { + n.declareState(state) + return + } + + noNewHeadsTimeoutThreshold := n.chainCfg.NodeNoNewHeadsThreshold() + headsSub, err := n.registerNewSubscription(ctx, lggr.With("subscriptionType", "heads"), + noNewHeadsTimeoutThreshold, n.rpc.SubscribeToHeads) + if err != nil { + lggr.Errorw("Failed to subscribe heads on out-of-sync RPC node", "err", err) + n.declareUnreachable() + return + } + + lggr.Tracew("Successfully subscribed to heads feed on out-of-sync RPC node") + defer headsSub.Unsubscribe() + + noNewFinalizedBlocksTimeoutThreshold := n.chainCfg.NoNewFinalizedHeadsThreshold() + var finalizedHeadsSub headSubscription[HEAD] + if n.chainCfg.FinalityTagEnabled() { + finalizedHeadsSub, err = n.registerNewSubscription(ctx, lggr.With("subscriptionType", "finalizedHeads"), + noNewFinalizedBlocksTimeoutThreshold, n.rpc.SubscribeToFinalizedHeads) + if err != nil { + lggr.Errorw("Subscribe to finalized heads failed on out-of-sync RPC node", "err", err) + n.declareUnreachable() + return + } + + lggr.Tracew("Successfully subscribed to finalized heads feed on out-of-sync RPC node") + defer finalizedHeadsSub.Unsubscribe() + } + + _, localHighestChainInfo := n.rpc.GetInterceptedChainInfo() + for { + if syncIssues == syncStatusSynced { + // back in-sync! flip back into alive loop + lggr.Infow(fmt.Sprintf("%s: %s. 
Node was out-of-sync for %s", msgInSync, n.String(), time.Since(outOfSyncAt))) + n.declareInSync() + return + } + + select { + case <-ctx.Done(): + return + case head, open := <-headsSub.Heads: + if !open { + lggr.Errorw("Subscription channel unexpectedly closed", "nodeState", n.getCachedState()) + n.declareUnreachable() + return + } + + if !n.onNewHead(lggr, &localHighestChainInfo, head) { + continue + } + + // received a new head - clear NoNewHead flag + syncIssues &= ^syncStatusNoNewHead + if outOfSync, _ := n.isOutOfSyncWithPool(localHighestChainInfo); !outOfSync { + // we caught up with the pool - clear NotInSyncWithPool flag + syncIssues &= ^syncStatusNotInSyncWithPool + } else { + // we've received new head, but lagging behind the pool, add NotInSyncWithPool flag to prevent false transition to alive + syncIssues |= syncStatusNotInSyncWithPool + } + + if noNewHeadsTimeoutThreshold > 0 { + headsSub.ResetTimer(noNewHeadsTimeoutThreshold) + } + + lggr.Debugw(msgReceivedBlock, "blockNumber", head.BlockNumber(), "blockDifficulty", head.BlockDifficulty(), "syncIssues", syncIssues) + case <-time.After(zombieNodeCheckInterval(noNewHeadsTimeoutThreshold)): + if n.poolInfoProvider != nil { + if l, _ := n.poolInfoProvider.LatestChainInfo(); l < 1 { + lggr.Criticalw("RPC endpoint is still out of sync, but there are no other available nodes. This RPC node will be forcibly moved back into the live pool in a degraded state", "syncIssues", syncIssues) + n.declareInSync() + return + } + } + case err := <-headsSub.Errors: + lggr.Errorw("Subscription was terminated", "err", err) + n.declareUnreachable() + return + case <-headsSub.NoNewHeads: + // we are not resetting the timer, as there is no need to add syncStatusNoNewHead until it's removed on new head. + syncIssues |= syncStatusNoNewHead + lggr.Debugw(fmt.Sprintf("No new heads received for %s. Node stays out-of-sync due to sync issues: %s", noNewHeadsTimeoutThreshold, syncIssues)) + case latestFinalized, open := <-finalizedHeadsSub.Heads: + if !open { + lggr.Errorw("Finalized heads subscription channel unexpectedly closed") + n.declareUnreachable() + return + } + if !latestFinalized.IsValid() { + lggr.Warn("Latest finalized block is not valid") + continue + } + + receivedNewHead := n.onNewFinalizedHead(lggr, &localHighestChainInfo, latestFinalized) + if !receivedNewHead { + continue + } + + // on new finalized head remove NoNewFinalizedHead flag from the mask + syncIssues &= ^syncStatusNoNewFinalizedHead + if noNewFinalizedBlocksTimeoutThreshold > 0 { + finalizedHeadsSub.ResetTimer(noNewFinalizedBlocksTimeoutThreshold) + } + + lggr.Debugw(msgReceivedFinalizedBlock, "blockNumber", latestFinalized.BlockNumber(), "syncIssues", syncIssues) + case err := <-finalizedHeadsSub.Errors: + lggr.Errorw("Finalized head subscription was terminated", "err", err) + n.declareUnreachable() + return + case <-finalizedHeadsSub.NoNewHeads: + // we are not resetting the timer, as there is no need to add syncStatusNoNewFinalizedHead until it's removed on new finalized head. + syncIssues |= syncStatusNoNewFinalizedHead + lggr.Debugw(fmt.Sprintf("No new finalized heads received for %s. 
Node stays out-of-sync due to sync issues: %s", noNewFinalizedBlocksTimeoutThreshold, syncIssues)) + } + } +} + +func (n *node[CHAIN_ID, HEAD, RPC]) unreachableLoop() { + defer n.wg.Done() + ctx, cancel := n.newCtx() + defer cancel() + + { + // sanity check + state := n.getCachedState() + switch state { + case NodeStateUnreachable: + case NodeStateClosed: + return + default: + panic(fmt.Sprintf("unreachableLoop can only run for node in Unreachable state, got: %s", state)) + } + } + + unreachableAt := time.Now() + + lggr := logger.Sugared(logger.Named(n.lfcLog, "Unreachable")) + lggr.Debugw("Trying to revive unreachable RPC node", "nodeState", n.getCachedState()) + + dialRetryBackoff := iutils.NewRedialBackoff() + + for { + select { + case <-ctx.Done(): + return + case <-time.After(dialRetryBackoff.Duration()): + lggr.Tracew("Trying to re-dial RPC node", "nodeState", n.getCachedState()) + + err := n.rpc.Dial(ctx) + if err != nil { + lggr.Errorw(fmt.Sprintf("Failed to redial RPC node; still unreachable: %v", err), "err", err, "nodeState", n.getCachedState()) + continue + } + + n.setState(NodeStateDialed) + + state := n.verifyConn(ctx, lggr) + switch state { + case NodeStateUnreachable: + n.setState(NodeStateUnreachable) + continue + case NodeStateAlive: + lggr.Infow(fmt.Sprintf("Successfully redialled and verified RPC node %s. Node was offline for %s", n.String(), time.Since(unreachableAt)), "nodeState", n.getCachedState()) + fallthrough + default: + n.declareState(state) + return + } + } + } +} + +func (n *node[CHAIN_ID, HEAD, RPC]) invalidChainIDLoop() { + defer n.wg.Done() + ctx, cancel := n.newCtx() + defer cancel() + + { + // sanity check + state := n.getCachedState() + switch state { + case NodeStateInvalidChainID: + case NodeStateClosed: + return + default: + panic(fmt.Sprintf("invalidChainIDLoop can only run for node in InvalidChainID state, got: %s", state)) + } + } + + invalidAt := time.Now() + + lggr := logger.Named(n.lfcLog, "InvalidChainID") + + // Need to redial since invalid chain ID nodes are automatically disconnected + state := n.createVerifiedConn(ctx, lggr) + if state != NodeStateInvalidChainID { + n.declareState(state) + return + } + + lggr.Debugw(fmt.Sprintf("Periodically re-checking RPC node %s with invalid chain ID", n.String()), "nodeState", n.getCachedState()) + + chainIDRecheckBackoff := iutils.NewRedialBackoff() + + for { + select { + case <-ctx.Done(): + return + case <-time.After(chainIDRecheckBackoff.Duration()): + state := n.verifyConn(ctx, lggr) + switch state { + case NodeStateInvalidChainID: + continue + case NodeStateAlive: + lggr.Infow(fmt.Sprintf("Successfully verified RPC node. 
Node was offline for %s", time.Since(invalidAt)), "nodeState", n.getCachedState()) + fallthrough + default: + n.declareState(state) + return + } + } + } +} + +func (n *node[CHAIN_ID, HEAD, RPC]) syncingLoop() { + defer n.wg.Done() + ctx, cancel := n.newCtx() + defer cancel() + + { + // sanity check + state := n.getCachedState() + switch state { + case NodeStateSyncing: + case NodeStateClosed: + return + default: + panic(fmt.Sprintf("syncingLoop can only run for node in NodeStateSyncing state, got: %s", state)) + } + } + + syncingAt := time.Now() + + lggr := logger.Sugared(logger.Named(n.lfcLog, "Syncing")) + lggr.Debugw(fmt.Sprintf("Periodically re-checking RPC node %s with syncing status", n.String()), "nodeState", n.getCachedState()) + // Need to redial since syncing nodes are automatically disconnected + state := n.createVerifiedConn(ctx, lggr) + if state != NodeStateSyncing { + n.declareState(state) + return + } + + recheckBackoff := iutils.NewRedialBackoff() + + for { + select { + case <-ctx.Done(): + return + case <-time.After(recheckBackoff.Duration()): + lggr.Tracew("Trying to recheck if the node is still syncing", "nodeState", n.getCachedState()) + isSyncing, err := n.rpc.IsSyncing(ctx) + if err != nil { + lggr.Errorw("Unexpected error while verifying RPC node synchronization status", "err", err, "nodeState", n.getCachedState()) + n.declareUnreachable() + return + } + + if isSyncing { + lggr.Errorw("Verification failed: Node is syncing", "nodeState", n.getCachedState()) + continue + } + + lggr.Infow(fmt.Sprintf("Successfully verified RPC node. Node was syncing for %s", time.Since(syncingAt)), "nodeState", n.getCachedState()) + n.declareAlive() + return + } + } +} diff --git a/pkg/solana/client/multinode/node_selector.go b/pkg/solana/client/multinode/node_selector.go new file mode 100644 index 000000000..372b521bb --- /dev/null +++ b/pkg/solana/client/multinode/node_selector.go @@ -0,0 +1,43 @@ +package client + +import ( + "fmt" + + "github.com/smartcontractkit/chainlink/v2/common/types" +) + +const ( + NodeSelectionModeHighestHead = "HighestHead" + NodeSelectionModeRoundRobin = "RoundRobin" + NodeSelectionModeTotalDifficulty = "TotalDifficulty" + NodeSelectionModePriorityLevel = "PriorityLevel" +) + +type NodeSelector[ + CHAIN_ID types.ID, + RPC any, +] interface { + // Select returns a Node, or nil if none can be selected. + // Implementation must be thread-safe. + Select() Node[CHAIN_ID, RPC] + // Name returns the strategy name, e.g. 
"HighestHead" or "RoundRobin" + Name() string +} + +func newNodeSelector[ + CHAIN_ID types.ID, + RPC any, +](selectionMode string, nodes []Node[CHAIN_ID, RPC]) NodeSelector[CHAIN_ID, RPC] { + switch selectionMode { + case NodeSelectionModeHighestHead: + return NewHighestHeadNodeSelector[CHAIN_ID, RPC](nodes) + case NodeSelectionModeRoundRobin: + return NewRoundRobinSelector[CHAIN_ID, RPC](nodes) + case NodeSelectionModeTotalDifficulty: + return NewTotalDifficultyNodeSelector[CHAIN_ID, RPC](nodes) + case NodeSelectionModePriorityLevel: + return NewPriorityLevelNodeSelector[CHAIN_ID, RPC](nodes) + default: + panic(fmt.Sprintf("unsupported NodeSelectionMode: %s", selectionMode)) + } +} diff --git a/pkg/solana/client/multinode/poller.go b/pkg/solana/client/multinode/poller.go new file mode 100644 index 000000000..d6080722c --- /dev/null +++ b/pkg/solana/client/multinode/poller.go @@ -0,0 +1,99 @@ +package client + +import ( + "context" + "sync" + "time" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/services" + + "github.com/smartcontractkit/chainlink/v2/common/types" +) + +// Poller is a component that polls a function at a given interval +// and delivers the result to a channel. It is used by multinode to poll +// for new heads and implements the Subscription interface. +type Poller[T any] struct { + services.StateMachine + pollingInterval time.Duration + pollingFunc func(ctx context.Context) (T, error) + pollingTimeout time.Duration + logger logger.Logger + channel chan<- T + errCh chan error + + stopCh services.StopChan + wg sync.WaitGroup +} + +// NewPoller creates a new Poller instance and returns a channel to receive the polled data +func NewPoller[ + T any, +](pollingInterval time.Duration, pollingFunc func(ctx context.Context) (T, error), pollingTimeout time.Duration, logger logger.Logger) (Poller[T], <-chan T) { + channel := make(chan T) + return Poller[T]{ + pollingInterval: pollingInterval, + pollingFunc: pollingFunc, + pollingTimeout: pollingTimeout, + channel: channel, + logger: logger, + errCh: make(chan error), + stopCh: make(chan struct{}), + }, channel +} + +var _ types.Subscription = &Poller[any]{} + +func (p *Poller[T]) Start() error { + return p.StartOnce("Poller", func() error { + p.wg.Add(1) + go p.pollingLoop() + return nil + }) +} + +// Unsubscribe cancels the sending of events to the data channel +func (p *Poller[T]) Unsubscribe() { + _ = p.StopOnce("Poller", func() error { + close(p.stopCh) + p.wg.Wait() + close(p.errCh) + close(p.channel) + return nil + }) +} + +func (p *Poller[T]) Err() <-chan error { + return p.errCh +} + +func (p *Poller[T]) pollingLoop() { + defer p.wg.Done() + + ticker := time.NewTicker(p.pollingInterval) + defer ticker.Stop() + + for { + select { + case <-p.stopCh: + return + case <-ticker.C: + // Set polling timeout + pollingCtx, cancelPolling := p.stopCh.CtxCancel(context.WithTimeout(context.Background(), p.pollingTimeout)) + // Execute polling function + result, err := p.pollingFunc(pollingCtx) + cancelPolling() + if err != nil { + p.logger.Warnf("polling error: %v", err) + continue + } + // Send result to channel or block if channel is full + select { + case p.channel <- result: + case <-p.stopCh: + return + } + } + } +} diff --git a/pkg/solana/client/multinode/poller_test.go b/pkg/solana/client/multinode/poller_test.go new file mode 100644 index 000000000..91af57930 --- /dev/null +++ b/pkg/solana/client/multinode/poller_test.go @@ -0,0 +1,187 @@ +package client + +import ( 
+ "context" + "fmt" + "math/big" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" +) + +func Test_Poller(t *testing.T) { + lggr := logger.Test(t) + + t.Run("Test multiple start", func(t *testing.T) { + pollFunc := func(ctx context.Context) (Head, error) { + return nil, nil + } + + poller, _ := NewPoller[Head](time.Millisecond, pollFunc, time.Second, lggr) + err := poller.Start() + require.NoError(t, err) + + err = poller.Start() + require.Error(t, err) + poller.Unsubscribe() + }) + + t.Run("Test polling for heads", func(t *testing.T) { + // Mock polling function that returns a new value every time it's called + var pollNumber int + pollLock := sync.Mutex{} + pollFunc := func(ctx context.Context) (Head, error) { + pollLock.Lock() + defer pollLock.Unlock() + pollNumber++ + h := head{ + BlockNumber: int64(pollNumber), + BlockDifficulty: big.NewInt(int64(pollNumber)), + } + return h.ToMockHead(t), nil + } + + // Create poller and start to receive data + poller, channel := NewPoller[Head](time.Millisecond, pollFunc, time.Second, lggr) + require.NoError(t, poller.Start()) + defer poller.Unsubscribe() + + // Receive updates from the poller + pollCount := 0 + pollMax := 50 + for ; pollCount < pollMax; pollCount++ { + h := <-channel + assert.Equal(t, int64(pollCount+1), h.BlockNumber()) + } + }) + + t.Run("Test polling errors", func(t *testing.T) { + // Mock polling function that returns an error + var pollNumber int + pollLock := sync.Mutex{} + pollFunc := func(ctx context.Context) (Head, error) { + pollLock.Lock() + defer pollLock.Unlock() + pollNumber++ + return nil, fmt.Errorf("polling error %d", pollNumber) + } + + olggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) + + // Create poller and subscribe to receive data + poller, _ := NewPoller[Head](time.Millisecond, pollFunc, time.Second, olggr) + require.NoError(t, poller.Start()) + defer poller.Unsubscribe() + + // Ensure that all errors were logged as expected + logsSeen := func() bool { + for pollCount := 0; pollCount < 50; pollCount++ { + numLogs := observedLogs.FilterMessage(fmt.Sprintf("polling error: polling error %d", pollCount+1)).Len() + if numLogs != 1 { + return false + } + } + return true + } + require.Eventually(t, logsSeen, tests.WaitTimeout(t), 100*time.Millisecond) + }) + + t.Run("Test polling timeout", func(t *testing.T) { + pollFunc := func(ctx context.Context) (Head, error) { + if <-ctx.Done(); true { + return nil, ctx.Err() + } + return nil, nil + } + + // Set instant timeout + pollingTimeout := time.Duration(0) + + olggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) + + // Create poller and subscribe to receive data + poller, _ := NewPoller[Head](time.Millisecond, pollFunc, pollingTimeout, olggr) + require.NoError(t, poller.Start()) + defer poller.Unsubscribe() + + // Ensure that timeout errors were logged as expected + logsSeen := func() bool { + return observedLogs.FilterMessage("polling error: context deadline exceeded").Len() >= 1 + } + require.Eventually(t, logsSeen, tests.WaitTimeout(t), 100*time.Millisecond) + }) + + t.Run("Test unsubscribe during polling", func(t *testing.T) { + wait := make(chan struct{}) + closeOnce := sync.OnceFunc(func() { close(wait) }) + pollFunc := func(ctx context.Context) (Head, error) { + closeOnce() + // Block in polling function until context is cancelled + if 
<-ctx.Done(); true { + return nil, ctx.Err() + } + return nil, nil + } + + // Set long timeout + pollingTimeout := time.Minute + + olggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) + + // Create poller and subscribe to receive data + poller, _ := NewPoller[Head](time.Millisecond, pollFunc, pollingTimeout, olggr) + require.NoError(t, poller.Start()) + + // Unsubscribe while blocked in polling function + <-wait + poller.Unsubscribe() + + // Ensure error was logged + logsSeen := func() bool { + return observedLogs.FilterMessage("polling error: context canceled").Len() >= 1 + } + require.Eventually(t, logsSeen, tests.WaitTimeout(t), 100*time.Millisecond) + }) +} + +func Test_Poller_Unsubscribe(t *testing.T) { + lggr := logger.Test(t) + pollFunc := func(ctx context.Context) (Head, error) { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + h := head{ + BlockNumber: 0, + BlockDifficulty: big.NewInt(0), + } + return h.ToMockHead(t), nil + } + } + + t.Run("Test multiple unsubscribe", func(t *testing.T) { + poller, channel := NewPoller[Head](time.Millisecond, pollFunc, time.Second, lggr) + err := poller.Start() + require.NoError(t, err) + + <-channel + poller.Unsubscribe() + poller.Unsubscribe() + }) + + t.Run("Read channel after unsubscribe", func(t *testing.T) { + poller, channel := NewPoller[Head](time.Millisecond, pollFunc, time.Second, lggr) + err := poller.Start() + require.NoError(t, err) + + poller.Unsubscribe() + require.Equal(t, <-channel, nil) + }) +} diff --git a/pkg/solana/client/multinode/send_only_node.go b/pkg/solana/client/multinode/send_only_node.go new file mode 100644 index 000000000..069911c78 --- /dev/null +++ b/pkg/solana/client/multinode/send_only_node.go @@ -0,0 +1,183 @@ +package client + +import ( + "context" + "fmt" + "net/url" + "sync" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/services" + + "github.com/smartcontractkit/chainlink/v2/common/types" +) + +type sendOnlyClient[ + CHAIN_ID types.ID, +] interface { + Close() + ChainID(context.Context) (CHAIN_ID, error) + Dial(ctx context.Context) error +} + +// SendOnlyNode represents one node used as a sendonly +type SendOnlyNode[ + CHAIN_ID types.ID, + RPC any, +] interface { + // Start may attempt to connect to the node, but should only return error for misconfiguration - never for temporary errors. + Start(context.Context) error + Close() error + + ConfiguredChainID() CHAIN_ID + RPC() RPC + + String() string + // State returns NodeState + State() NodeState + // Name is a unique identifier for this node. 
+ Name() string +} + +// It only supports sending transactions +// It must use an http(s) url +type sendOnlyNode[ + CHAIN_ID types.ID, + RPC sendOnlyClient[CHAIN_ID], +] struct { + services.StateMachine + + stateMu sync.RWMutex // protects state* fields + state NodeState + + rpc RPC + uri url.URL + log logger.Logger + name string + chainID CHAIN_ID + chStop services.StopChan + wg sync.WaitGroup +} + +// NewSendOnlyNode returns a new sendonly node +func NewSendOnlyNode[ + CHAIN_ID types.ID, + RPC sendOnlyClient[CHAIN_ID], +]( + lggr logger.Logger, + httpuri url.URL, + name string, + chainID CHAIN_ID, + rpc RPC, +) SendOnlyNode[CHAIN_ID, RPC] { + s := new(sendOnlyNode[CHAIN_ID, RPC]) + s.name = name + s.log = logger.Named(logger.Named(lggr, "SendOnlyNode"), name) + s.log = logger.With(s.log, + "nodeTier", "sendonly", + ) + s.rpc = rpc + s.uri = httpuri + s.chainID = chainID + s.chStop = make(chan struct{}) + return s +} + +func (s *sendOnlyNode[CHAIN_ID, RPC]) Start(ctx context.Context) error { + return s.StartOnce(s.name, func() error { + s.start(ctx) + return nil + }) +} + +// Start setups up and verifies the sendonly node +// Should only be called once in a node's lifecycle +func (s *sendOnlyNode[CHAIN_ID, RPC]) start(startCtx context.Context) { + if s.State() != NodeStateUndialed { + panic(fmt.Sprintf("cannot dial node with state %v", s.state)) + } + + err := s.rpc.Dial(startCtx) + if err != nil { + promPoolRPCNodeTransitionsToUnusable.WithLabelValues(s.chainID.String(), s.name).Inc() + s.log.Errorw("Dial failed: SendOnly Node is unusable", "err", err) + s.setState(NodeStateUnusable) + return + } + s.setState(NodeStateDialed) + + if s.chainID.String() == "0" { + // Skip verification if chainID is zero + s.log.Warn("sendonly rpc ChainID verification skipped") + } else { + chainID, err := s.rpc.ChainID(startCtx) + if err != nil || chainID.String() != s.chainID.String() { + promPoolRPCNodeTransitionsToUnreachable.WithLabelValues(s.chainID.String(), s.name).Inc() + if err != nil { + promPoolRPCNodeTransitionsToUnreachable.WithLabelValues(s.chainID.String(), s.name).Inc() + s.log.Errorw(fmt.Sprintf("Verify failed: %v", err), "err", err) + s.setState(NodeStateUnreachable) + } else { + promPoolRPCNodeTransitionsToInvalidChainID.WithLabelValues(s.chainID.String(), s.name).Inc() + s.log.Errorf( + "sendonly rpc ChainID doesn't match local chain ID: RPC ID=%s, local ID=%s, node name=%s", + chainID.String(), + s.chainID.String(), + s.name, + ) + s.setState(NodeStateInvalidChainID) + } + // Since it has failed, spin up the verifyLoop that will keep + // retrying until success + s.wg.Add(1) + go s.verifyLoop() + return + } + } + + promPoolRPCNodeTransitionsToAlive.WithLabelValues(s.chainID.String(), s.name).Inc() + s.setState(NodeStateAlive) + s.log.Infow("Sendonly RPC Node is online", "NodeState", s.state) +} + +func (s *sendOnlyNode[CHAIN_ID, RPC]) Close() error { + return s.StopOnce(s.name, func() error { + s.rpc.Close() + close(s.chStop) + s.wg.Wait() + s.setState(NodeStateClosed) + return nil + }) +} + +func (s *sendOnlyNode[CHAIN_ID, RPC]) ConfiguredChainID() CHAIN_ID { + return s.chainID +} + +func (s *sendOnlyNode[CHAIN_ID, RPC]) RPC() RPC { + return s.rpc +} + +func (s *sendOnlyNode[CHAIN_ID, RPC]) String() string { + return fmt.Sprintf("(%s)%s:%s", Secondary.String(), s.name, s.uri.Redacted()) +} + +func (s *sendOnlyNode[CHAIN_ID, RPC]) setState(state NodeState) (changed bool) { + s.stateMu.Lock() + defer s.stateMu.Unlock() + if s.state == state { + return false + } + s.state = state + 
return true +} + +func (s *sendOnlyNode[CHAIN_ID, RPC]) State() NodeState { + s.stateMu.RLock() + defer s.stateMu.RUnlock() + return s.state +} + +func (s *sendOnlyNode[CHAIN_ID, RPC]) Name() string { + return s.name +} diff --git a/pkg/solana/client/multinode/send_only_node_lifecycle.go b/pkg/solana/client/multinode/send_only_node_lifecycle.go new file mode 100644 index 000000000..a6ac11248 --- /dev/null +++ b/pkg/solana/client/multinode/send_only_node_lifecycle.go @@ -0,0 +1,67 @@ +package client + +import ( + "fmt" + "time" + + "github.com/smartcontractkit/chainlink/v2/common/internal/utils" +) + +// verifyLoop may only be triggered once, on Start, if initial chain ID check +// fails. +// +// It will continue checking until success and then exit permanently. +func (s *sendOnlyNode[CHAIN_ID, RPC]) verifyLoop() { + defer s.wg.Done() + ctx, cancel := s.chStop.NewCtx() + defer cancel() + + backoff := utils.NewRedialBackoff() + for { + select { + case <-ctx.Done(): + return + case <-time.After(backoff.Duration()): + } + chainID, err := s.rpc.ChainID(ctx) + if err != nil { + ok := s.IfStarted(func() { + if changed := s.setState(NodeStateUnreachable); changed { + promPoolRPCNodeTransitionsToUnreachable.WithLabelValues(s.chainID.String(), s.name).Inc() + } + }) + if !ok { + return + } + s.log.Errorw(fmt.Sprintf("Verify failed: %v", err), "err", err) + continue + } else if chainID.String() != s.chainID.String() { + ok := s.IfStarted(func() { + if changed := s.setState(NodeStateInvalidChainID); changed { + promPoolRPCNodeTransitionsToInvalidChainID.WithLabelValues(s.chainID.String(), s.name).Inc() + } + }) + if !ok { + return + } + s.log.Errorf( + "sendonly rpc ChainID doesn't match local chain ID: RPC ID=%s, local ID=%s, node name=%s", + chainID.String(), + s.chainID.String(), + s.name, + ) + + continue + } + ok := s.IfStarted(func() { + if changed := s.setState(NodeStateAlive); changed { + promPoolRPCNodeTransitionsToAlive.WithLabelValues(s.chainID.String(), s.name).Inc() + } + }) + if !ok { + return + } + s.log.Infow("Sendonly RPC Node is online", "NodeState", s.state) + return + } +} diff --git a/pkg/solana/client/multinode/send_only_node_test.go b/pkg/solana/client/multinode/send_only_node_test.go new file mode 100644 index 000000000..352fb5b92 --- /dev/null +++ b/pkg/solana/client/multinode/send_only_node_test.go @@ -0,0 +1,139 @@ +package client + +import ( + "errors" + "fmt" + "net/url" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" + + "github.com/smartcontractkit/chainlink/v2/common/types" +) + +func TestNewSendOnlyNode(t *testing.T) { + t.Parallel() + + urlFormat := "http://user:%s@testurl.com" + password := "pass" + u, err := url.Parse(fmt.Sprintf(urlFormat, password)) + require.NoError(t, err) + redacted := fmt.Sprintf(urlFormat, "xxxxx") + lggr := logger.Test(t) + name := "TestNewSendOnlyNode" + chainID := types.RandomID() + client := newMockSendOnlyClient[types.ID](t) + + node := NewSendOnlyNode(lggr, *u, name, chainID, client) + assert.NotNil(t, node) + + // Must contain name & url with redacted password + assert.Contains(t, node.String(), fmt.Sprintf("%s:%s", name, redacted)) + assert.Equal(t, node.ConfiguredChainID(), chainID) +} + +func TestStartSendOnlyNode(t *testing.T) { + t.Parallel() + t.Run("becomes unusable if initial dial fails", func(t 
*testing.T) { + t.Parallel() + lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) + client := newMockSendOnlyClient[types.ID](t) + client.On("Close").Once() + expectedError := errors.New("some http error") + client.On("Dial", mock.Anything).Return(expectedError).Once() + s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), types.RandomID(), client) + + defer func() { assert.NoError(t, s.Close()) }() + err := s.Start(tests.Context(t)) + require.NoError(t, err) + + assert.Equal(t, NodeStateUnusable, s.State()) + tests.RequireLogMessage(t, observedLogs, "Dial failed: SendOnly Node is unusable") + }) + t.Run("Default ChainID(0) produces warn and skips checks", func(t *testing.T) { + t.Parallel() + lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) + client := newMockSendOnlyClient[types.ID](t) + client.On("Close").Once() + client.On("Dial", mock.Anything).Return(nil).Once() + s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), types.NewIDFromInt(0), client) + + defer func() { assert.NoError(t, s.Close()) }() + err := s.Start(tests.Context(t)) + require.NoError(t, err) + + assert.Equal(t, NodeStateAlive, s.State()) + tests.RequireLogMessage(t, observedLogs, "sendonly rpc ChainID verification skipped") + }) + t.Run("Can recover from chainID verification failure", func(t *testing.T) { + t.Parallel() + lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) + client := newMockSendOnlyClient[types.ID](t) + client.On("Close").Once() + client.On("Dial", mock.Anything).Return(nil) + expectedError := errors.New("failed to get chain ID") + chainID := types.RandomID() + const failuresCount = 2 + client.On("ChainID", mock.Anything).Return(types.RandomID(), expectedError).Times(failuresCount) + client.On("ChainID", mock.Anything).Return(chainID, nil) + + s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), chainID, client) + + defer func() { assert.NoError(t, s.Close()) }() + err := s.Start(tests.Context(t)) + require.NoError(t, err) + + assert.Equal(t, NodeStateUnreachable, s.State()) + tests.AssertLogCountEventually(t, observedLogs, fmt.Sprintf("Verify failed: %v", expectedError), failuresCount) + tests.AssertEventually(t, func() bool { + return s.State() == NodeStateAlive + }) + }) + t.Run("Can recover from chainID mismatch", func(t *testing.T) { + t.Parallel() + lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) + client := newMockSendOnlyClient[types.ID](t) + client.On("Close").Once() + client.On("Dial", mock.Anything).Return(nil).Once() + configuredChainID := types.NewIDFromInt(11) + rpcChainID := types.NewIDFromInt(20) + const failuresCount = 2 + client.On("ChainID", mock.Anything).Return(rpcChainID, nil).Times(failuresCount) + client.On("ChainID", mock.Anything).Return(configuredChainID, nil) + s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), configuredChainID, client) + + defer func() { assert.NoError(t, s.Close()) }() + err := s.Start(tests.Context(t)) + require.NoError(t, err) + + assert.Equal(t, NodeStateInvalidChainID, s.State()) + tests.AssertLogCountEventually(t, observedLogs, "sendonly rpc ChainID doesn't match local chain ID", failuresCount) + tests.AssertEventually(t, func() bool { + return s.State() == NodeStateAlive + }) + }) + t.Run("Start with Random ChainID", func(t *testing.T) { + t.Parallel() + lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) + client := newMockSendOnlyClient[types.ID](t) + client.On("Close").Once() + client.On("Dial", mock.Anything).Return(nil).Once() + configuredChainID := types.RandomID() + client.On("ChainID", 
mock.Anything).Return(configuredChainID, nil) + s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), configuredChainID, client) + + defer func() { assert.NoError(t, s.Close()) }() + err := s.Start(tests.Context(t)) + assert.NoError(t, err) + tests.AssertEventually(t, func() bool { + return s.State() == NodeStateAlive + }) + assert.Equal(t, 0, observedLogs.Len()) // No warnings expected + }) +} diff --git a/pkg/solana/client/multinode/transaction_sender.go b/pkg/solana/client/multinode/transaction_sender.go new file mode 100644 index 000000000..a4c5e2b3d --- /dev/null +++ b/pkg/solana/client/multinode/transaction_sender.go @@ -0,0 +1,277 @@ +package client + +import ( + "context" + "fmt" + "math" + "slices" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/services" +) + +var ( + // PromMultiNodeInvariantViolations reports violation of our assumptions + PromMultiNodeInvariantViolations = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "multi_node_invariant_violations", + Help: "The number of invariant violations", + }, []string{"network", "chainId", "invariant"}) +) + +// TxErrorClassifier - defines interface of a function that transforms raw RPC error into the SendTxReturnCode enum +// (e.g. Successful, Fatal, Retryable, etc.) +type TxErrorClassifier[TX any] func(tx TX, err error) SendTxReturnCode + +type sendTxResult struct { + Err error + ResultCode SendTxReturnCode +} + +const sendTxQuorum = 0.7 + +// SendTxRPCClient - defines interface of an RPC used by TransactionSender to broadcast transaction +type SendTxRPCClient[TX any] interface { + // SendTransaction errors returned should include name or other unique identifier of the RPC + SendTransaction(ctx context.Context, tx TX) error +} + +func NewTransactionSender[TX any, CHAIN_ID ID, RPC SendTxRPCClient[TX]]( + lggr logger.Logger, + chainID CHAIN_ID, + chainFamily string, + multiNode *MultiNode[CHAIN_ID, RPC], + txErrorClassifier TxErrorClassifier[TX], + sendTxSoftTimeout time.Duration, +) *TransactionSender[TX, CHAIN_ID, RPC] { + if sendTxSoftTimeout == 0 { + sendTxSoftTimeout = QueryTimeout / 2 + } + return &TransactionSender[TX, CHAIN_ID, RPC]{ + chainID: chainID, + chainFamily: chainFamily, + lggr: logger.Sugared(lggr).Named("TransactionSender").With("chainID", chainID.String()), + multiNode: multiNode, + txErrorClassifier: txErrorClassifier, + sendTxSoftTimeout: sendTxSoftTimeout, + chStop: make(services.StopChan), + } +} + +type TransactionSender[TX any, CHAIN_ID ID, RPC SendTxRPCClient[TX]] struct { + services.StateMachine + chainID CHAIN_ID + chainFamily string + lggr logger.SugaredLogger + multiNode *MultiNode[CHAIN_ID, RPC] + txErrorClassifier TxErrorClassifier[TX] + sendTxSoftTimeout time.Duration // defines max waiting time from first response til responses evaluation + + wg sync.WaitGroup // waits for all reporting goroutines to finish + chStop services.StopChan +} + +// SendTransaction - broadcasts transaction to all the send-only and primary nodes in MultiNode. +// A returned nil or error does not guarantee that the transaction will or won't be included. Additional checks must be +// performed to determine the final state. +// +// Send-only nodes' results are ignored as they tend to return false-positive responses. Broadcast to them is necessary +// to speed up the propagation of TX in the network. 
+// +// Handling of primary nodes' results consists of collection and aggregation. +// In the collection step, we gather as many results as possible while minimizing waiting time. This operation succeeds +// on one of the following conditions: +// * Received at least one success +// * Received at least one result and `sendTxSoftTimeout` expired +// * Received results from the sufficient number of nodes defined by sendTxQuorum. +// The aggregation is based on the following conditions: +// * If there is at least one success - returns success +// * If there is at least one terminal error - returns terminal error +// * If there is both success and terminal error - returns success and reports invariant violation +// * Otherwise, returns any (effectively random) of the errors. +func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) SendTransaction(ctx context.Context, tx TX) (SendTxReturnCode, error) { + txResults := make(chan sendTxResult) + txResultsToReport := make(chan sendTxResult) + primaryNodeWg := sync.WaitGroup{} + + ctx, cancel := txSender.chStop.Ctx(ctx) + defer cancel() + + healthyNodesNum := 0 + err := txSender.multiNode.DoAll(ctx, func(ctx context.Context, rpc RPC, isSendOnly bool) { + if isSendOnly { + txSender.wg.Add(1) + go func() { + defer txSender.wg.Done() + // Send-only nodes' results are ignored as they tend to return false-positive responses. + // Broadcast to them is necessary to speed up the propagation of TX in the network. + _ = txSender.broadcastTxAsync(ctx, rpc, tx) + }() + return + } + + // Primary Nodes + healthyNodesNum++ + primaryNodeWg.Add(1) + go func() { + defer primaryNodeWg.Done() + result := txSender.broadcastTxAsync(ctx, rpc, tx) + select { + case <-ctx.Done(): + return + case txResults <- result: + } + + select { + case <-ctx.Done(): + return + case txResultsToReport <- result: + } + }() + }) + if err != nil { + primaryNodeWg.Wait() + close(txResultsToReport) + close(txResults) + return 0, err + } + + // This needs to be done in parallel so the reporting knows when it's done (when the channel is closed) + txSender.wg.Add(1) + go func() { + defer txSender.wg.Done() + primaryNodeWg.Wait() + close(txResultsToReport) + close(txResults) + }() + + txSender.wg.Add(1) + go txSender.reportSendTxAnomalies(tx, txResultsToReport) + + return txSender.collectTxResults(ctx, tx, healthyNodesNum, txResults) +} + +func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) broadcastTxAsync(ctx context.Context, rpc RPC, tx TX) sendTxResult { + txErr := rpc.SendTransaction(ctx, tx) + txSender.lggr.Debugw("Node sent transaction", "tx", tx, "err", txErr) + resultCode := txSender.txErrorClassifier(tx, txErr) + if !slices.Contains(sendTxSuccessfulCodes, resultCode) { + txSender.lggr.Warnw("RPC returned error", "tx", tx, "err", txErr) + } + return sendTxResult{Err: txErr, ResultCode: resultCode} +} + +func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) reportSendTxAnomalies(tx TX, txResults <-chan sendTxResult) { + defer txSender.wg.Done() + resultsByCode := sendTxErrors{} + // txResults eventually will be closed + for txResult := range txResults { + resultsByCode[txResult.ResultCode] = append(resultsByCode[txResult.ResultCode], txResult.Err) + } + + _, _, criticalErr := aggregateTxResults(resultsByCode) + if criticalErr != nil { + txSender.lggr.Criticalw("observed invariant violation on SendTransaction", "tx", tx, "resultsByCode", resultsByCode, "err", criticalErr) + PromMultiNodeInvariantViolations.WithLabelValues(txSender.chainFamily, txSender.chainID.String(), 
criticalErr.Error()).Inc() + } +} + +type sendTxErrors map[SendTxReturnCode][]error + +func aggregateTxResults(resultsByCode sendTxErrors) (returnCode SendTxReturnCode, txResult error, err error) { + severeCode, severeErrors, hasSevereErrors := findFirstIn(resultsByCode, sendTxSevereErrors) + successCode, successResults, hasSuccess := findFirstIn(resultsByCode, sendTxSuccessfulCodes) + if hasSuccess { + // We assume that primary node would never report false positive txResult for a transaction. + // Thus, if such case occurs it's probably due to misconfiguration or a bug and requires manual intervention. + if hasSevereErrors { + const errMsg = "found contradictions in nodes replies on SendTransaction: got success and severe error" + // return success, since at least 1 node has accepted our broadcasted Tx, and thus it can now be included onchain + return successCode, successResults[0], fmt.Errorf(errMsg) + } + + // other errors are temporary - we are safe to return success + return successCode, successResults[0], nil + } + + if hasSevereErrors { + return severeCode, severeErrors[0], nil + } + + // return temporary error + for code, result := range resultsByCode { + return code, result[0], nil + } + + err = fmt.Errorf("expected at least one response on SendTransaction") + return 0, err, err +} + +func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) collectTxResults(ctx context.Context, tx TX, healthyNodesNum int, txResults <-chan sendTxResult) (SendTxReturnCode, error) { + if healthyNodesNum == 0 { + return 0, ErroringNodeError + } + requiredResults := int(math.Ceil(float64(healthyNodesNum) * sendTxQuorum)) + errorsByCode := sendTxErrors{} + var softTimeoutChan <-chan time.Time + var resultsCount int +loop: + for { + select { + case <-ctx.Done(): + txSender.lggr.Debugw("Failed to collect of the results before context was done", "tx", tx, "errorsByCode", errorsByCode) + return 0, ctx.Err() + case result := <-txResults: + errorsByCode[result.ResultCode] = append(errorsByCode[result.ResultCode], result.Err) + resultsCount++ + if slices.Contains(sendTxSuccessfulCodes, result.ResultCode) || resultsCount >= requiredResults { + break loop + } + case <-softTimeoutChan: + txSender.lggr.Debugw("Send Tx soft timeout expired - returning responses we've collected so far", "tx", tx, "resultsCount", resultsCount, "requiredResults", requiredResults) + break loop + } + + if softTimeoutChan == nil { + tm := time.NewTimer(txSender.sendTxSoftTimeout) + softTimeoutChan = tm.C + // we are fine with stopping timer at the end of function + //nolint + defer tm.Stop() + } + } + + // ignore critical error as it's reported in reportSendTxAnomalies + returnCode, result, _ := aggregateTxResults(errorsByCode) + return returnCode, result +} + +func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) Start(ctx context.Context) error { + return txSender.StartOnce("TransactionSender", func() error { + return nil + }) +} + +func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) Close() error { + return txSender.StopOnce("TransactionSender", func() error { + close(txSender.chStop) + txSender.wg.Wait() + return nil + }) +} + +// findFirstIn - returns the first existing key and value for the slice of keys +func findFirstIn[K comparable, V any](set map[K]V, keys []K) (K, V, bool) { + for _, k := range keys { + if v, ok := set[k]; ok { + return k, v, true + } + } + var zeroK K + var zeroV V + return zeroK, zeroV, false +} diff --git a/pkg/solana/client/multinode/transaction_sender_test.go 
b/pkg/solana/client/multinode/transaction_sender_test.go new file mode 100644 index 000000000..e4387abee --- /dev/null +++ b/pkg/solana/client/multinode/transaction_sender_test.go @@ -0,0 +1,360 @@ +package client + +import ( + "context" + "fmt" + "testing" + + "github.com/pkg/errors" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" + "github.com/smartcontractkit/chainlink/v2/common/types" +) + +type sendTxMultiNode struct { + *MultiNode[types.ID, SendTxRPCClient[any]] +} + +type sendTxRPC struct { + sendTxRun func(args mock.Arguments) + sendTxErr error +} + +var _ SendTxRPCClient[any] = (*sendTxRPC)(nil) + +func newSendTxRPC(sendTxErr error, sendTxRun func(args mock.Arguments)) *sendTxRPC { + return &sendTxRPC{sendTxErr: sendTxErr, sendTxRun: sendTxRun} +} + +func (rpc *sendTxRPC) SendTransaction(ctx context.Context, _ any) error { + if rpc.sendTxRun != nil { + rpc.sendTxRun(mock.Arguments{ctx}) + } + return rpc.sendTxErr +} + +func newTestTransactionSender(t *testing.T, chainID types.ID, lggr logger.Logger, + nodes []Node[types.ID, SendTxRPCClient[any]], + sendOnlyNodes []SendOnlyNode[types.ID, SendTxRPCClient[any]], +) (*sendTxMultiNode, *TransactionSender[any, types.ID, SendTxRPCClient[any]]) { + mn := sendTxMultiNode{NewMultiNode[types.ID, SendTxRPCClient[any]]( + lggr, NodeSelectionModeRoundRobin, 0, nodes, sendOnlyNodes, chainID, "chainFamily", 0)} + err := mn.StartOnce("startedTestMultiNode", func() error { return nil }) + require.NoError(t, err) + + txSender := NewTransactionSender[any, types.ID, SendTxRPCClient[any]](lggr, chainID, mn.chainFamily, mn.MultiNode, classifySendTxError, tests.TestInterval) + err = txSender.Start(tests.Context(t)) + require.NoError(t, err) + + t.Cleanup(func() { + err := mn.Close() + if err != nil { + // Allow MultiNode to be closed early for testing + require.EqualError(t, err, "MultiNode has already been stopped: already stopped") + } + err = txSender.Close() + if err != nil { + // Allow TransactionSender to be closed early for testing + require.EqualError(t, err, "TransactionSender has already been stopped: already stopped") + } + }) + return &mn, txSender +} + +func classifySendTxError(_ any, err error) SendTxReturnCode { + if err != nil { + return Fatal + } + return Successful +} + +func TestTransactionSender_SendTransaction(t *testing.T) { + t.Parallel() + + newNodeWithState := func(t *testing.T, state NodeState, txErr error, sendTxRun func(args mock.Arguments)) *mockNode[types.ID, SendTxRPCClient[any]] { + rpc := newSendTxRPC(txErr, sendTxRun) + node := newMockNode[types.ID, SendTxRPCClient[any]](t) + node.On("String").Return("node name").Maybe() + node.On("RPC").Return(rpc).Maybe() + node.On("State").Return(state).Maybe() + node.On("Close").Return(nil).Once() + return node + } + + newNode := func(t *testing.T, txErr error, sendTxRun func(args mock.Arguments)) *mockNode[types.ID, SendTxRPCClient[any]] { + return newNodeWithState(t, NodeStateAlive, txErr, sendTxRun) + } + + t.Run("Fails if there is no nodes available", func(t *testing.T) { + lggr, _ := logger.TestObserved(t, zap.DebugLevel) + _, txSender := newTestTransactionSender(t, types.RandomID(), lggr, nil, nil) + _, err := txSender.SendTransaction(tests.Context(t), nil) + assert.EqualError(t, err, ErroringNodeError.Error()) + }) + + t.Run("Transaction failure happy path", func(t 
*testing.T) { + expectedError := errors.New("transaction failed") + mainNode := newNode(t, expectedError, nil) + lggr, observedLogs := logger.TestObserved(t, zap.DebugLevel) + + _, txSender := newTestTransactionSender(t, types.RandomID(), lggr, + []Node[types.ID, SendTxRPCClient[any]]{mainNode}, + []SendOnlyNode[types.ID, SendTxRPCClient[any]]{newNode(t, errors.New("unexpected error"), nil)}) + + result, sendErr := txSender.SendTransaction(tests.Context(t), nil) + require.ErrorIs(t, sendErr, expectedError) + require.Equal(t, Fatal, result) + tests.AssertLogCountEventually(t, observedLogs, "Node sent transaction", 2) + tests.AssertLogCountEventually(t, observedLogs, "RPC returned error", 2) + }) + + t.Run("Transaction success happy path", func(t *testing.T) { + mainNode := newNode(t, nil, nil) + + lggr, observedLogs := logger.TestObserved(t, zap.DebugLevel) + _, txSender := newTestTransactionSender(t, types.RandomID(), lggr, + []Node[types.ID, SendTxRPCClient[any]]{mainNode}, + []SendOnlyNode[types.ID, SendTxRPCClient[any]]{newNode(t, errors.New("unexpected error"), nil)}) + + result, sendErr := txSender.SendTransaction(tests.Context(t), nil) + require.NoError(t, sendErr) + require.Equal(t, Successful, result) + tests.AssertLogCountEventually(t, observedLogs, "Node sent transaction", 2) + tests.AssertLogCountEventually(t, observedLogs, "RPC returned error", 1) + }) + + t.Run("Context expired before collecting sufficient results", func(t *testing.T) { + testContext, testCancel := context.WithCancel(tests.Context(t)) + defer testCancel() + + mainNode := newNode(t, nil, func(_ mock.Arguments) { + // block caller til end of the test + <-testContext.Done() + }) + + lggr, _ := logger.TestObserved(t, zap.DebugLevel) + + _, txSender := newTestTransactionSender(t, types.RandomID(), lggr, + []Node[types.ID, SendTxRPCClient[any]]{mainNode}, nil) + + requestContext, cancel := context.WithCancel(tests.Context(t)) + cancel() + _, sendErr := txSender.SendTransaction(requestContext, nil) + require.EqualError(t, sendErr, "context canceled") + }) + + t.Run("Soft timeout stops results collection", func(t *testing.T) { + chainID := types.RandomID() + expectedError := errors.New("transaction failed") + fastNode := newNode(t, expectedError, nil) + + // hold reply from the node till end of the test + testContext, testCancel := context.WithCancel(tests.Context(t)) + defer testCancel() + slowNode := newNode(t, errors.New("transaction failed"), func(_ mock.Arguments) { + // block caller til end of the test + <-testContext.Done() + }) + + lggr, _ := logger.TestObserved(t, zap.DebugLevel) + + _, txSender := newTestTransactionSender(t, chainID, lggr, []Node[types.ID, SendTxRPCClient[any]]{fastNode, slowNode}, nil) + _, sendErr := txSender.SendTransaction(tests.Context(t), nil) + require.EqualError(t, sendErr, expectedError.Error()) + }) + t.Run("Fails when multinode is closed", func(t *testing.T) { + chainID := types.RandomID() + fastNode := newNode(t, nil, nil) + // hold reply from the node till end of the test + testContext, testCancel := context.WithCancel(tests.Context(t)) + defer testCancel() + slowNode := newNode(t, errors.New("transaction failed"), func(_ mock.Arguments) { + // block caller til end of the test + <-testContext.Done() + }) + slowSendOnly := newNode(t, errors.New("send only failed"), func(_ mock.Arguments) { + // block caller til end of the test + <-testContext.Done() + }) + + lggr, _ := logger.TestObserved(t, zap.DebugLevel) + + mn, txSender := newTestTransactionSender(t, chainID, lggr, + 
[]Node[types.ID, SendTxRPCClient[any]]{fastNode, slowNode}, + []SendOnlyNode[types.ID, SendTxRPCClient[any]]{slowSendOnly}) + + require.NoError(t, mn.Close()) + _, err := txSender.SendTransaction(tests.Context(t), nil) + require.EqualError(t, err, "MultiNode is stopped") + }) + t.Run("Fails when closed", func(t *testing.T) { + chainID := types.RandomID() + fastNode := newNode(t, nil, nil) + // hold reply from the node till end of the test + testContext, testCancel := context.WithCancel(tests.Context(t)) + defer testCancel() + slowNode := newNode(t, errors.New("transaction failed"), func(_ mock.Arguments) { + // block caller til end of the test + <-testContext.Done() + }) + slowSendOnly := newNode(t, errors.New("send only failed"), func(_ mock.Arguments) { + // block caller til end of the test + <-testContext.Done() + }) + + lggr, _ := logger.TestObserved(t, zap.DebugLevel) + + _, txSender := newTestTransactionSender(t, chainID, lggr, + []Node[types.ID, SendTxRPCClient[any]]{fastNode, slowNode}, + []SendOnlyNode[types.ID, SendTxRPCClient[any]]{slowSendOnly}) + + require.NoError(t, txSender.Close()) + _, err := txSender.SendTransaction(tests.Context(t), nil) + require.EqualError(t, err, "context canceled") + }) + t.Run("Returns error if there is no healthy primary nodes", func(t *testing.T) { + chainID := types.RandomID() + primary := newNodeWithState(t, NodeStateUnreachable, nil, nil) + sendOnly := newNodeWithState(t, NodeStateUnreachable, nil, nil) + + lggr, _ := logger.TestObserved(t, zap.DebugLevel) + + _, txSender := newTestTransactionSender(t, chainID, lggr, + []Node[types.ID, SendTxRPCClient[any]]{primary}, + []SendOnlyNode[types.ID, SendTxRPCClient[any]]{sendOnly}) + + _, sendErr := txSender.SendTransaction(tests.Context(t), nil) + assert.EqualError(t, sendErr, ErroringNodeError.Error()) + }) + + t.Run("Transaction success even if one of the nodes is unhealthy", func(t *testing.T) { + chainID := types.RandomID() + mainNode := newNode(t, nil, nil) + unexpectedCall := func(args mock.Arguments) { + panic("SendTx must not be called for unhealthy node") + } + unhealthyNode := newNodeWithState(t, NodeStateUnreachable, nil, unexpectedCall) + unhealthySendOnlyNode := newNodeWithState(t, NodeStateUnreachable, nil, unexpectedCall) + + lggr, _ := logger.TestObserved(t, zap.DebugLevel) + + _, txSender := newTestTransactionSender(t, chainID, lggr, + []Node[types.ID, SendTxRPCClient[any]]{mainNode, unhealthyNode}, + []SendOnlyNode[types.ID, SendTxRPCClient[any]]{unhealthySendOnlyNode}) + + returnCode, sendErr := txSender.SendTransaction(tests.Context(t), nil) + require.NoError(t, sendErr) + require.Equal(t, Successful, returnCode) + }) +} + +func TestTransactionSender_SendTransaction_aggregateTxResults(t *testing.T) { + t.Parallel() + // ensure failure on new SendTxReturnCode + codesToCover := map[SendTxReturnCode]struct{}{} + for code := Successful; code < sendTxReturnCodeLen; code++ { + codesToCover[code] = struct{}{} + } + + testCases := []struct { + Name string + ExpectedTxResult string + ExpectedCriticalErr string + ResultsByCode sendTxErrors + }{ + { + Name: "Returns success and logs critical error on success and Fatal", + ExpectedTxResult: "success", + ExpectedCriticalErr: "found contradictions in nodes replies on SendTransaction: got success and severe error", + ResultsByCode: sendTxErrors{ + Successful: {errors.New("success")}, + Fatal: {errors.New("fatal")}, + }, + }, + { + Name: "Returns TransactionAlreadyKnown and logs critical error on TransactionAlreadyKnown and Fatal", + 
ExpectedTxResult: "tx_already_known", + ExpectedCriticalErr: "found contradictions in nodes replies on SendTransaction: got success and severe error", + ResultsByCode: sendTxErrors{ + TransactionAlreadyKnown: {errors.New("tx_already_known")}, + Unsupported: {errors.New("unsupported")}, + }, + }, + { + Name: "Prefers sever error to temporary", + ExpectedTxResult: "underpriced", + ExpectedCriticalErr: "", + ResultsByCode: sendTxErrors{ + Retryable: {errors.New("retryable")}, + Underpriced: {errors.New("underpriced")}, + }, + }, + { + Name: "Returns temporary error", + ExpectedTxResult: "retryable", + ExpectedCriticalErr: "", + ResultsByCode: sendTxErrors{ + Retryable: {errors.New("retryable")}, + }, + }, + { + Name: "Insufficient funds is treated as error", + ExpectedTxResult: "", + ExpectedCriticalErr: "", + ResultsByCode: sendTxErrors{ + Successful: {nil}, + InsufficientFunds: {errors.New("insufficientFunds")}, + }, + }, + { + Name: "Logs critical error on empty ResultsByCode", + ExpectedTxResult: "expected at least one response on SendTransaction", + ExpectedCriticalErr: "expected at least one response on SendTransaction", + ResultsByCode: sendTxErrors{}, + }, + { + Name: "Zk terminally stuck", + ExpectedTxResult: "not enough keccak counters to continue the execution", + ExpectedCriticalErr: "", + ResultsByCode: sendTxErrors{ + TerminallyStuck: {errors.New("not enough keccak counters to continue the execution")}, + }, + }, + } + + for _, testCase := range testCases { + for code := range testCase.ResultsByCode { + delete(codesToCover, code) + } + + t.Run(testCase.Name, func(t *testing.T) { + _, txResult, err := aggregateTxResults(testCase.ResultsByCode) + if testCase.ExpectedTxResult == "" { + assert.NoError(t, err) + } else { + assert.EqualError(t, txResult, testCase.ExpectedTxResult) + } + + logger.Sugared(logger.Test(t)).Info("Map: " + fmt.Sprint(testCase.ResultsByCode)) + logger.Sugared(logger.Test(t)).Criticalw("observed invariant violation on SendTransaction", "resultsByCode", testCase.ResultsByCode, "err", err) + + if testCase.ExpectedCriticalErr == "" { + assert.NoError(t, err) + } else { + assert.EqualError(t, err, testCase.ExpectedCriticalErr) + } + }) + } + + // explicitly signal that following codes are properly handled in aggregateTxResults, + // but dedicated test cases won't be beneficial + for _, codeToIgnore := range []SendTxReturnCode{Unknown, ExceedsMaxFee, FeeOutOfValidRange} { + delete(codesToCover, codeToIgnore) + } + assert.Empty(t, codesToCover, "all of the SendTxReturnCode must be covered by this test") +} diff --git a/pkg/solana/client/multinode/types.go b/pkg/solana/client/multinode/types.go new file mode 100644 index 000000000..5cd831fc1 --- /dev/null +++ b/pkg/solana/client/multinode/types.go @@ -0,0 +1,124 @@ +package client + +import ( + "context" + "fmt" + "math/big" +) + +// A chain-agnostic generic interface to represent the following native types on various chains: +// PublicKey, Address, Account, BlockHash, TxHash +type Hashable interface { + fmt.Stringer + comparable + + Bytes() []byte +} + +// Subscription represents an event subscription where events are +// delivered on a data channel. +// This is a generic interface for Subscription to represent used by clients. +type Subscription interface { + // Unsubscribe cancels the sending of events to the data channel + // and closes the error channel. Unsubscribe should be callable multiple + // times without causing an error. + Unsubscribe() + // Err returns the subscription error channel. 
The error channel receives + // a value if there is an issue with the subscription (e.g. the network connection + // delivering the events has been closed). Only one value will ever be sent. + // The error channel is closed by Unsubscribe. + Err() <-chan error +} + +// RPCClient includes all the necessary generalized RPC methods along with any additional chain-specific methods. +type RPCClient[ + CHAIN_ID ID, + HEAD Head, +] interface { + // ChainID - fetches ChainID from the RPC to verify that it matches config + ChainID(ctx context.Context) (CHAIN_ID, error) + // Dial - prepares the RPC for usage. Can be called on fresh or closed RPC + Dial(ctx context.Context) error + // SubscribeToHeads - returns channel and subscription for new heads. + SubscribeToHeads(ctx context.Context) (<-chan HEAD, Subscription, error) + // SubscribeToFinalizedHeads - returns channel and subscription for finalized heads. + SubscribeToFinalizedHeads(ctx context.Context) (<-chan HEAD, Subscription, error) + // Ping - returns error if RPC is not reachable + Ping(context.Context) error + // IsSyncing - returns true if the RPC is in Syncing state and can not process calls + IsSyncing(ctx context.Context) (bool, error) + // UnsubscribeAllExcept - close all subscriptions except `subs` + UnsubscribeAllExcept(subs ...Subscription) + // Close - closes all subscriptions and aborts all RPC calls + Close() + // GetInterceptedChainInfo - returns latest and highest observed by application layer ChainInfo. + // latest ChainInfo is the most recent value received within a NodeClient's current lifecycle between Dial and DisconnectAll. + // highestUserObservations ChainInfo is the highest ChainInfo observed excluding health checks calls. + // Its values must not be reset. + // The results of corresponding calls, to get the most recent head and the latest finalized head, must be + // intercepted and reflected in ChainInfo before being returned to a caller. Otherwise, MultiNode is not able to + // provide repeatable read guarantee. + // DisconnectAll must reset latest ChainInfo to default value. + // Ensure implementation does not have a race condition when values are reset before request completion and as + // a result latest ChainInfo contains information from the previous cycle. + GetInterceptedChainInfo() (latest, highestUserObservations ChainInfo) +} + +// Head is the interface required by the NodeClient +type Head interface { + BlockNumber() int64 + BlockDifficulty() *big.Int + IsValid() bool +} + +// PoolChainInfoProvider - provides aggregation of nodes pool ChainInfo +type PoolChainInfoProvider interface { + // LatestChainInfo - returns number of live nodes available in the pool, so we can prevent the last alive node in a pool from being + // moved to out-of-sync state. It is better to have one out-of-sync node than no nodes at all. + // Returns highest latest ChainInfo within the alive nodes. E.g. most recent block number and highest block number + // observed by Node A are 10 and 15; Node B - 12 and 14. This method will return 12. + LatestChainInfo() (int, ChainInfo) + // HighestUserObservations - returns highest ChainInfo ever observed by any user of MultiNode. 
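	// For illustration, using the numbers in the LatestChainInfo example above:
	// LatestChainInfo reports 12 (the best of the alive nodes' current, resettable
	// values), while HighestUserObservations would report 15, since it tracks the
	// highest value ever seen from user requests and is never reset.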
+ HighestUserObservations() ChainInfo +} + +// ChainInfo - defines RPC's or MultiNode's view on the chain +type ChainInfo struct { + BlockNumber int64 + FinalizedBlockNumber int64 + TotalDifficulty *big.Int +} + +func MaxTotalDifficulty(a, b *big.Int) *big.Int { + if a == nil { + if b == nil { + return nil + } + + return big.NewInt(0).Set(b) + } + + if b == nil || a.Cmp(b) >= 0 { + return big.NewInt(0).Set(a) + } + + return big.NewInt(0).Set(b) +} + +// ID represents the base type, for any chain's ID. +// It should be convertible to a string, that can uniquely identify this chain +type ID fmt.Stringer + +type multiNodeContextKey int + +const ( + contextKeyHeathCheckRequest multiNodeContextKey = iota + 1 +) + +func CtxAddHealthCheckFlag(ctx context.Context) context.Context { + return context.WithValue(ctx, contextKeyHeathCheckRequest, struct{}{}) +} + +func CtxIsHeathCheckRequest(ctx context.Context) bool { + return ctx.Value(contextKeyHeathCheckRequest) != nil +} diff --git a/pkg/solana/client/rpc_client.go b/pkg/solana/client/rpc_client.go new file mode 100644 index 000000000..0db8d9062 --- /dev/null +++ b/pkg/solana/client/rpc_client.go @@ -0,0 +1,318 @@ +package client + +import ( + "context" + "errors" + "fmt" + "math/big" + "time" + + "github.com/gagliardetto/solana-go" + "github.com/gagliardetto/solana-go/rpc" + "golang.org/x/sync/singleflight" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + + mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" + "github.com/smartcontractkit/chainlink-solana/pkg/solana/config" + "github.com/smartcontractkit/chainlink-solana/pkg/solana/monitor" +) + +type StringID string + +func (s StringID) String() string { + return string(s) +} + +// TODO: ChainReaderWriter needs ChainID() (string, error) +// TODO: We probably don't need this though? 
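// A minimal sketch of how these adapter types line up with the multinode package:
// mn.ID is simply fmt.Stringer and mn.Head only requires BlockNumber, BlockDifficulty,
// and IsValid, so the following compile-time assertions hold alongside the existing
// ReaderWriter assertion below.
var _ mn.ID = StringID("mainnet")
var _ mn.Head = (*Head)(nil)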
+var _ ReaderWriter = (*RpcClient)(nil) + +type Head struct { + rpc.GetBlockResult +} + +func (h *Head) BlockNumber() int64 { + if h.BlockHeight == nil { + return 0 + } + return int64(*h.BlockHeight) +} + +func (h *Head) BlockDifficulty() *big.Int { + return nil +} + +func (h *Head) IsValid() bool { + return true +} + +type RpcClient struct { + url string + rpc *rpc.Client + skipPreflight bool // to enable or disable preflight checks + commitment rpc.CommitmentType + maxRetries *uint + txTimeout time.Duration + contextDuration time.Duration + log logger.Logger + + // provides a duplicate function call suppression mechanism + requestGroup *singleflight.Group +} + +func (c *RpcClient) Dial(ctx context.Context) error { + //TODO implement me + panic("implement me") +} + +func (c *RpcClient) SubscribeToHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { + //TODO implement me + panic("implement me") +} + +func (c *RpcClient) SubscribeToFinalizedHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { + //TODO implement me + panic("implement me") +} + +func (c *RpcClient) Ping(ctx context.Context) error { + //TODO implement me + panic("implement me") +} + +func (c *RpcClient) IsSyncing(ctx context.Context) (bool, error) { + //TODO implement me + panic("implement me") +} + +func (c *RpcClient) UnsubscribeAllExcept(subs ...mn.Subscription) { + //TODO implement me + panic("implement me") +} + +func (c *RpcClient) Close() { + //TODO implement me + panic("implement me") +} + +func (c *RpcClient) GetInterceptedChainInfo() (latest, highestUserObservations mn.ChainInfo) { + //TODO implement me + panic("implement me") +} + +func NewRpcClient(endpoint string, cfg config.Config, requestTimeout time.Duration, log logger.Logger) (*RpcClient, error) { + return &RpcClient{ + url: endpoint, + rpc: rpc.New(endpoint), + skipPreflight: cfg.SkipPreflight(), + commitment: cfg.Commitment(), + maxRetries: cfg.MaxRetries(), + txTimeout: cfg.TxTimeout(), + contextDuration: requestTimeout, + log: log, + requestGroup: &singleflight.Group{}, + }, nil +} + +func (c *RpcClient) latency(name string) func() { + start := time.Now() + return func() { + monitor.SetClientLatency(time.Since(start), name, c.url) + } +} + +func (c *RpcClient) Balance(addr solana.PublicKey) (uint64, error) { + done := c.latency("balance") + defer done() + + ctx, cancel := context.WithTimeout(context.Background(), c.contextDuration) + defer cancel() + + v, err, _ := c.requestGroup.Do(fmt.Sprintf("GetBalance(%s)", addr.String()), func() (interface{}, error) { + return c.rpc.GetBalance(ctx, addr, c.commitment) + }) + if err != nil { + return 0, err + } + res := v.(*rpc.GetBalanceResult) + return res.Value, err +} + +func (c *RpcClient) SlotHeight() (uint64, error) { + return c.SlotHeightWithCommitment(rpc.CommitmentProcessed) // get the latest slot height +} + +func (c *RpcClient) SlotHeightWithCommitment(commitment rpc.CommitmentType) (uint64, error) { + done := c.latency("slot_height") + defer done() + + ctx, cancel := context.WithTimeout(context.Background(), c.contextDuration) + defer cancel() + v, err, _ := c.requestGroup.Do("GetSlotHeight", func() (interface{}, error) { + return c.rpc.GetSlot(ctx, commitment) + }) + return v.(uint64), err +} + +func (c *RpcClient) GetAccountInfoWithOpts(ctx context.Context, addr solana.PublicKey, opts *rpc.GetAccountInfoOpts) (*rpc.GetAccountInfoResult, error) { + done := c.latency("account_info") + defer done() + + ctx, cancel := context.WithTimeout(ctx, c.contextDuration) + defer 
cancel() + opts.Commitment = c.commitment // overrides passed in value - use defined client commitment type + return c.rpc.GetAccountInfoWithOpts(ctx, addr, opts) +} + +func (c *RpcClient) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) { + done := c.latency("latest_blockhash") + defer done() + + ctx, cancel := context.WithTimeout(context.Background(), c.contextDuration) + defer cancel() + + v, err, _ := c.requestGroup.Do("GetLatestBlockhash", func() (interface{}, error) { + return c.rpc.GetLatestBlockhash(ctx, c.commitment) + }) + return v.(*rpc.GetLatestBlockhashResult), err +} + +func (c *RpcClient) ChainID(ctx context.Context) (StringID, error) { + done := c.latency("chain_id") + defer done() + + ctx, cancel := context.WithTimeout(ctx, c.contextDuration) + defer cancel() + v, err, _ := c.requestGroup.Do("GetGenesisHash", func() (interface{}, error) { + return c.rpc.GetGenesisHash(ctx) + }) + if err != nil { + return "", err + } + hash := v.(solana.Hash) + + var network string + switch hash.String() { + case DevnetGenesisHash: + network = "devnet" + case TestnetGenesisHash: + network = "testnet" + case MainnetGenesisHash: + network = "mainnet" + default: + c.log.Warnf("unknown genesis hash - assuming solana chain is 'localnet'") + network = "localnet" + } + return StringID(network), nil +} + +func (c *RpcClient) GetFeeForMessage(msg string) (uint64, error) { + done := c.latency("fee_for_message") + defer done() + + // msg is base58 encoded data + + ctx, cancel := context.WithTimeout(context.Background(), c.contextDuration) + defer cancel() + res, err := c.rpc.GetFeeForMessage(ctx, msg, c.commitment) + if err != nil { + return 0, fmt.Errorf("error in GetFeeForMessage: %w", err) + } + + if res == nil || res.Value == nil { + return 0, errors.New("nil pointer in GetFeeForMessage") + } + return *res.Value, nil +} + +// https://docs.solana.com/developing/clients/jsonrpc-api#getsignaturestatuses +func (c *RpcClient) SignatureStatuses(ctx context.Context, sigs []solana.Signature) ([]*rpc.SignatureStatusesResult, error) { + done := c.latency("signature_statuses") + defer done() + + ctx, cancel := context.WithTimeout(ctx, c.contextDuration) + defer cancel() + + // searchTransactionHistory = false + res, err := c.rpc.GetSignatureStatuses(ctx, false, sigs...) 
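	// As in GetFeeForMessage above and SimulateTx below, both an RPC error and a
	// nil result value are treated as failures by the checks that follow.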
+ if err != nil { + return nil, fmt.Errorf("error in GetSignatureStatuses: %w", err) + } + + if res == nil || res.Value == nil { + return nil, errors.New("nil pointer in GetSignatureStatuses") + } + return res.Value, nil +} + +// https://docs.solana.com/developing/clients/jsonrpc-api#simulatetransaction +// opts - (optional) use `nil` to use defaults +func (c *RpcClient) SimulateTx(ctx context.Context, tx *solana.Transaction, opts *rpc.SimulateTransactionOpts) (*rpc.SimulateTransactionResult, error) { + done := c.latency("simulate_tx") + defer done() + + ctx, cancel := context.WithTimeout(ctx, c.contextDuration) + defer cancel() + + if opts == nil { + opts = &rpc.SimulateTransactionOpts{ + SigVerify: true, // verify signature + Commitment: c.commitment, + } + } + + res, err := c.rpc.SimulateTransactionWithOpts(ctx, tx, opts) + if err != nil { + return nil, fmt.Errorf("error in SimulateTransactionWithOpts: %w", err) + } + + if res == nil || res.Value == nil { + return nil, errors.New("nil pointer in SimulateTransactionWithOpts") + } + + return res.Value, nil +} + +func (c *RpcClient) SendTransaction(ctx context.Context, tx *solana.Transaction) error { + // TODO: Implement + return nil +} + +func (c *RpcClient) SendTx(ctx context.Context, tx *solana.Transaction) (solana.Signature, error) { + done := c.latency("send_tx") + defer done() + + ctx, cancel := context.WithTimeout(ctx, c.txTimeout) + defer cancel() + + opts := rpc.TransactionOpts{ + SkipPreflight: c.skipPreflight, + PreflightCommitment: c.commitment, + MaxRetries: c.maxRetries, + } + + return c.rpc.SendTransactionWithOpts(ctx, tx, opts) +} + +func (c *RpcClient) GetLatestBlock() (*rpc.GetBlockResult, error) { + // get latest confirmed slot + slot, err := c.SlotHeightWithCommitment(c.commitment) + if err != nil { + return nil, fmt.Errorf("GetLatestBlock.SlotHeight: %w", err) + } + + // get block based on slot + done := c.latency("latest_block") + defer done() + ctx, cancel := context.WithTimeout(context.Background(), c.txTimeout) + defer cancel() + v, err, _ := c.requestGroup.Do("GetBlockWithOpts", func() (interface{}, error) { + version := uint64(0) // pull all tx types (legacy + v0) + return c.rpc.GetBlockWithOpts(ctx, slot, &rpc.GetBlockOpts{ + Commitment: c.commitment, + MaxSupportedTransactionVersion: &version, + }) + }) + return v.(*rpc.GetBlockResult), err +} diff --git a/pkg/solana/config/multinode.go b/pkg/solana/config/multinode.go new file mode 100644 index 000000000..72e345728 --- /dev/null +++ b/pkg/solana/config/multinode.go @@ -0,0 +1,86 @@ +package config + +import "time" + +type MultiNode struct { + // TODO: Determine current config overlap https://smartcontract-it.atlassian.net/browse/BCI-4065 + // Feature flag + multiNodeEnabled bool + + // Node Configs + pollFailureThreshold uint32 + pollInterval time.Duration + selectionMode string + syncThreshold uint32 + nodeIsSyncingEnabled bool + finalizedBlockPollInterval time.Duration + enforceRepeatableRead bool + deathDeclarationDelay time.Duration + + // Chain Configs + nodeNoNewHeadsThreshold time.Duration + noNewFinalizedHeadsThreshold time.Duration + finalityDepth uint32 + finalityTagEnabled bool + finalizedBlockOffset uint32 +} + +func (c *MultiNode) MultiNodeEnabled() bool { + return c.multiNodeEnabled +} + +func (c *MultiNode) PollFailureThreshold() uint32 { + return c.pollFailureThreshold +} + +func (c *MultiNode) PollInterval() time.Duration { + return c.pollInterval +} + +func (c *MultiNode) SelectionMode() string { + return c.selectionMode +} + +func (c 
*MultiNode) SyncThreshold() uint32 { + return c.syncThreshold +} + +func (c *MultiNode) NodeIsSyncingEnabled() bool { + return c.nodeIsSyncingEnabled +} + +func (c *MultiNode) FinalizedBlockPollInterval() time.Duration { + return c.finalizedBlockPollInterval +} + +func (c *MultiNode) EnforceRepeatableRead() bool { + return c.enforceRepeatableRead +} + +func (c *MultiNode) DeathDeclarationDelay() time.Duration { + return c.deathDeclarationDelay +} + +func (c *MultiNode) NodeNoNewHeadsThreshold() time.Duration { + return c.nodeNoNewHeadsThreshold +} + +func (c *MultiNode) NoNewFinalizedHeadsThreshold() time.Duration { + return c.noNewFinalizedHeadsThreshold +} + +func (c *MultiNode) FinalityDepth() uint32 { + return c.finalityDepth +} + +func (c *MultiNode) FinalityTagEnabled() bool { + return c.finalityTagEnabled +} + +func (c *MultiNode) FinalizedBlockOffset() uint32 { + return c.finalizedBlockOffset +} + +func (c *MultiNode) SetDefaults() { + // TODO: Set defaults for MultiNode config https://smartcontract-it.atlassian.net/browse/BCI-4065 +} diff --git a/pkg/solana/config/toml.go b/pkg/solana/config/toml.go index e5eb705e6..b1cdfc7f5 100644 --- a/pkg/solana/config/toml.go +++ b/pkg/solana/config/toml.go @@ -112,6 +112,7 @@ type TOMLConfig struct { // Do not access directly, use [IsEnabled] Enabled *bool Chain + MultiNode Nodes Nodes } @@ -279,8 +280,13 @@ func (c *TOMLConfig) ListNodes() Nodes { return c.Nodes } +func (c *TOMLConfig) MultiNodeConfig() *MultiNode { + return &c.MultiNode +} + func NewDefault() *TOMLConfig { cfg := &TOMLConfig{} - cfg.SetDefaults() + cfg.Chain.SetDefaults() + cfg.MultiNode.SetDefaults() return cfg } From 1c684859c1d49f1d4248028585932edb82b1fba5 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Tue, 3 Sep 2024 11:48:41 -0400 Subject: [PATCH 02/22] Update MultiNode files --- pkg/solana/client/multinode/ctx.go | 17 +++ pkg/solana/client/multinode/node.go | 7 +- pkg/solana/client/multinode/node_fsm.go | 8 +- pkg/solana/client/multinode/node_lifecycle.go | 95 ++++---------- pkg/solana/client/multinode/node_selector.go | 6 +- .../multinode/node_selector_highest_head.go | 38 ++++++ .../multinode/node_selector_priority_level.go | 121 ++++++++++++++++++ .../multinode/node_selector_round_robin.go | 46 +++++++ .../node_selector_total_difficulty.go | 51 ++++++++ pkg/solana/client/multinode/poller.go | 58 ++++----- pkg/solana/client/multinode/redialbackoff.go | 17 +++ pkg/solana/client/multinode/send_only_node.go | 2 +- .../client/multinode/transaction_sender.go | 28 ++-- pkg/solana/client/multinode/types.go | 31 +---- pkg/solana/client/rpc_client.go | 4 +- 15 files changed, 371 insertions(+), 158 deletions(-) create mode 100644 pkg/solana/client/multinode/ctx.go create mode 100644 pkg/solana/client/multinode/node_selector_highest_head.go create mode 100644 pkg/solana/client/multinode/node_selector_priority_level.go create mode 100644 pkg/solana/client/multinode/node_selector_round_robin.go create mode 100644 pkg/solana/client/multinode/node_selector_total_difficulty.go create mode 100644 pkg/solana/client/multinode/redialbackoff.go diff --git a/pkg/solana/client/multinode/ctx.go b/pkg/solana/client/multinode/ctx.go new file mode 100644 index 000000000..57b2fc8a8 --- /dev/null +++ b/pkg/solana/client/multinode/ctx.go @@ -0,0 +1,17 @@ +package client + +import "context" + +type multiNodeContextKey int + +const ( + contextKeyHeathCheckRequest multiNodeContextKey = iota + 1 +) + +func CtxAddHealthCheckFlag(ctx context.Context) context.Context { + return 
context.WithValue(ctx, contextKeyHeathCheckRequest, struct{}{}) +} + +func CtxIsHeathCheckRequest(ctx context.Context) bool { + return ctx.Value(contextKeyHeathCheckRequest) != nil +} diff --git a/pkg/solana/client/multinode/node.go b/pkg/solana/client/multinode/node.go index c3532b1a1..8ab30f856 100644 --- a/pkg/solana/client/multinode/node.go +++ b/pkg/solana/client/multinode/node.go @@ -110,8 +110,7 @@ type node[ // wg waits for subsidiary goroutines wg sync.WaitGroup - aliveLoopSub Subscription - finalizedBlockSub Subscription + healthCheckSubs []Subscription } func NewNode[ @@ -180,9 +179,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) RPC() RPC { // unsubscribeAllExceptAliveLoop is not thread-safe; it should only be called // while holding the stateMu lock. func (n *node[CHAIN_ID, HEAD, RPC]) unsubscribeAllExceptAliveLoop() { - aliveLoopSub := n.aliveLoopSub - finalizedBlockSub := n.finalizedBlockSub - n.rpc.UnsubscribeAllExcept(aliveLoopSub, finalizedBlockSub) + n.rpc.UnsubscribeAllExcept(n.healthCheckSubs...) } func (n *node[CHAIN_ID, HEAD, RPC]) UnsubscribeAllExceptAliveLoop() { diff --git a/pkg/solana/client/multinode/node_fsm.go b/pkg/solana/client/multinode/node_fsm.go index 1111210c4..981e325da 100644 --- a/pkg/solana/client/multinode/node_fsm.go +++ b/pkg/solana/client/multinode/node_fsm.go @@ -256,7 +256,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) transitionToOutOfSync(fn func()) { } switch n.state { case NodeStateAlive: - n.unsubscribeAllExceptAliveLoop() + n.rpc.Close() n.state = NodeStateOutOfSync default: panic(transitionFail(n.state, NodeStateOutOfSync)) @@ -281,7 +281,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) transitionToUnreachable(fn func()) { } switch n.state { case NodeStateUndialed, NodeStateDialed, NodeStateAlive, NodeStateOutOfSync, NodeStateInvalidChainID, NodeStateSyncing: - n.unsubscribeAllExceptAliveLoop() + n.rpc.Close() n.state = NodeStateUnreachable default: panic(transitionFail(n.state, NodeStateUnreachable)) @@ -324,7 +324,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) transitionToInvalidChainID(fn func()) { } switch n.state { case NodeStateDialed, NodeStateOutOfSync, NodeStateSyncing: - n.unsubscribeAllExceptAliveLoop() + n.rpc.Close() n.state = NodeStateInvalidChainID default: panic(transitionFail(n.state, NodeStateInvalidChainID)) @@ -349,7 +349,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) transitionToSyncing(fn func()) { } switch n.state { case NodeStateDialed, NodeStateOutOfSync, NodeStateInvalidChainID: - n.unsubscribeAllExceptAliveLoop() + n.rpc.Close() n.state = NodeStateSyncing default: panic(transitionFail(n.state, NodeStateSyncing)) diff --git a/pkg/solana/client/multinode/node_lifecycle.go b/pkg/solana/client/multinode/node_lifecycle.go index 823a1abc3..44203bf97 100644 --- a/pkg/solana/client/multinode/node_lifecycle.go +++ b/pkg/solana/client/multinode/node_lifecycle.go @@ -7,16 +7,12 @@ import ( "math/big" "time" - "github.com/smartcontractkit/chainlink/v2/common/types" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/smartcontractkit/chainlink-common/pkg/logger" "github.com/smartcontractkit/chainlink-common/pkg/utils" bigmath "github.com/smartcontractkit/chainlink-common/pkg/utils/big_math" - - iutils "github.com/smartcontractkit/chainlink/v2/common/internal/utils" ) var ( @@ -103,15 +99,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() { return } - n.stateMu.Lock() - n.aliveLoopSub = headsSub.sub - n.stateMu.Unlock() - defer func() { - defer headsSub.sub.Unsubscribe() - 
n.stateMu.Lock() - n.aliveLoopSub = nil - n.stateMu.Unlock() - }() + defer n.unsubscribeHealthChecks() var pollCh <-chan time.Time if pollInterval > 0 { @@ -138,16 +126,6 @@ func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() { n.declareUnreachable() return } - - n.stateMu.Lock() - n.finalizedBlockSub = finalizedHeadsSub.sub - n.stateMu.Unlock() - defer func() { - finalizedHeadsSub.Unsubscribe() - n.stateMu.Lock() - n.finalizedBlockSub = nil - n.stateMu.Unlock() - }() } localHighestChainInfo, _ := n.rpc.GetInterceptedChainInfo() @@ -187,7 +165,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() { return } _, latestChainInfo := n.StateAndLatest() - if outOfSync, liveNodes := n.syncStatus(latestChainInfo.BlockNumber, latestChainInfo.TotalDifficulty); outOfSync { + if outOfSync, liveNodes := n.isOutOfSyncWithPool(latestChainInfo); outOfSync { // note: there must be another live node for us to be out of sync lggr.Errorw("RPC endpoint has fallen behind", "blockNumber", latestChainInfo.BlockNumber, "totalDifficulty", latestChainInfo.TotalDifficulty, "nodeState", n.getCachedState()) if liveNodes < 2 { @@ -232,17 +210,11 @@ func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() { n.declareUnreachable() return } - if !latestFinalized.IsValid() { - lggr.Warn("Latest finalized block is not valid") - continue - } - latestFinalizedBN := latestFinalized.BlockNumber() - if latestFinalizedBN > localHighestChainInfo.FinalizedBlockNumber { - promPoolRPCNodeHighestFinalizedBlock.WithLabelValues(n.chainID.String(), n.name).Set(float64(latestFinalizedBN)) - localHighestChainInfo.FinalizedBlockNumber = latestFinalizedBN + receivedNewHead := n.onNewFinalizedHead(lggr, &localHighestChainInfo, latestFinalized) + if receivedNewHead && noNewFinalizedBlocksTimeoutThreshold > 0 { + finalizedHeadsSub.ResetTimer(noNewFinalizedBlocksTimeoutThreshold) } - case <-finalizedHeadsSub.NoNewHeads: // We haven't received a finalized head on the channel for at least the // threshold amount of time, mark it broken @@ -266,13 +238,22 @@ func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() { } } +func (n *node[CHAIN_ID, HEAD, RPC]) unsubscribeHealthChecks() { + n.stateMu.Lock() + for _, sub := range n.healthCheckSubs { + sub.Unsubscribe() + } + n.healthCheckSubs = []Subscription{} + n.stateMu.Unlock() +} + type headSubscription[HEAD any] struct { Heads <-chan HEAD Errors <-chan error NoNewHeads <-chan time.Time noNewHeadsTicker *time.Ticker - sub types.Subscription + sub Subscription cleanUpTasks []func() } @@ -287,10 +268,10 @@ func (sub *headSubscription[HEAD]) Unsubscribe() { } func (n *node[CHAIN_ID, HEAD, PRC]) registerNewSubscription(ctx context.Context, lggr logger.SugaredLogger, - noNewDataThreshold time.Duration, newSub func(ctx context.Context) (<-chan HEAD, types.Subscription, error)) (headSubscription[HEAD], error) { + noNewDataThreshold time.Duration, newSub func(ctx context.Context) (<-chan HEAD, Subscription, error)) (headSubscription[HEAD], error) { result := headSubscription[HEAD]{} var err error - var sub types.Subscription + var sub Subscription result.Heads, sub, err = newSub(ctx) if err != nil { return result, err @@ -299,11 +280,10 @@ func (n *node[CHAIN_ID, HEAD, PRC]) registerNewSubscription(ctx context.Context, result.Errors = sub.Err() lggr.Debug("Successfully subscribed") - // TODO: will be removed as part of merging effort with BCI-2875 result.sub = sub - //n.stateMu.Lock() - //n.healthCheckSubs = append(n.healthCheckSubs, sub) - //n.stateMu.Unlock() + n.stateMu.Lock() + n.healthCheckSubs = append(n.healthCheckSubs, 
sub) + n.stateMu.Unlock() result.cleanUpTasks = append(result.cleanUpTasks, sub.Unsubscribe) @@ -365,31 +345,6 @@ func (n *node[CHAIN_ID, HEAD, RPC]) onNewHead(lggr logger.SugaredLogger, chainIn return true } -// syncStatus returns outOfSync true if num or td is more than SyncThresold behind the best node. -// Always returns outOfSync false for SyncThreshold 0. -// liveNodes is only included when outOfSync is true. -func (n *node[CHAIN_ID, HEAD, RPC]) syncStatus(num int64, td *big.Int) (outOfSync bool, liveNodes int) { - if n.poolInfoProvider == nil { - return // skip for tests - } - threshold := n.nodePoolCfg.SyncThreshold() - if threshold == 0 { - return // disabled - } - // Check against best node - ln, ci := n.poolInfoProvider.LatestChainInfo() - mode := n.nodePoolCfg.SelectionMode() - switch mode { - case NodeSelectionModeHighestHead, NodeSelectionModeRoundRobin, NodeSelectionModePriorityLevel: - return num < ci.BlockNumber-int64(threshold), ln - case NodeSelectionModeTotalDifficulty: - bigThreshold := big.NewInt(int64(threshold)) - return td.Cmp(bigmath.Sub(ci.TotalDifficulty, bigThreshold)) < 0, ln - default: - panic("unrecognized NodeSelectionMode: " + mode) - } -} - const ( msgReceivedBlock = "Received block for RPC node, waiting until back in-sync to mark as live again" msgReceivedFinalizedBlock = "Received new finalized block for RPC node, waiting until back in-sync to mark as live again" @@ -462,8 +417,9 @@ func (n *node[CHAIN_ID, HEAD, RPC]) outOfSyncLoop(syncIssues syncStatus) { return } + defer n.unsubscribeHealthChecks() + lggr.Tracew("Successfully subscribed to heads feed on out-of-sync RPC node") - defer headsSub.Unsubscribe() noNewFinalizedBlocksTimeoutThreshold := n.chainCfg.NoNewFinalizedHeadsThreshold() var finalizedHeadsSub headSubscription[HEAD] @@ -477,7 +433,6 @@ func (n *node[CHAIN_ID, HEAD, RPC]) outOfSyncLoop(syncIssues syncStatus) { } lggr.Tracew("Successfully subscribed to finalized heads feed on out-of-sync RPC node") - defer finalizedHeadsSub.Unsubscribe() } _, localHighestChainInfo := n.rpc.GetInterceptedChainInfo() @@ -591,7 +546,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) unreachableLoop() { lggr := logger.Sugared(logger.Named(n.lfcLog, "Unreachable")) lggr.Debugw("Trying to revive unreachable RPC node", "nodeState", n.getCachedState()) - dialRetryBackoff := iutils.NewRedialBackoff() + dialRetryBackoff := NewRedialBackoff() for { select { @@ -654,7 +609,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) invalidChainIDLoop() { lggr.Debugw(fmt.Sprintf("Periodically re-checking RPC node %s with invalid chain ID", n.String()), "nodeState", n.getCachedState()) - chainIDRecheckBackoff := iutils.NewRedialBackoff() + chainIDRecheckBackoff := NewRedialBackoff() for { select { @@ -704,7 +659,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) syncingLoop() { return } - recheckBackoff := iutils.NewRedialBackoff() + recheckBackoff := NewRedialBackoff() for { select { diff --git a/pkg/solana/client/multinode/node_selector.go b/pkg/solana/client/multinode/node_selector.go index 372b521bb..872026fe2 100644 --- a/pkg/solana/client/multinode/node_selector.go +++ b/pkg/solana/client/multinode/node_selector.go @@ -2,8 +2,6 @@ package client import ( "fmt" - - "github.com/smartcontractkit/chainlink/v2/common/types" ) const ( @@ -14,7 +12,7 @@ const ( ) type NodeSelector[ - CHAIN_ID types.ID, + CHAIN_ID ID, RPC any, ] interface { // Select returns a Node, or nil if none can be selected. 
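// A minimal sketch of the contract described above, assuming a prepared slice of
// nodes (the helper name is illustrative, not part of the interface): build a
// selector for a mode and treat a nil Select() result as "no live node available".
func exampleSelect[CHAIN_ID ID, RPC any](nodes []Node[CHAIN_ID, RPC]) Node[CHAIN_ID, RPC] {
	selector := newNodeSelector(NodeSelectionModeRoundRobin, nodes)
	return selector.Select() // nil when no node is currently Alive
}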
@@ -25,7 +23,7 @@ type NodeSelector[ } func newNodeSelector[ - CHAIN_ID types.ID, + CHAIN_ID ID, RPC any, ](selectionMode string, nodes []Node[CHAIN_ID, RPC]) NodeSelector[CHAIN_ID, RPC] { switch selectionMode { diff --git a/pkg/solana/client/multinode/node_selector_highest_head.go b/pkg/solana/client/multinode/node_selector_highest_head.go new file mode 100644 index 000000000..52188bbdf --- /dev/null +++ b/pkg/solana/client/multinode/node_selector_highest_head.go @@ -0,0 +1,38 @@ +package client + +import ( + "math" +) + +type highestHeadNodeSelector[ + CHAIN_ID ID, + RPC any, +] []Node[CHAIN_ID, RPC] + +func NewHighestHeadNodeSelector[ + CHAIN_ID ID, + RPC any, +](nodes []Node[CHAIN_ID, RPC]) NodeSelector[CHAIN_ID, RPC] { + return highestHeadNodeSelector[CHAIN_ID, RPC](nodes) +} + +func (s highestHeadNodeSelector[CHAIN_ID, RPC]) Select() Node[CHAIN_ID, RPC] { + var highestHeadNumber int64 = math.MinInt64 + var highestHeadNodes []Node[CHAIN_ID, RPC] + for _, n := range s { + state, currentChainInfo := n.StateAndLatest() + currentHeadNumber := currentChainInfo.BlockNumber + if state == NodeStateAlive && currentHeadNumber >= highestHeadNumber { + if highestHeadNumber < currentHeadNumber { + highestHeadNumber = currentHeadNumber + highestHeadNodes = nil + } + highestHeadNodes = append(highestHeadNodes, n) + } + } + return firstOrHighestPriority(highestHeadNodes) +} + +func (s highestHeadNodeSelector[CHAIN_ID, RPC]) Name() string { + return NodeSelectionModeHighestHead +} diff --git a/pkg/solana/client/multinode/node_selector_priority_level.go b/pkg/solana/client/multinode/node_selector_priority_level.go new file mode 100644 index 000000000..3e171b98b --- /dev/null +++ b/pkg/solana/client/multinode/node_selector_priority_level.go @@ -0,0 +1,121 @@ +package client + +import ( + "math" + "sort" + "sync/atomic" +) + +type priorityLevelNodeSelector[ + CHAIN_ID ID, + RPC any, +] struct { + nodes []Node[CHAIN_ID, RPC] + roundRobinCount []atomic.Uint32 +} + +type nodeWithPriority[ + CHAIN_ID ID, + RPC any, +] struct { + node Node[CHAIN_ID, RPC] + priority int32 +} + +func NewPriorityLevelNodeSelector[ + CHAIN_ID ID, + RPC any, +](nodes []Node[CHAIN_ID, RPC]) NodeSelector[CHAIN_ID, RPC] { + return &priorityLevelNodeSelector[CHAIN_ID, RPC]{ + nodes: nodes, + roundRobinCount: make([]atomic.Uint32, nrOfPriorityTiers(nodes)), + } +} + +func (s priorityLevelNodeSelector[CHAIN_ID, RPC]) Select() Node[CHAIN_ID, RPC] { + nodes := s.getHighestPriorityAliveTier() + + if len(nodes) == 0 { + return nil + } + priorityLevel := nodes[len(nodes)-1].priority + + // NOTE: Inc returns the number after addition, so we must -1 to get the "current" counter + count := s.roundRobinCount[priorityLevel].Add(1) - 1 + idx := int(count % uint32(len(nodes))) + + return nodes[idx].node +} + +func (s priorityLevelNodeSelector[CHAIN_ID, RPC]) Name() string { + return NodeSelectionModePriorityLevel +} + +// getHighestPriorityAliveTier filters nodes that are not in state NodeStateAlive and +// returns only the highest tier of alive nodes +func (s priorityLevelNodeSelector[CHAIN_ID, RPC]) getHighestPriorityAliveTier() []nodeWithPriority[CHAIN_ID, RPC] { + var nodes []nodeWithPriority[CHAIN_ID, RPC] + for _, n := range s.nodes { + if n.State() == NodeStateAlive { + nodes = append(nodes, nodeWithPriority[CHAIN_ID, RPC]{n, n.Order()}) + } + } + + if len(nodes) == 0 { + return nil + } + + return removeLowerTiers(nodes) +} + +// removeLowerTiers take a slice of nodeWithPriority[CHAIN_ID, BLOCK_HASH, HEAD, RPC] and keeps only the highest 
tier +func removeLowerTiers[ + CHAIN_ID ID, + RPC any, +](nodes []nodeWithPriority[CHAIN_ID, RPC]) []nodeWithPriority[CHAIN_ID, RPC] { + sort.SliceStable(nodes, func(i, j int) bool { + return nodes[i].priority > nodes[j].priority + }) + + var nodes2 []nodeWithPriority[CHAIN_ID, RPC] + currentPriority := nodes[len(nodes)-1].priority + + for _, n := range nodes { + if n.priority == currentPriority { + nodes2 = append(nodes2, n) + } + } + + return nodes2 +} + +// nrOfPriorityTiers calculates the total number of priority tiers +func nrOfPriorityTiers[ + CHAIN_ID ID, + RPC any, +](nodes []Node[CHAIN_ID, RPC]) int32 { + highestPriority := int32(0) + for _, n := range nodes { + priority := n.Order() + if highestPriority < priority { + highestPriority = priority + } + } + return highestPriority + 1 +} + +// firstOrHighestPriority takes a list of nodes and returns the first one with the highest priority +func firstOrHighestPriority[ + CHAIN_ID ID, + RPC any, +](nodes []Node[CHAIN_ID, RPC]) Node[CHAIN_ID, RPC] { + hp := int32(math.MaxInt32) + var node Node[CHAIN_ID, RPC] + for _, n := range nodes { + if n.Order() < hp { + hp = n.Order() + node = n + } + } + return node +} diff --git a/pkg/solana/client/multinode/node_selector_round_robin.go b/pkg/solana/client/multinode/node_selector_round_robin.go new file mode 100644 index 000000000..52fa9d6c8 --- /dev/null +++ b/pkg/solana/client/multinode/node_selector_round_robin.go @@ -0,0 +1,46 @@ +package client + +import ( + "sync/atomic" +) + +type roundRobinSelector[ + CHAIN_ID ID, + RPC any, +] struct { + nodes []Node[CHAIN_ID, RPC] + roundRobinCount atomic.Uint32 +} + +func NewRoundRobinSelector[ + CHAIN_ID ID, + RPC any, +](nodes []Node[CHAIN_ID, RPC]) NodeSelector[CHAIN_ID, RPC] { + return &roundRobinSelector[CHAIN_ID, RPC]{ + nodes: nodes, + } +} + +func (s *roundRobinSelector[CHAIN_ID, RPC]) Select() Node[CHAIN_ID, RPC] { + var liveNodes []Node[CHAIN_ID, RPC] + for _, n := range s.nodes { + if n.State() == NodeStateAlive { + liveNodes = append(liveNodes, n) + } + } + + nNodes := len(liveNodes) + if nNodes == 0 { + return nil + } + + // NOTE: Inc returns the number after addition, so we must -1 to get the "current" counter + count := s.roundRobinCount.Add(1) - 1 + idx := int(count % uint32(nNodes)) + + return liveNodes[idx] +} + +func (s *roundRobinSelector[CHAIN_ID, RPC]) Name() string { + return NodeSelectionModeRoundRobin +} diff --git a/pkg/solana/client/multinode/node_selector_total_difficulty.go b/pkg/solana/client/multinode/node_selector_total_difficulty.go new file mode 100644 index 000000000..3f3c79de9 --- /dev/null +++ b/pkg/solana/client/multinode/node_selector_total_difficulty.go @@ -0,0 +1,51 @@ +package client + +import ( + "math/big" +) + +type totalDifficultyNodeSelector[ + CHAIN_ID ID, + RPC any, +] []Node[CHAIN_ID, RPC] + +func NewTotalDifficultyNodeSelector[ + CHAIN_ID ID, + RPC any, +](nodes []Node[CHAIN_ID, RPC]) NodeSelector[CHAIN_ID, RPC] { + return totalDifficultyNodeSelector[CHAIN_ID, RPC](nodes) +} + +func (s totalDifficultyNodeSelector[CHAIN_ID, RPC]) Select() Node[CHAIN_ID, RPC] { + // NodeNoNewHeadsThreshold may not be enabled, in this case all nodes have td == nil + var highestTD *big.Int + var nodes []Node[CHAIN_ID, RPC] + var aliveNodes []Node[CHAIN_ID, RPC] + + for _, n := range s { + state, currentChainInfo := n.StateAndLatest() + if state != NodeStateAlive { + continue + } + + currentTD := currentChainInfo.TotalDifficulty + aliveNodes = append(aliveNodes, n) + if currentTD != nil && (highestTD == nil || 
currentTD.Cmp(highestTD) >= 0) { + if highestTD == nil || currentTD.Cmp(highestTD) > 0 { + highestTD = currentTD + nodes = nil + } + nodes = append(nodes, n) + } + } + + //If all nodes have td == nil pick one from the nodes that are alive + if len(nodes) == 0 { + return firstOrHighestPriority(aliveNodes) + } + return firstOrHighestPriority(nodes) +} + +func (s totalDifficultyNodeSelector[CHAIN_ID, RPC]) Name() string { + return NodeSelectionModeTotalDifficulty +} diff --git a/pkg/solana/client/multinode/poller.go b/pkg/solana/client/multinode/poller.go index d6080722c..eeb6c3af5 100644 --- a/pkg/solana/client/multinode/poller.go +++ b/pkg/solana/client/multinode/poller.go @@ -2,7 +2,6 @@ package client import ( "context" - "sync" "time" "github.com/smartcontractkit/chainlink-common/pkg/logger" @@ -15,83 +14,80 @@ import ( // and delivers the result to a channel. It is used by multinode to poll // for new heads and implements the Subscription interface. type Poller[T any] struct { - services.StateMachine + services.Service + eng *services.Engine + pollingInterval time.Duration pollingFunc func(ctx context.Context) (T, error) pollingTimeout time.Duration - logger logger.Logger channel chan<- T errCh chan error - - stopCh services.StopChan - wg sync.WaitGroup } // NewPoller creates a new Poller instance and returns a channel to receive the polled data func NewPoller[ T any, -](pollingInterval time.Duration, pollingFunc func(ctx context.Context) (T, error), pollingTimeout time.Duration, logger logger.Logger) (Poller[T], <-chan T) { +](pollingInterval time.Duration, pollingFunc func(ctx context.Context) (T, error), pollingTimeout time.Duration, lggr logger.Logger) (Poller[T], <-chan T) { channel := make(chan T) - return Poller[T]{ + p := Poller[T]{ pollingInterval: pollingInterval, pollingFunc: pollingFunc, pollingTimeout: pollingTimeout, channel: channel, - logger: logger, errCh: make(chan error), - stopCh: make(chan struct{}), - }, channel + } + p.Service, p.eng = services.Config{ + Name: "Poller", + Start: p.start, + Close: p.close, + }.NewServiceEngine(lggr) + return p, channel } var _ types.Subscription = &Poller[any]{} -func (p *Poller[T]) Start() error { - return p.StartOnce("Poller", func() error { - p.wg.Add(1) - go p.pollingLoop() - return nil - }) +func (p *Poller[T]) start(ctx context.Context) error { + p.eng.Go(p.pollingLoop) + return nil } // Unsubscribe cancels the sending of events to the data channel func (p *Poller[T]) Unsubscribe() { - _ = p.StopOnce("Poller", func() error { - close(p.stopCh) - p.wg.Wait() - close(p.errCh) - close(p.channel) - return nil - }) + _ = p.Close() +} + +func (p *Poller[T]) close() error { + close(p.errCh) + close(p.channel) + return nil } func (p *Poller[T]) Err() <-chan error { return p.errCh } -func (p *Poller[T]) pollingLoop() { - defer p.wg.Done() - +func (p *Poller[T]) pollingLoop(ctx context.Context) { ticker := time.NewTicker(p.pollingInterval) defer ticker.Stop() for { select { - case <-p.stopCh: + case <-ctx.Done(): return case <-ticker.C: // Set polling timeout - pollingCtx, cancelPolling := p.stopCh.CtxCancel(context.WithTimeout(context.Background(), p.pollingTimeout)) + pollingCtx, cancelPolling := context.WithTimeout(ctx, p.pollingTimeout) // Execute polling function result, err := p.pollingFunc(pollingCtx) cancelPolling() if err != nil { - p.logger.Warnf("polling error: %v", err) + p.eng.Warnf("polling error: %v", err) continue } // Send result to channel or block if channel is full select { case p.channel <- result: - case 
<-p.stopCh: + case <-ctx.Done(): return } } diff --git a/pkg/solana/client/multinode/redialbackoff.go b/pkg/solana/client/multinode/redialbackoff.go new file mode 100644 index 000000000..41be2232d --- /dev/null +++ b/pkg/solana/client/multinode/redialbackoff.go @@ -0,0 +1,17 @@ +package client + +import ( + "time" + + "github.com/jpillora/backoff" +) + +// NewRedialBackoff is a standard backoff to use for redialling or reconnecting to +// unreachable network endpoints +func NewRedialBackoff() backoff.Backoff { + return backoff.Backoff{ + Min: 1 * time.Second, + Max: 15 * time.Second, + Jitter: true, + } +} diff --git a/pkg/solana/client/multinode/send_only_node.go b/pkg/solana/client/multinode/send_only_node.go index 069911c78..1ff8efa79 100644 --- a/pkg/solana/client/multinode/send_only_node.go +++ b/pkg/solana/client/multinode/send_only_node.go @@ -137,7 +137,7 @@ func (s *sendOnlyNode[CHAIN_ID, RPC]) start(startCtx context.Context) { promPoolRPCNodeTransitionsToAlive.WithLabelValues(s.chainID.String(), s.name).Inc() s.setState(NodeStateAlive) - s.log.Infow("Sendonly RPC Node is online", "NodeState", s.state) + s.log.Infow("Sendonly RPC Node is online", "nodeState", s.state) } func (s *sendOnlyNode[CHAIN_ID, RPC]) Close() error { diff --git a/pkg/solana/client/multinode/transaction_sender.go b/pkg/solana/client/multinode/transaction_sender.go index a4c5e2b3d..1408e7417 100644 --- a/pkg/solana/client/multinode/transaction_sender.go +++ b/pkg/solana/client/multinode/transaction_sender.go @@ -2,6 +2,7 @@ package client import ( "context" + "errors" "fmt" "math" "slices" @@ -13,6 +14,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/logger" "github.com/smartcontractkit/chainlink-common/pkg/services" + "github.com/smartcontractkit/chainlink/v2/common/types" ) var ( @@ -40,7 +42,7 @@ type SendTxRPCClient[TX any] interface { SendTransaction(ctx context.Context, tx TX) error } -func NewTransactionSender[TX any, CHAIN_ID ID, RPC SendTxRPCClient[TX]]( +func NewTransactionSender[TX any, CHAIN_ID types.ID, RPC SendTxRPCClient[TX]]( lggr logger.Logger, chainID CHAIN_ID, chainFamily string, @@ -62,7 +64,7 @@ func NewTransactionSender[TX any, CHAIN_ID ID, RPC SendTxRPCClient[TX]]( } } -type TransactionSender[TX any, CHAIN_ID ID, RPC SendTxRPCClient[TX]] struct { +type TransactionSender[TX any, CHAIN_ID types.ID, RPC SendTxRPCClient[TX]] struct { services.StateMachine chainID CHAIN_ID chainFamily string @@ -133,12 +135,6 @@ func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) SendTransaction(ctx contex } }() }) - if err != nil { - primaryNodeWg.Wait() - close(txResultsToReport) - close(txResults) - return 0, err - } // This needs to be done in parallel so the reporting knows when it's done (when the channel is closed) txSender.wg.Add(1) @@ -149,6 +145,10 @@ func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) SendTransaction(ctx contex close(txResults) }() + if err != nil { + return 0, err + } + txSender.wg.Add(1) go txSender.reportSendTxAnomalies(tx, txResultsToReport) @@ -167,7 +167,7 @@ func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) broadcastTxAsync(ctx conte func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) reportSendTxAnomalies(tx TX, txResults <-chan sendTxResult) { defer txSender.wg.Done() - resultsByCode := sendTxErrors{} + resultsByCode := sendTxResults{} // txResults eventually will be closed for txResult := range txResults { resultsByCode[txResult.ResultCode] = append(resultsByCode[txResult.ResultCode], txResult.Err) @@ -180,9 +180,9 @@ func (txSender 
*TransactionSender[TX, CHAIN_ID, RPC]) reportSendTxAnomalies(tx T } } -type sendTxErrors map[SendTxReturnCode][]error +type sendTxResults map[SendTxReturnCode][]error -func aggregateTxResults(resultsByCode sendTxErrors) (returnCode SendTxReturnCode, txResult error, err error) { +func aggregateTxResults(resultsByCode sendTxResults) (returnCode SendTxReturnCode, txResult error, err error) { severeCode, severeErrors, hasSevereErrors := findFirstIn(resultsByCode, sendTxSevereErrors) successCode, successResults, hasSuccess := findFirstIn(resultsByCode, sendTxSuccessfulCodes) if hasSuccess { @@ -191,7 +191,7 @@ func aggregateTxResults(resultsByCode sendTxErrors) (returnCode SendTxReturnCode if hasSevereErrors { const errMsg = "found contradictions in nodes replies on SendTransaction: got success and severe error" // return success, since at least 1 node has accepted our broadcasted Tx, and thus it can now be included onchain - return successCode, successResults[0], fmt.Errorf(errMsg) + return successCode, successResults[0], errors.New(errMsg) } // other errors are temporary - we are safe to return success @@ -208,7 +208,7 @@ func aggregateTxResults(resultsByCode sendTxErrors) (returnCode SendTxReturnCode } err = fmt.Errorf("expected at least one response on SendTransaction") - return 0, err, err + return Retryable, err, err } func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) collectTxResults(ctx context.Context, tx TX, healthyNodesNum int, txResults <-chan sendTxResult) (SendTxReturnCode, error) { @@ -216,7 +216,7 @@ func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) collectTxResults(ctx conte return 0, ErroringNodeError } requiredResults := int(math.Ceil(float64(healthyNodesNum) * sendTxQuorum)) - errorsByCode := sendTxErrors{} + errorsByCode := sendTxResults{} var softTimeoutChan <-chan time.Time var resultsCount int loop: diff --git a/pkg/solana/client/multinode/types.go b/pkg/solana/client/multinode/types.go index 5cd831fc1..6c863c867 100644 --- a/pkg/solana/client/multinode/types.go +++ b/pkg/solana/client/multinode/types.go @@ -6,14 +6,9 @@ import ( "math/big" ) -// A chain-agnostic generic interface to represent the following native types on various chains: -// PublicKey, Address, Account, BlockHash, TxHash -type Hashable interface { - fmt.Stringer - comparable - - Bytes() []byte -} +// ID represents the base type, for any chain's ID. +// It should be convertible to a string, that can uniquely identify this chain +type ID fmt.Stringer // Subscription represents an event subscription where events are // delivered on a data channel. @@ -30,7 +25,7 @@ type Subscription interface { Err() <-chan error } -// RPCClient includes all the necessary generalized RPC methods along with any additional chain-specific methods. +// RPCClient includes all the necessary generalized RPC methods used by Node to perform health checks type RPCClient[ CHAIN_ID ID, HEAD Head, @@ -104,21 +99,3 @@ func MaxTotalDifficulty(a, b *big.Int) *big.Int { return big.NewInt(0).Set(b) } - -// ID represents the base type, for any chain's ID. 
-// It should be convertible to a string, that can uniquely identify this chain -type ID fmt.Stringer - -type multiNodeContextKey int - -const ( - contextKeyHeathCheckRequest multiNodeContextKey = iota + 1 -) - -func CtxAddHealthCheckFlag(ctx context.Context) context.Context { - return context.WithValue(ctx, contextKeyHeathCheckRequest, struct{}{}) -} - -func CtxIsHeathCheckRequest(ctx context.Context) bool { - return ctx.Value(contextKeyHeathCheckRequest) != nil -} diff --git a/pkg/solana/client/rpc_client.go b/pkg/solana/client/rpc_client.go index 0db8d9062..ec89d2b66 100644 --- a/pkg/solana/client/rpc_client.go +++ b/pkg/solana/client/rpc_client.go @@ -24,8 +24,6 @@ func (s StringID) String() string { return string(s) } -// TODO: ChainReaderWriter needs ChainID() (string, error) -// TODO: We probably don't need this though? var _ ReaderWriter = (*RpcClient)(nil) type Head struct { @@ -61,6 +59,8 @@ type RpcClient struct { requestGroup *singleflight.Group } +// TODO: BCI-4061: Implement RPC Client for MultiNode + func (c *RpcClient) Dial(ctx context.Context) error { //TODO implement me panic("implement me") From c44a6ce77c5e71cccb4312af2b8a4a5e2113b176 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Tue, 3 Sep 2024 11:58:39 -0400 Subject: [PATCH 03/22] Add MultiNode flag --- pkg/solana/cmd/chainlink-solana/main.go | 12 +++++++++++- pkg/solana/config/multinode.go | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pkg/solana/cmd/chainlink-solana/main.go b/pkg/solana/cmd/chainlink-solana/main.go index 08893e7de..6a966a693 100644 --- a/pkg/solana/cmd/chainlink-solana/main.go +++ b/pkg/solana/cmd/chainlink-solana/main.go @@ -66,10 +66,20 @@ func (c *pluginRelayer) NewRelayer(ctx context.Context, config string, keystore Logger: c.Logger, KeyStore: keystore, } - chain, err := solana.NewChain(&cfg.Solana, opts) + + var chain solana.Chain + var err error + + if cfg.Solana.MultiNodeConfig().MultiNodeEnabled() { + chain, err = solana.NewMultiNodeChain(&cfg.Solana, opts) + } else { + chain, err = solana.NewChain(&cfg.Solana, opts) + } + if err != nil { return nil, fmt.Errorf("failed to create chain: %w", err) } + ra := &loop.RelayerAdapter{Relayer: solana.NewRelayer(c.Logger, chain, capRegistry), RelayerExt: chain} c.SubService(ra) diff --git a/pkg/solana/config/multinode.go b/pkg/solana/config/multinode.go index 72e345728..1755e6ee6 100644 --- a/pkg/solana/config/multinode.go +++ b/pkg/solana/config/multinode.go @@ -83,4 +83,5 @@ func (c *MultiNode) FinalizedBlockOffset() uint32 { func (c *MultiNode) SetDefaults() { // TODO: Set defaults for MultiNode config https://smartcontract-it.atlassian.net/browse/BCI-4065 + c.multiNodeEnabled = false } From 64db86a4a00c8366864d18798df593cb3daab70c Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Tue, 3 Sep 2024 12:05:13 -0400 Subject: [PATCH 04/22] Remove internal dependency --- pkg/solana/client/multinode/send_only_node.go | 10 ++++------ .../client/multinode/send_only_node_lifecycle.go | 4 +--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pkg/solana/client/multinode/send_only_node.go b/pkg/solana/client/multinode/send_only_node.go index 1ff8efa79..4f60f566d 100644 --- a/pkg/solana/client/multinode/send_only_node.go +++ b/pkg/solana/client/multinode/send_only_node.go @@ -8,12 +8,10 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/logger" "github.com/smartcontractkit/chainlink-common/pkg/services" - - "github.com/smartcontractkit/chainlink/v2/common/types" ) type sendOnlyClient[ - CHAIN_ID types.ID, + 
CHAIN_ID ID, ] interface { Close() ChainID(context.Context) (CHAIN_ID, error) @@ -22,7 +20,7 @@ type sendOnlyClient[ // SendOnlyNode represents one node used as a sendonly type SendOnlyNode[ - CHAIN_ID types.ID, + CHAIN_ID ID, RPC any, ] interface { // Start may attempt to connect to the node, but should only return error for misconfiguration - never for temporary errors. @@ -42,7 +40,7 @@ type SendOnlyNode[ // It only supports sending transactions // It must use an http(s) url type sendOnlyNode[ - CHAIN_ID types.ID, + CHAIN_ID ID, RPC sendOnlyClient[CHAIN_ID], ] struct { services.StateMachine @@ -61,7 +59,7 @@ type sendOnlyNode[ // NewSendOnlyNode returns a new sendonly node func NewSendOnlyNode[ - CHAIN_ID types.ID, + CHAIN_ID ID, RPC sendOnlyClient[CHAIN_ID], ]( lggr logger.Logger, diff --git a/pkg/solana/client/multinode/send_only_node_lifecycle.go b/pkg/solana/client/multinode/send_only_node_lifecycle.go index a6ac11248..83642feba 100644 --- a/pkg/solana/client/multinode/send_only_node_lifecycle.go +++ b/pkg/solana/client/multinode/send_only_node_lifecycle.go @@ -3,8 +3,6 @@ package client import ( "fmt" "time" - - "github.com/smartcontractkit/chainlink/v2/common/internal/utils" ) // verifyLoop may only be triggered once, on Start, if initial chain ID check @@ -16,7 +14,7 @@ func (s *sendOnlyNode[CHAIN_ID, RPC]) verifyLoop() { ctx, cancel := s.chStop.NewCtx() defer cancel() - backoff := utils.NewRedialBackoff() + backoff := NewRedialBackoff() for { select { case <-ctx.Done(): From f7c1bc95b193cfb3e1e4ecbda2e46fba85d88002 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Tue, 3 Sep 2024 12:10:52 -0400 Subject: [PATCH 05/22] Fix build --- pkg/solana/client/multinode/poller.go | 4 +--- pkg/solana/client/multinode/transaction_sender.go | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg/solana/client/multinode/poller.go b/pkg/solana/client/multinode/poller.go index eeb6c3af5..9ebe1dcfc 100644 --- a/pkg/solana/client/multinode/poller.go +++ b/pkg/solana/client/multinode/poller.go @@ -6,8 +6,6 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/logger" "github.com/smartcontractkit/chainlink-common/pkg/services" - - "github.com/smartcontractkit/chainlink/v2/common/types" ) // Poller is a component that polls a function at a given interval @@ -44,7 +42,7 @@ func NewPoller[ return p, channel } -var _ types.Subscription = &Poller[any]{} +var _ Subscription = &Poller[any]{} func (p *Poller[T]) start(ctx context.Context) error { p.eng.Go(p.pollingLoop) diff --git a/pkg/solana/client/multinode/transaction_sender.go b/pkg/solana/client/multinode/transaction_sender.go index 1408e7417..d567e164f 100644 --- a/pkg/solana/client/multinode/transaction_sender.go +++ b/pkg/solana/client/multinode/transaction_sender.go @@ -14,7 +14,6 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/logger" "github.com/smartcontractkit/chainlink-common/pkg/services" - "github.com/smartcontractkit/chainlink/v2/common/types" ) var ( @@ -42,7 +41,7 @@ type SendTxRPCClient[TX any] interface { SendTransaction(ctx context.Context, tx TX) error } -func NewTransactionSender[TX any, CHAIN_ID types.ID, RPC SendTxRPCClient[TX]]( +func NewTransactionSender[TX any, CHAIN_ID ID, RPC SendTxRPCClient[TX]]( lggr logger.Logger, chainID CHAIN_ID, chainFamily string, @@ -64,7 +63,7 @@ func NewTransactionSender[TX any, CHAIN_ID types.ID, RPC SendTxRPCClient[TX]]( } } -type TransactionSender[TX any, CHAIN_ID types.ID, RPC SendTxRPCClient[TX]] struct { +type TransactionSender[TX any, CHAIN_ID ID, 
RPC SendTxRPCClient[TX]] struct { services.StateMachine chainID CHAIN_ID chainFamily string From 9e91b476f7939c79e99a9f2ded8450a9c5260f76 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Wed, 4 Sep 2024 12:40:03 -0400 Subject: [PATCH 06/22] Fix import cycle --- pkg/solana/chain.go | 6 +- pkg/solana/chain_multinode.go | 21 +- pkg/solana/client/client.go | 8 +- pkg/solana/client/client_test.go | 3 +- pkg/solana/client/mocks/ReaderWriter.go | 23 +- pkg/solana/client/multinode/poller_test.go | 187 --------- .../client/multinode/send_only_node_test.go | 139 ------- .../multinode/transaction_sender_test.go | 360 ------------------ pkg/solana/client/multinode/types.go | 7 + pkg/solana/client/rpc_client.go | 10 +- 10 files changed, 43 insertions(+), 721 deletions(-) delete mode 100644 pkg/solana/client/multinode/poller_test.go delete mode 100644 pkg/solana/client/multinode/send_only_node_test.go delete mode 100644 pkg/solana/client/multinode/transaction_sender_test.go diff --git a/pkg/solana/chain.go b/pkg/solana/chain.go index 20f6322d5..bc2dd845a 100644 --- a/pkg/solana/chain.go +++ b/pkg/solana/chain.go @@ -22,6 +22,8 @@ import ( relaytypes "github.com/smartcontractkit/chainlink-common/pkg/types" "github.com/smartcontractkit/chainlink-common/pkg/types/core" + mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" + "github.com/smartcontractkit/chainlink-solana/pkg/solana/client" "github.com/smartcontractkit/chainlink-solana/pkg/solana/config" "github.com/smartcontractkit/chainlink-solana/pkg/solana/monitor" @@ -187,13 +189,13 @@ func (v *verifiedCachedClient) LatestBlockhash() (*rpc.GetLatestBlockhashResult, return v.ReaderWriter.LatestBlockhash() } -func (v *verifiedCachedClient) ChainID(ctx context.Context) (client.StringID, error) { +func (v *verifiedCachedClient) ChainID(ctx context.Context) (mn.StringID, error) { verified, err := v.verifyChainID() if !verified { return "", err } - return client.StringID(v.chainID), nil + return mn.StringID(v.chainID), nil } func (v *verifiedCachedClient) GetFeeForMessage(msg string) (uint64, error) { diff --git a/pkg/solana/chain_multinode.go b/pkg/solana/chain_multinode.go index 82fb5b23f..cb7161a55 100644 --- a/pkg/solana/chain_multinode.go +++ b/pkg/solana/chain_multinode.go @@ -15,6 +15,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/loop" "github.com/smartcontractkit/chainlink-common/pkg/services" relaytypes "github.com/smartcontractkit/chainlink-common/pkg/types" + "github.com/smartcontractkit/chainlink-solana/pkg/solana/client" mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" "github.com/smartcontractkit/chainlink-solana/pkg/solana/config" @@ -39,8 +40,8 @@ type multiNodeChain struct { services.StateMachine id string cfg *config.TOMLConfig - multiNode *mn.MultiNode[client.StringID, *client.RpcClient] - txSender *mn.TransactionSender[*solanago.Transaction, client.StringID, *client.RpcClient] + multiNode *mn.MultiNode[mn.StringID, *client.RpcClient] + txSender *mn.TransactionSender[*solanago.Transaction, mn.StringID, *client.RpcClient] txm *txm.Txm balanceMonitor services.Service lggr logger.Logger @@ -57,7 +58,7 @@ func newMultiNodeChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr mnCfg := cfg.MultiNodeConfig() - var nodes []mn.Node[client.StringID, *client.RpcClient] + var nodes []mn.Node[mn.StringID, *client.RpcClient] for i, nodeInfo := range cfg.ListNodes() { // create client and check @@ -67,20 +68,20 @@ func newMultiNodeChain(id string, cfg *config.TOMLConfig, 
ks loop.Keystore, lggr continue } - newNode := mn.NewNode[client.StringID, *client.Head, *client.RpcClient]( + newNode := mn.NewNode[mn.StringID, *client.Head, *client.RpcClient]( mnCfg, mnCfg, lggr, *nodeInfo.URL.URL(), nil, *nodeInfo.Name, - int32(i), client.StringID(id), 0, rpcClient, chainFamily) + int32(i), mn.StringID(id), 0, rpcClient, chainFamily) nodes = append(nodes, newNode) } - multiNode := mn.NewMultiNode[client.StringID, *client.RpcClient]( + multiNode := mn.NewMultiNode[mn.StringID, *client.RpcClient]( lggr, mn.NodeSelectionModeRoundRobin, time.Duration(0), // TODO: set lease duration nodes, - []mn.SendOnlyNode[client.StringID, *client.RpcClient]{}, // TODO: no send only nodes? - client.StringID(id), + []mn.SendOnlyNode[mn.StringID, *client.RpcClient]{}, // TODO: no send only nodes? + mn.StringID(id), chainFamily, time.Duration(0), // TODO: set deathDeclarationDelay ) @@ -89,9 +90,9 @@ func newMultiNodeChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr return 0 // TODO ClassifySendError(err, clientErrors, logger.Sugared(logger.Nop()), tx, common.Address{}, false) } - txSender := mn.NewTransactionSender[*solanago.Transaction, client.StringID, *client.RpcClient]( + txSender := mn.NewTransactionSender[*solanago.Transaction, mn.StringID, *client.RpcClient]( lggr, - client.StringID(id), + mn.StringID(id), chainFamily, multiNode, classifySendError, diff --git a/pkg/solana/client/client.go b/pkg/solana/client/client.go index d007e3c4c..d2294824d 100644 --- a/pkg/solana/client/client.go +++ b/pkg/solana/client/client.go @@ -6,6 +6,8 @@ import ( "fmt" "time" + mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" + "github.com/gagliardetto/solana-go" "github.com/gagliardetto/solana-go/rpc" "golang.org/x/sync/singleflight" @@ -33,7 +35,7 @@ type Reader interface { Balance(addr solana.PublicKey) (uint64, error) SlotHeight() (uint64, error) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) - ChainID(ctx context.Context) (StringID, error) + ChainID(ctx context.Context) (mn.StringID, error) GetFeeForMessage(msg string) (uint64, error) GetLatestBlock() (*rpc.GetBlockResult, error) } @@ -142,7 +144,7 @@ func (c *Client) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) { return v.(*rpc.GetLatestBlockhashResult), err } -func (c *Client) ChainID(ctx context.Context) (StringID, error) { +func (c *Client) ChainID(ctx context.Context) (mn.StringID, error) { done := c.latency("chain_id") defer done() @@ -168,7 +170,7 @@ func (c *Client) ChainID(ctx context.Context) (StringID, error) { c.log.Warnf("unknown genesis hash - assuming solana chain is 'localnet'") network = "localnet" } - return StringID(network), nil + return mn.StringID(network), nil } func (c *Client) GetFeeForMessage(msg string) (uint64, error) { diff --git a/pkg/solana/client/client_test.go b/pkg/solana/client/client_test.go index 6f2276bd3..ed4e1dba4 100644 --- a/pkg/solana/client/client_test.go +++ b/pkg/solana/client/client_test.go @@ -19,6 +19,7 @@ import ( "github.com/stretchr/testify/require" "github.com/smartcontractkit/chainlink-common/pkg/logger" + mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" "github.com/smartcontractkit/chainlink-solana/pkg/solana/config" "github.com/smartcontractkit/chainlink-solana/pkg/solana/monitor" @@ -122,7 +123,7 @@ func TestClient_Reader_ChainID(t *testing.T) { for _, n := range networks { network, err := c.ChainID(context.Background()) assert.NoError(t, err) - assert.Equal(t, n, network) + assert.Equal(t, 
mn.StringID(n), network) } } diff --git a/pkg/solana/client/mocks/ReaderWriter.go b/pkg/solana/client/mocks/ReaderWriter.go index 2bbb82fef..b6cd6808a 100644 --- a/pkg/solana/client/mocks/ReaderWriter.go +++ b/pkg/solana/client/mocks/ReaderWriter.go @@ -6,6 +6,7 @@ import ( context "context" rpc "github.com/gagliardetto/solana-go/rpc" + multinode "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" mock "github.com/stretchr/testify/mock" solana "github.com/gagliardetto/solana-go" @@ -44,27 +45,27 @@ func (_m *ReaderWriter) Balance(addr solana.PublicKey) (uint64, error) { return r0, r1 } -// ChainID provides a mock function with given fields: -func (_m *ReaderWriter) ChainID() (string, error) { - ret := _m.Called() +// ChainID provides a mock function with given fields: ctx +func (_m *ReaderWriter) ChainID(ctx context.Context) (multinode.StringID, error) { + ret := _m.Called(ctx) if len(ret) == 0 { panic("no return value specified for ChainID") } - var r0 string + var r0 multinode.StringID var r1 error - if rf, ok := ret.Get(0).(func() (string, error)); ok { - return rf() + if rf, ok := ret.Get(0).(func(context.Context) (multinode.StringID, error)); ok { + return rf(ctx) } - if rf, ok := ret.Get(0).(func() string); ok { - r0 = rf() + if rf, ok := ret.Get(0).(func(context.Context) multinode.StringID); ok { + r0 = rf(ctx) } else { - r0 = ret.Get(0).(string) + r0 = ret.Get(0).(multinode.StringID) } - if rf, ok := ret.Get(1).(func() error); ok { - r1 = rf() + if rf, ok := ret.Get(1).(func(context.Context) error); ok { + r1 = rf(ctx) } else { r1 = ret.Error(1) } diff --git a/pkg/solana/client/multinode/poller_test.go b/pkg/solana/client/multinode/poller_test.go deleted file mode 100644 index 91af57930..000000000 --- a/pkg/solana/client/multinode/poller_test.go +++ /dev/null @@ -1,187 +0,0 @@ -package client - -import ( - "context" - "fmt" - "math/big" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.uber.org/zap" - - "github.com/smartcontractkit/chainlink-common/pkg/logger" - "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" -) - -func Test_Poller(t *testing.T) { - lggr := logger.Test(t) - - t.Run("Test multiple start", func(t *testing.T) { - pollFunc := func(ctx context.Context) (Head, error) { - return nil, nil - } - - poller, _ := NewPoller[Head](time.Millisecond, pollFunc, time.Second, lggr) - err := poller.Start() - require.NoError(t, err) - - err = poller.Start() - require.Error(t, err) - poller.Unsubscribe() - }) - - t.Run("Test polling for heads", func(t *testing.T) { - // Mock polling function that returns a new value every time it's called - var pollNumber int - pollLock := sync.Mutex{} - pollFunc := func(ctx context.Context) (Head, error) { - pollLock.Lock() - defer pollLock.Unlock() - pollNumber++ - h := head{ - BlockNumber: int64(pollNumber), - BlockDifficulty: big.NewInt(int64(pollNumber)), - } - return h.ToMockHead(t), nil - } - - // Create poller and start to receive data - poller, channel := NewPoller[Head](time.Millisecond, pollFunc, time.Second, lggr) - require.NoError(t, poller.Start()) - defer poller.Unsubscribe() - - // Receive updates from the poller - pollCount := 0 - pollMax := 50 - for ; pollCount < pollMax; pollCount++ { - h := <-channel - assert.Equal(t, int64(pollCount+1), h.BlockNumber()) - } - }) - - t.Run("Test polling errors", func(t *testing.T) { - // Mock polling function that returns an error - var pollNumber int - pollLock := sync.Mutex{} - pollFunc := 
func(ctx context.Context) (Head, error) { - pollLock.Lock() - defer pollLock.Unlock() - pollNumber++ - return nil, fmt.Errorf("polling error %d", pollNumber) - } - - olggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) - - // Create poller and subscribe to receive data - poller, _ := NewPoller[Head](time.Millisecond, pollFunc, time.Second, olggr) - require.NoError(t, poller.Start()) - defer poller.Unsubscribe() - - // Ensure that all errors were logged as expected - logsSeen := func() bool { - for pollCount := 0; pollCount < 50; pollCount++ { - numLogs := observedLogs.FilterMessage(fmt.Sprintf("polling error: polling error %d", pollCount+1)).Len() - if numLogs != 1 { - return false - } - } - return true - } - require.Eventually(t, logsSeen, tests.WaitTimeout(t), 100*time.Millisecond) - }) - - t.Run("Test polling timeout", func(t *testing.T) { - pollFunc := func(ctx context.Context) (Head, error) { - if <-ctx.Done(); true { - return nil, ctx.Err() - } - return nil, nil - } - - // Set instant timeout - pollingTimeout := time.Duration(0) - - olggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) - - // Create poller and subscribe to receive data - poller, _ := NewPoller[Head](time.Millisecond, pollFunc, pollingTimeout, olggr) - require.NoError(t, poller.Start()) - defer poller.Unsubscribe() - - // Ensure that timeout errors were logged as expected - logsSeen := func() bool { - return observedLogs.FilterMessage("polling error: context deadline exceeded").Len() >= 1 - } - require.Eventually(t, logsSeen, tests.WaitTimeout(t), 100*time.Millisecond) - }) - - t.Run("Test unsubscribe during polling", func(t *testing.T) { - wait := make(chan struct{}) - closeOnce := sync.OnceFunc(func() { close(wait) }) - pollFunc := func(ctx context.Context) (Head, error) { - closeOnce() - // Block in polling function until context is cancelled - if <-ctx.Done(); true { - return nil, ctx.Err() - } - return nil, nil - } - - // Set long timeout - pollingTimeout := time.Minute - - olggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) - - // Create poller and subscribe to receive data - poller, _ := NewPoller[Head](time.Millisecond, pollFunc, pollingTimeout, olggr) - require.NoError(t, poller.Start()) - - // Unsubscribe while blocked in polling function - <-wait - poller.Unsubscribe() - - // Ensure error was logged - logsSeen := func() bool { - return observedLogs.FilterMessage("polling error: context canceled").Len() >= 1 - } - require.Eventually(t, logsSeen, tests.WaitTimeout(t), 100*time.Millisecond) - }) -} - -func Test_Poller_Unsubscribe(t *testing.T) { - lggr := logger.Test(t) - pollFunc := func(ctx context.Context) (Head, error) { - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - h := head{ - BlockNumber: 0, - BlockDifficulty: big.NewInt(0), - } - return h.ToMockHead(t), nil - } - } - - t.Run("Test multiple unsubscribe", func(t *testing.T) { - poller, channel := NewPoller[Head](time.Millisecond, pollFunc, time.Second, lggr) - err := poller.Start() - require.NoError(t, err) - - <-channel - poller.Unsubscribe() - poller.Unsubscribe() - }) - - t.Run("Read channel after unsubscribe", func(t *testing.T) { - poller, channel := NewPoller[Head](time.Millisecond, pollFunc, time.Second, lggr) - err := poller.Start() - require.NoError(t, err) - - poller.Unsubscribe() - require.Equal(t, <-channel, nil) - }) -} diff --git a/pkg/solana/client/multinode/send_only_node_test.go b/pkg/solana/client/multinode/send_only_node_test.go deleted file mode 100644 index 352fb5b92..000000000 --- 
a/pkg/solana/client/multinode/send_only_node_test.go +++ /dev/null @@ -1,139 +0,0 @@ -package client - -import ( - "errors" - "fmt" - "net/url" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - "go.uber.org/zap" - - "github.com/smartcontractkit/chainlink-common/pkg/logger" - "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" - - "github.com/smartcontractkit/chainlink/v2/common/types" -) - -func TestNewSendOnlyNode(t *testing.T) { - t.Parallel() - - urlFormat := "http://user:%s@testurl.com" - password := "pass" - u, err := url.Parse(fmt.Sprintf(urlFormat, password)) - require.NoError(t, err) - redacted := fmt.Sprintf(urlFormat, "xxxxx") - lggr := logger.Test(t) - name := "TestNewSendOnlyNode" - chainID := types.RandomID() - client := newMockSendOnlyClient[types.ID](t) - - node := NewSendOnlyNode(lggr, *u, name, chainID, client) - assert.NotNil(t, node) - - // Must contain name & url with redacted password - assert.Contains(t, node.String(), fmt.Sprintf("%s:%s", name, redacted)) - assert.Equal(t, node.ConfiguredChainID(), chainID) -} - -func TestStartSendOnlyNode(t *testing.T) { - t.Parallel() - t.Run("becomes unusable if initial dial fails", func(t *testing.T) { - t.Parallel() - lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) - client := newMockSendOnlyClient[types.ID](t) - client.On("Close").Once() - expectedError := errors.New("some http error") - client.On("Dial", mock.Anything).Return(expectedError).Once() - s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), types.RandomID(), client) - - defer func() { assert.NoError(t, s.Close()) }() - err := s.Start(tests.Context(t)) - require.NoError(t, err) - - assert.Equal(t, NodeStateUnusable, s.State()) - tests.RequireLogMessage(t, observedLogs, "Dial failed: SendOnly Node is unusable") - }) - t.Run("Default ChainID(0) produces warn and skips checks", func(t *testing.T) { - t.Parallel() - lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) - client := newMockSendOnlyClient[types.ID](t) - client.On("Close").Once() - client.On("Dial", mock.Anything).Return(nil).Once() - s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), types.NewIDFromInt(0), client) - - defer func() { assert.NoError(t, s.Close()) }() - err := s.Start(tests.Context(t)) - require.NoError(t, err) - - assert.Equal(t, NodeStateAlive, s.State()) - tests.RequireLogMessage(t, observedLogs, "sendonly rpc ChainID verification skipped") - }) - t.Run("Can recover from chainID verification failure", func(t *testing.T) { - t.Parallel() - lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) - client := newMockSendOnlyClient[types.ID](t) - client.On("Close").Once() - client.On("Dial", mock.Anything).Return(nil) - expectedError := errors.New("failed to get chain ID") - chainID := types.RandomID() - const failuresCount = 2 - client.On("ChainID", mock.Anything).Return(types.RandomID(), expectedError).Times(failuresCount) - client.On("ChainID", mock.Anything).Return(chainID, nil) - - s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), chainID, client) - - defer func() { assert.NoError(t, s.Close()) }() - err := s.Start(tests.Context(t)) - require.NoError(t, err) - - assert.Equal(t, NodeStateUnreachable, s.State()) - tests.AssertLogCountEventually(t, observedLogs, fmt.Sprintf("Verify failed: %v", expectedError), failuresCount) - tests.AssertEventually(t, func() bool { - return s.State() == NodeStateAlive - }) - }) - t.Run("Can recover from chainID mismatch", func(t *testing.T) { - 
t.Parallel() - lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) - client := newMockSendOnlyClient[types.ID](t) - client.On("Close").Once() - client.On("Dial", mock.Anything).Return(nil).Once() - configuredChainID := types.NewIDFromInt(11) - rpcChainID := types.NewIDFromInt(20) - const failuresCount = 2 - client.On("ChainID", mock.Anything).Return(rpcChainID, nil).Times(failuresCount) - client.On("ChainID", mock.Anything).Return(configuredChainID, nil) - s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), configuredChainID, client) - - defer func() { assert.NoError(t, s.Close()) }() - err := s.Start(tests.Context(t)) - require.NoError(t, err) - - assert.Equal(t, NodeStateInvalidChainID, s.State()) - tests.AssertLogCountEventually(t, observedLogs, "sendonly rpc ChainID doesn't match local chain ID", failuresCount) - tests.AssertEventually(t, func() bool { - return s.State() == NodeStateAlive - }) - }) - t.Run("Start with Random ChainID", func(t *testing.T) { - t.Parallel() - lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) - client := newMockSendOnlyClient[types.ID](t) - client.On("Close").Once() - client.On("Dial", mock.Anything).Return(nil).Once() - configuredChainID := types.RandomID() - client.On("ChainID", mock.Anything).Return(configuredChainID, nil) - s := NewSendOnlyNode(lggr, url.URL{}, t.Name(), configuredChainID, client) - - defer func() { assert.NoError(t, s.Close()) }() - err := s.Start(tests.Context(t)) - assert.NoError(t, err) - tests.AssertEventually(t, func() bool { - return s.State() == NodeStateAlive - }) - assert.Equal(t, 0, observedLogs.Len()) // No warnings expected - }) -} diff --git a/pkg/solana/client/multinode/transaction_sender_test.go b/pkg/solana/client/multinode/transaction_sender_test.go deleted file mode 100644 index e4387abee..000000000 --- a/pkg/solana/client/multinode/transaction_sender_test.go +++ /dev/null @@ -1,360 +0,0 @@ -package client - -import ( - "context" - "fmt" - "testing" - - "github.com/pkg/errors" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - "go.uber.org/zap" - - "github.com/smartcontractkit/chainlink-common/pkg/logger" - "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" - "github.com/smartcontractkit/chainlink/v2/common/types" -) - -type sendTxMultiNode struct { - *MultiNode[types.ID, SendTxRPCClient[any]] -} - -type sendTxRPC struct { - sendTxRun func(args mock.Arguments) - sendTxErr error -} - -var _ SendTxRPCClient[any] = (*sendTxRPC)(nil) - -func newSendTxRPC(sendTxErr error, sendTxRun func(args mock.Arguments)) *sendTxRPC { - return &sendTxRPC{sendTxErr: sendTxErr, sendTxRun: sendTxRun} -} - -func (rpc *sendTxRPC) SendTransaction(ctx context.Context, _ any) error { - if rpc.sendTxRun != nil { - rpc.sendTxRun(mock.Arguments{ctx}) - } - return rpc.sendTxErr -} - -func newTestTransactionSender(t *testing.T, chainID types.ID, lggr logger.Logger, - nodes []Node[types.ID, SendTxRPCClient[any]], - sendOnlyNodes []SendOnlyNode[types.ID, SendTxRPCClient[any]], -) (*sendTxMultiNode, *TransactionSender[any, types.ID, SendTxRPCClient[any]]) { - mn := sendTxMultiNode{NewMultiNode[types.ID, SendTxRPCClient[any]]( - lggr, NodeSelectionModeRoundRobin, 0, nodes, sendOnlyNodes, chainID, "chainFamily", 0)} - err := mn.StartOnce("startedTestMultiNode", func() error { return nil }) - require.NoError(t, err) - - txSender := NewTransactionSender[any, types.ID, SendTxRPCClient[any]](lggr, chainID, mn.chainFamily, mn.MultiNode, classifySendTxError, 
tests.TestInterval) - err = txSender.Start(tests.Context(t)) - require.NoError(t, err) - - t.Cleanup(func() { - err := mn.Close() - if err != nil { - // Allow MultiNode to be closed early for testing - require.EqualError(t, err, "MultiNode has already been stopped: already stopped") - } - err = txSender.Close() - if err != nil { - // Allow TransactionSender to be closed early for testing - require.EqualError(t, err, "TransactionSender has already been stopped: already stopped") - } - }) - return &mn, txSender -} - -func classifySendTxError(_ any, err error) SendTxReturnCode { - if err != nil { - return Fatal - } - return Successful -} - -func TestTransactionSender_SendTransaction(t *testing.T) { - t.Parallel() - - newNodeWithState := func(t *testing.T, state NodeState, txErr error, sendTxRun func(args mock.Arguments)) *mockNode[types.ID, SendTxRPCClient[any]] { - rpc := newSendTxRPC(txErr, sendTxRun) - node := newMockNode[types.ID, SendTxRPCClient[any]](t) - node.On("String").Return("node name").Maybe() - node.On("RPC").Return(rpc).Maybe() - node.On("State").Return(state).Maybe() - node.On("Close").Return(nil).Once() - return node - } - - newNode := func(t *testing.T, txErr error, sendTxRun func(args mock.Arguments)) *mockNode[types.ID, SendTxRPCClient[any]] { - return newNodeWithState(t, NodeStateAlive, txErr, sendTxRun) - } - - t.Run("Fails if there is no nodes available", func(t *testing.T) { - lggr, _ := logger.TestObserved(t, zap.DebugLevel) - _, txSender := newTestTransactionSender(t, types.RandomID(), lggr, nil, nil) - _, err := txSender.SendTransaction(tests.Context(t), nil) - assert.EqualError(t, err, ErroringNodeError.Error()) - }) - - t.Run("Transaction failure happy path", func(t *testing.T) { - expectedError := errors.New("transaction failed") - mainNode := newNode(t, expectedError, nil) - lggr, observedLogs := logger.TestObserved(t, zap.DebugLevel) - - _, txSender := newTestTransactionSender(t, types.RandomID(), lggr, - []Node[types.ID, SendTxRPCClient[any]]{mainNode}, - []SendOnlyNode[types.ID, SendTxRPCClient[any]]{newNode(t, errors.New("unexpected error"), nil)}) - - result, sendErr := txSender.SendTransaction(tests.Context(t), nil) - require.ErrorIs(t, sendErr, expectedError) - require.Equal(t, Fatal, result) - tests.AssertLogCountEventually(t, observedLogs, "Node sent transaction", 2) - tests.AssertLogCountEventually(t, observedLogs, "RPC returned error", 2) - }) - - t.Run("Transaction success happy path", func(t *testing.T) { - mainNode := newNode(t, nil, nil) - - lggr, observedLogs := logger.TestObserved(t, zap.DebugLevel) - _, txSender := newTestTransactionSender(t, types.RandomID(), lggr, - []Node[types.ID, SendTxRPCClient[any]]{mainNode}, - []SendOnlyNode[types.ID, SendTxRPCClient[any]]{newNode(t, errors.New("unexpected error"), nil)}) - - result, sendErr := txSender.SendTransaction(tests.Context(t), nil) - require.NoError(t, sendErr) - require.Equal(t, Successful, result) - tests.AssertLogCountEventually(t, observedLogs, "Node sent transaction", 2) - tests.AssertLogCountEventually(t, observedLogs, "RPC returned error", 1) - }) - - t.Run("Context expired before collecting sufficient results", func(t *testing.T) { - testContext, testCancel := context.WithCancel(tests.Context(t)) - defer testCancel() - - mainNode := newNode(t, nil, func(_ mock.Arguments) { - // block caller til end of the test - <-testContext.Done() - }) - - lggr, _ := logger.TestObserved(t, zap.DebugLevel) - - _, txSender := newTestTransactionSender(t, types.RandomID(), lggr, - []Node[types.ID, 
SendTxRPCClient[any]]{mainNode}, nil) - - requestContext, cancel := context.WithCancel(tests.Context(t)) - cancel() - _, sendErr := txSender.SendTransaction(requestContext, nil) - require.EqualError(t, sendErr, "context canceled") - }) - - t.Run("Soft timeout stops results collection", func(t *testing.T) { - chainID := types.RandomID() - expectedError := errors.New("transaction failed") - fastNode := newNode(t, expectedError, nil) - - // hold reply from the node till end of the test - testContext, testCancel := context.WithCancel(tests.Context(t)) - defer testCancel() - slowNode := newNode(t, errors.New("transaction failed"), func(_ mock.Arguments) { - // block caller til end of the test - <-testContext.Done() - }) - - lggr, _ := logger.TestObserved(t, zap.DebugLevel) - - _, txSender := newTestTransactionSender(t, chainID, lggr, []Node[types.ID, SendTxRPCClient[any]]{fastNode, slowNode}, nil) - _, sendErr := txSender.SendTransaction(tests.Context(t), nil) - require.EqualError(t, sendErr, expectedError.Error()) - }) - t.Run("Fails when multinode is closed", func(t *testing.T) { - chainID := types.RandomID() - fastNode := newNode(t, nil, nil) - // hold reply from the node till end of the test - testContext, testCancel := context.WithCancel(tests.Context(t)) - defer testCancel() - slowNode := newNode(t, errors.New("transaction failed"), func(_ mock.Arguments) { - // block caller til end of the test - <-testContext.Done() - }) - slowSendOnly := newNode(t, errors.New("send only failed"), func(_ mock.Arguments) { - // block caller til end of the test - <-testContext.Done() - }) - - lggr, _ := logger.TestObserved(t, zap.DebugLevel) - - mn, txSender := newTestTransactionSender(t, chainID, lggr, - []Node[types.ID, SendTxRPCClient[any]]{fastNode, slowNode}, - []SendOnlyNode[types.ID, SendTxRPCClient[any]]{slowSendOnly}) - - require.NoError(t, mn.Close()) - _, err := txSender.SendTransaction(tests.Context(t), nil) - require.EqualError(t, err, "MultiNode is stopped") - }) - t.Run("Fails when closed", func(t *testing.T) { - chainID := types.RandomID() - fastNode := newNode(t, nil, nil) - // hold reply from the node till end of the test - testContext, testCancel := context.WithCancel(tests.Context(t)) - defer testCancel() - slowNode := newNode(t, errors.New("transaction failed"), func(_ mock.Arguments) { - // block caller til end of the test - <-testContext.Done() - }) - slowSendOnly := newNode(t, errors.New("send only failed"), func(_ mock.Arguments) { - // block caller til end of the test - <-testContext.Done() - }) - - lggr, _ := logger.TestObserved(t, zap.DebugLevel) - - _, txSender := newTestTransactionSender(t, chainID, lggr, - []Node[types.ID, SendTxRPCClient[any]]{fastNode, slowNode}, - []SendOnlyNode[types.ID, SendTxRPCClient[any]]{slowSendOnly}) - - require.NoError(t, txSender.Close()) - _, err := txSender.SendTransaction(tests.Context(t), nil) - require.EqualError(t, err, "context canceled") - }) - t.Run("Returns error if there is no healthy primary nodes", func(t *testing.T) { - chainID := types.RandomID() - primary := newNodeWithState(t, NodeStateUnreachable, nil, nil) - sendOnly := newNodeWithState(t, NodeStateUnreachable, nil, nil) - - lggr, _ := logger.TestObserved(t, zap.DebugLevel) - - _, txSender := newTestTransactionSender(t, chainID, lggr, - []Node[types.ID, SendTxRPCClient[any]]{primary}, - []SendOnlyNode[types.ID, SendTxRPCClient[any]]{sendOnly}) - - _, sendErr := txSender.SendTransaction(tests.Context(t), nil) - assert.EqualError(t, sendErr, ErroringNodeError.Error()) - }) - - 
t.Run("Transaction success even if one of the nodes is unhealthy", func(t *testing.T) { - chainID := types.RandomID() - mainNode := newNode(t, nil, nil) - unexpectedCall := func(args mock.Arguments) { - panic("SendTx must not be called for unhealthy node") - } - unhealthyNode := newNodeWithState(t, NodeStateUnreachable, nil, unexpectedCall) - unhealthySendOnlyNode := newNodeWithState(t, NodeStateUnreachable, nil, unexpectedCall) - - lggr, _ := logger.TestObserved(t, zap.DebugLevel) - - _, txSender := newTestTransactionSender(t, chainID, lggr, - []Node[types.ID, SendTxRPCClient[any]]{mainNode, unhealthyNode}, - []SendOnlyNode[types.ID, SendTxRPCClient[any]]{unhealthySendOnlyNode}) - - returnCode, sendErr := txSender.SendTransaction(tests.Context(t), nil) - require.NoError(t, sendErr) - require.Equal(t, Successful, returnCode) - }) -} - -func TestTransactionSender_SendTransaction_aggregateTxResults(t *testing.T) { - t.Parallel() - // ensure failure on new SendTxReturnCode - codesToCover := map[SendTxReturnCode]struct{}{} - for code := Successful; code < sendTxReturnCodeLen; code++ { - codesToCover[code] = struct{}{} - } - - testCases := []struct { - Name string - ExpectedTxResult string - ExpectedCriticalErr string - ResultsByCode sendTxErrors - }{ - { - Name: "Returns success and logs critical error on success and Fatal", - ExpectedTxResult: "success", - ExpectedCriticalErr: "found contradictions in nodes replies on SendTransaction: got success and severe error", - ResultsByCode: sendTxErrors{ - Successful: {errors.New("success")}, - Fatal: {errors.New("fatal")}, - }, - }, - { - Name: "Returns TransactionAlreadyKnown and logs critical error on TransactionAlreadyKnown and Fatal", - ExpectedTxResult: "tx_already_known", - ExpectedCriticalErr: "found contradictions in nodes replies on SendTransaction: got success and severe error", - ResultsByCode: sendTxErrors{ - TransactionAlreadyKnown: {errors.New("tx_already_known")}, - Unsupported: {errors.New("unsupported")}, - }, - }, - { - Name: "Prefers sever error to temporary", - ExpectedTxResult: "underpriced", - ExpectedCriticalErr: "", - ResultsByCode: sendTxErrors{ - Retryable: {errors.New("retryable")}, - Underpriced: {errors.New("underpriced")}, - }, - }, - { - Name: "Returns temporary error", - ExpectedTxResult: "retryable", - ExpectedCriticalErr: "", - ResultsByCode: sendTxErrors{ - Retryable: {errors.New("retryable")}, - }, - }, - { - Name: "Insufficient funds is treated as error", - ExpectedTxResult: "", - ExpectedCriticalErr: "", - ResultsByCode: sendTxErrors{ - Successful: {nil}, - InsufficientFunds: {errors.New("insufficientFunds")}, - }, - }, - { - Name: "Logs critical error on empty ResultsByCode", - ExpectedTxResult: "expected at least one response on SendTransaction", - ExpectedCriticalErr: "expected at least one response on SendTransaction", - ResultsByCode: sendTxErrors{}, - }, - { - Name: "Zk terminally stuck", - ExpectedTxResult: "not enough keccak counters to continue the execution", - ExpectedCriticalErr: "", - ResultsByCode: sendTxErrors{ - TerminallyStuck: {errors.New("not enough keccak counters to continue the execution")}, - }, - }, - } - - for _, testCase := range testCases { - for code := range testCase.ResultsByCode { - delete(codesToCover, code) - } - - t.Run(testCase.Name, func(t *testing.T) { - _, txResult, err := aggregateTxResults(testCase.ResultsByCode) - if testCase.ExpectedTxResult == "" { - assert.NoError(t, err) - } else { - assert.EqualError(t, txResult, testCase.ExpectedTxResult) - } - - 
logger.Sugared(logger.Test(t)).Info("Map: " + fmt.Sprint(testCase.ResultsByCode)) - logger.Sugared(logger.Test(t)).Criticalw("observed invariant violation on SendTransaction", "resultsByCode", testCase.ResultsByCode, "err", err) - - if testCase.ExpectedCriticalErr == "" { - assert.NoError(t, err) - } else { - assert.EqualError(t, err, testCase.ExpectedCriticalErr) - } - }) - } - - // explicitly signal that following codes are properly handled in aggregateTxResults, - // but dedicated test cases won't be beneficial - for _, codeToIgnore := range []SendTxReturnCode{Unknown, ExceedsMaxFee, FeeOutOfValidRange} { - delete(codesToCover, codeToIgnore) - } - assert.Empty(t, codesToCover, "all of the SendTxReturnCode must be covered by this test") -} diff --git a/pkg/solana/client/multinode/types.go b/pkg/solana/client/multinode/types.go index 6c863c867..51b70e573 100644 --- a/pkg/solana/client/multinode/types.go +++ b/pkg/solana/client/multinode/types.go @@ -10,6 +10,13 @@ import ( // It should be convertible to a string, that can uniquely identify this chain type ID fmt.Stringer +// StringID enables using string directly as a ChainID +type StringID string + +func (s StringID) String() string { + return string(s) +} + // Subscription represents an event subscription where events are // delivered on a data channel. // This is a generic interface for Subscription to represent used by clients. diff --git a/pkg/solana/client/rpc_client.go b/pkg/solana/client/rpc_client.go index ec89d2b66..424c48b67 100644 --- a/pkg/solana/client/rpc_client.go +++ b/pkg/solana/client/rpc_client.go @@ -18,12 +18,6 @@ import ( "github.com/smartcontractkit/chainlink-solana/pkg/solana/monitor" ) -type StringID string - -func (s StringID) String() string { - return string(s) -} - var _ ReaderWriter = (*RpcClient)(nil) type Head struct { @@ -178,7 +172,7 @@ func (c *RpcClient) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) { return v.(*rpc.GetLatestBlockhashResult), err } -func (c *RpcClient) ChainID(ctx context.Context) (StringID, error) { +func (c *RpcClient) ChainID(ctx context.Context) (mn.StringID, error) { done := c.latency("chain_id") defer done() @@ -204,7 +198,7 @@ func (c *RpcClient) ChainID(ctx context.Context) (StringID, error) { c.log.Warnf("unknown genesis hash - assuming solana chain is 'localnet'") network = "localnet" } - return StringID(network), nil + return mn.StringID(network), nil } func (c *RpcClient) GetFeeForMessage(msg string) (uint64, error) { From 354dc50a80d185a9ea53d166105fc07565a07dfb Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Wed, 4 Sep 2024 12:52:52 -0400 Subject: [PATCH 07/22] tidy --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 12c2ea189..c88476699 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/go-viper/mapstructure/v2 v2.1.0 github.com/google/uuid v1.6.0 github.com/hashicorp/go-plugin v1.6.2-0.20240829161738-06afb6d7ae99 + github.com/jpillora/backoff v1.0.0 github.com/pelletier/go-toml/v2 v2.2.0 github.com/prometheus/client_golang v1.17.0 github.com/smartcontractkit/chainlink-common v0.2.2-0.20240829145110-4a45c426fbe8 @@ -55,7 +56,6 @@ require ( github.com/hashicorp/go-hclog v1.5.0 // indirect github.com/hashicorp/yamux v0.1.1 // indirect github.com/invopop/jsonschema v0.12.0 // indirect - github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.15.15 // indirect github.com/kr/pretty v0.3.1 // indirect From 
60c33524323aa898121cd5d9dc3613392c030c89 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Wed, 4 Sep 2024 13:00:05 -0400 Subject: [PATCH 08/22] Update client_test.go --- pkg/solana/client/client_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/solana/client/client_test.go b/pkg/solana/client/client_test.go index ed4e1dba4..f41f4773a 100644 --- a/pkg/solana/client/client_test.go +++ b/pkg/solana/client/client_test.go @@ -79,7 +79,7 @@ func TestClient_Reader_Integration(t *testing.T) { // get chain ID based on gensis hash network, err := c.ChainID(context.Background()) assert.NoError(t, err) - assert.Equal(t, "localnet", network) + assert.Equal(t, mn.StringID("localnet"), network) // get account info (also tested inside contract_test) res, err := c.GetAccountInfoWithOpts(context.TODO(), solana.PublicKey{}, &rpc.GetAccountInfoOpts{Commitment: rpc.CommitmentFinalized}) From 8e2306bfc12dbbf6bdce8604577b04a777478dd3 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Wed, 4 Sep 2024 13:06:44 -0400 Subject: [PATCH 09/22] lint --- pkg/solana/chain_multinode.go | 19 +++++------- pkg/solana/client/client_test.go | 2 +- pkg/solana/client/rpc_client.go | 50 ++++++++++++++++---------------- 3 files changed, 34 insertions(+), 37 deletions(-) diff --git a/pkg/solana/chain_multinode.go b/pkg/solana/chain_multinode.go index cb7161a55..d8e4f133c 100644 --- a/pkg/solana/chain_multinode.go +++ b/pkg/solana/chain_multinode.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "math/big" - "sync" "time" solanago "github.com/gagliardetto/solana-go" @@ -40,13 +39,11 @@ type multiNodeChain struct { services.StateMachine id string cfg *config.TOMLConfig - multiNode *mn.MultiNode[mn.StringID, *client.RpcClient] - txSender *mn.TransactionSender[*solanago.Transaction, mn.StringID, *client.RpcClient] + multiNode *mn.MultiNode[mn.StringID, *client.RPCClient] + txSender *mn.TransactionSender[*solanago.Transaction, mn.StringID, *client.RPCClient] txm *txm.Txm balanceMonitor services.Service lggr logger.Logger - - clientLock sync.RWMutex } func newMultiNodeChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.Logger) (*multiNodeChain, error) { @@ -58,29 +55,29 @@ func newMultiNodeChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr mnCfg := cfg.MultiNodeConfig() - var nodes []mn.Node[mn.StringID, *client.RpcClient] + var nodes []mn.Node[mn.StringID, *client.RPCClient] for i, nodeInfo := range cfg.ListNodes() { // create client and check - rpcClient, err := client.NewRpcClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) + rpcClient, err := client.NewRPCClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) if err != nil { lggr.Warnw("failed to create client", "name", *nodeInfo.Name, "solana-url", nodeInfo.URL.String(), "err", err.Error()) continue } - newNode := mn.NewNode[mn.StringID, *client.Head, *client.RpcClient]( + newNode := mn.NewNode[mn.StringID, *client.Head, *client.RPCClient]( mnCfg, mnCfg, lggr, *nodeInfo.URL.URL(), nil, *nodeInfo.Name, int32(i), mn.StringID(id), 0, rpcClient, chainFamily) nodes = append(nodes, newNode) } - multiNode := mn.NewMultiNode[mn.StringID, *client.RpcClient]( + multiNode := mn.NewMultiNode[mn.StringID, *client.RPCClient]( lggr, mn.NodeSelectionModeRoundRobin, time.Duration(0), // TODO: set lease duration nodes, - []mn.SendOnlyNode[mn.StringID, *client.RpcClient]{}, // TODO: no send only nodes? 
+ []mn.SendOnlyNode[mn.StringID, *client.RPCClient]{}, // TODO: no send only nodes? mn.StringID(id), chainFamily, time.Duration(0), // TODO: set deathDeclarationDelay @@ -90,7 +87,7 @@ func newMultiNodeChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr return 0 // TODO ClassifySendError(err, clientErrors, logger.Sugared(logger.Nop()), tx, common.Address{}, false) } - txSender := mn.NewTransactionSender[*solanago.Transaction, mn.StringID, *client.RpcClient]( + txSender := mn.NewTransactionSender[*solanago.Transaction, mn.StringID, *client.RPCClient]( lggr, mn.StringID(id), chainFamily, diff --git a/pkg/solana/client/client_test.go b/pkg/solana/client/client_test.go index f41f4773a..6a4feb61f 100644 --- a/pkg/solana/client/client_test.go +++ b/pkg/solana/client/client_test.go @@ -19,8 +19,8 @@ import ( "github.com/stretchr/testify/require" "github.com/smartcontractkit/chainlink-common/pkg/logger" - mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" + mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" "github.com/smartcontractkit/chainlink-solana/pkg/solana/config" "github.com/smartcontractkit/chainlink-solana/pkg/solana/monitor" ) diff --git a/pkg/solana/client/rpc_client.go b/pkg/solana/client/rpc_client.go index 424c48b67..c9ceeab6a 100644 --- a/pkg/solana/client/rpc_client.go +++ b/pkg/solana/client/rpc_client.go @@ -18,7 +18,7 @@ import ( "github.com/smartcontractkit/chainlink-solana/pkg/solana/monitor" ) -var _ ReaderWriter = (*RpcClient)(nil) +var _ ReaderWriter = (*RPCClient)(nil) type Head struct { rpc.GetBlockResult @@ -39,7 +39,7 @@ func (h *Head) IsValid() bool { return true } -type RpcClient struct { +type RPCClient struct { url string rpc *rpc.Client skipPreflight bool // to enable or disable preflight checks @@ -55,48 +55,48 @@ type RpcClient struct { // TODO: BCI-4061: Implement RPC Client for MultiNode -func (c *RpcClient) Dial(ctx context.Context) error { +func (c *RPCClient) Dial(ctx context.Context) error { //TODO implement me panic("implement me") } -func (c *RpcClient) SubscribeToHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { +func (c *RPCClient) SubscribeToHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { //TODO implement me panic("implement me") } -func (c *RpcClient) SubscribeToFinalizedHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { +func (c *RPCClient) SubscribeToFinalizedHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { //TODO implement me panic("implement me") } -func (c *RpcClient) Ping(ctx context.Context) error { +func (c *RPCClient) Ping(ctx context.Context) error { //TODO implement me panic("implement me") } -func (c *RpcClient) IsSyncing(ctx context.Context) (bool, error) { +func (c *RPCClient) IsSyncing(ctx context.Context) (bool, error) { //TODO implement me panic("implement me") } -func (c *RpcClient) UnsubscribeAllExcept(subs ...mn.Subscription) { +func (c *RPCClient) UnsubscribeAllExcept(subs ...mn.Subscription) { //TODO implement me panic("implement me") } -func (c *RpcClient) Close() { +func (c *RPCClient) Close() { //TODO implement me panic("implement me") } -func (c *RpcClient) GetInterceptedChainInfo() (latest, highestUserObservations mn.ChainInfo) { +func (c *RPCClient) GetInterceptedChainInfo() (latest, highestUserObservations mn.ChainInfo) { //TODO implement me panic("implement me") } -func NewRpcClient(endpoint string, cfg config.Config, requestTimeout time.Duration, log logger.Logger) (*RpcClient, 
error) { - return &RpcClient{ +func NewRPCClient(endpoint string, cfg config.Config, requestTimeout time.Duration, log logger.Logger) (*RPCClient, error) { + return &RPCClient{ url: endpoint, rpc: rpc.New(endpoint), skipPreflight: cfg.SkipPreflight(), @@ -109,14 +109,14 @@ func NewRpcClient(endpoint string, cfg config.Config, requestTimeout time.Durati }, nil } -func (c *RpcClient) latency(name string) func() { +func (c *RPCClient) latency(name string) func() { start := time.Now() return func() { monitor.SetClientLatency(time.Since(start), name, c.url) } } -func (c *RpcClient) Balance(addr solana.PublicKey) (uint64, error) { +func (c *RPCClient) Balance(addr solana.PublicKey) (uint64, error) { done := c.latency("balance") defer done() @@ -133,11 +133,11 @@ func (c *RpcClient) Balance(addr solana.PublicKey) (uint64, error) { return res.Value, err } -func (c *RpcClient) SlotHeight() (uint64, error) { +func (c *RPCClient) SlotHeight() (uint64, error) { return c.SlotHeightWithCommitment(rpc.CommitmentProcessed) // get the latest slot height } -func (c *RpcClient) SlotHeightWithCommitment(commitment rpc.CommitmentType) (uint64, error) { +func (c *RPCClient) SlotHeightWithCommitment(commitment rpc.CommitmentType) (uint64, error) { done := c.latency("slot_height") defer done() @@ -149,7 +149,7 @@ func (c *RpcClient) SlotHeightWithCommitment(commitment rpc.CommitmentType) (uin return v.(uint64), err } -func (c *RpcClient) GetAccountInfoWithOpts(ctx context.Context, addr solana.PublicKey, opts *rpc.GetAccountInfoOpts) (*rpc.GetAccountInfoResult, error) { +func (c *RPCClient) GetAccountInfoWithOpts(ctx context.Context, addr solana.PublicKey, opts *rpc.GetAccountInfoOpts) (*rpc.GetAccountInfoResult, error) { done := c.latency("account_info") defer done() @@ -159,7 +159,7 @@ func (c *RpcClient) GetAccountInfoWithOpts(ctx context.Context, addr solana.Publ return c.rpc.GetAccountInfoWithOpts(ctx, addr, opts) } -func (c *RpcClient) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) { +func (c *RPCClient) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) { done := c.latency("latest_blockhash") defer done() @@ -172,7 +172,7 @@ func (c *RpcClient) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) { return v.(*rpc.GetLatestBlockhashResult), err } -func (c *RpcClient) ChainID(ctx context.Context) (mn.StringID, error) { +func (c *RPCClient) ChainID(ctx context.Context) (mn.StringID, error) { done := c.latency("chain_id") defer done() @@ -201,7 +201,7 @@ func (c *RpcClient) ChainID(ctx context.Context) (mn.StringID, error) { return mn.StringID(network), nil } -func (c *RpcClient) GetFeeForMessage(msg string) (uint64, error) { +func (c *RPCClient) GetFeeForMessage(msg string) (uint64, error) { done := c.latency("fee_for_message") defer done() @@ -221,7 +221,7 @@ func (c *RpcClient) GetFeeForMessage(msg string) (uint64, error) { } // https://docs.solana.com/developing/clients/jsonrpc-api#getsignaturestatuses -func (c *RpcClient) SignatureStatuses(ctx context.Context, sigs []solana.Signature) ([]*rpc.SignatureStatusesResult, error) { +func (c *RPCClient) SignatureStatuses(ctx context.Context, sigs []solana.Signature) ([]*rpc.SignatureStatusesResult, error) { done := c.latency("signature_statuses") defer done() @@ -242,7 +242,7 @@ func (c *RpcClient) SignatureStatuses(ctx context.Context, sigs []solana.Signatu // https://docs.solana.com/developing/clients/jsonrpc-api#simulatetransaction // opts - (optional) use `nil` to use defaults -func (c *RpcClient) SimulateTx(ctx context.Context, tx 
*solana.Transaction, opts *rpc.SimulateTransactionOpts) (*rpc.SimulateTransactionResult, error) { +func (c *RPCClient) SimulateTx(ctx context.Context, tx *solana.Transaction, opts *rpc.SimulateTransactionOpts) (*rpc.SimulateTransactionResult, error) { done := c.latency("simulate_tx") defer done() @@ -268,12 +268,12 @@ func (c *RpcClient) SimulateTx(ctx context.Context, tx *solana.Transaction, opts return res.Value, nil } -func (c *RpcClient) SendTransaction(ctx context.Context, tx *solana.Transaction) error { +func (c *RPCClient) SendTransaction(ctx context.Context, tx *solana.Transaction) error { // TODO: Implement return nil } -func (c *RpcClient) SendTx(ctx context.Context, tx *solana.Transaction) (solana.Signature, error) { +func (c *RPCClient) SendTx(ctx context.Context, tx *solana.Transaction) (solana.Signature, error) { done := c.latency("send_tx") defer done() @@ -289,7 +289,7 @@ func (c *RpcClient) SendTx(ctx context.Context, tx *solana.Transaction) (solana. return c.rpc.SendTransactionWithOpts(ctx, tx, opts) } -func (c *RpcClient) GetLatestBlock() (*rpc.GetBlockResult, error) { +func (c *RPCClient) GetLatestBlock() (*rpc.GetBlockResult, error) { // get latest confirmed slot slot, err := c.SlotHeightWithCommitment(c.commitment) if err != nil { From b8d67550b482edb32e31e3149738ef107351b540 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Wed, 4 Sep 2024 13:27:58 -0400 Subject: [PATCH 10/22] Fix duplicate metrics --- pkg/solana/client/multinode/multi_node.go | 2 +- pkg/solana/client/multinode/node.go | 6 +++--- pkg/solana/client/multinode/node_fsm.go | 14 +++++++------- pkg/solana/client/multinode/node_lifecycle.go | 12 ++++++------ pkg/solana/client/multinode/transaction_sender.go | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pkg/solana/client/multinode/multi_node.go b/pkg/solana/client/multinode/multi_node.go index 386e09554..1a4846edf 100644 --- a/pkg/solana/client/multinode/multi_node.go +++ b/pkg/solana/client/multinode/multi_node.go @@ -18,7 +18,7 @@ import ( var ( // PromMultiNodeRPCNodeStates reports current RPC node state PromMultiNodeRPCNodeStates = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Name: "multi_node_states", + Name: "solana_multi_node_states", Help: "The number of RPC nodes currently in the given state for the given chain", }, []string{"network", "chainId", "state"}) ErroringNodeError = fmt.Errorf("no live nodes available") diff --git a/pkg/solana/client/multinode/node.go b/pkg/solana/client/multinode/node.go index 8ab30f856..7d2b02ce2 100644 --- a/pkg/solana/client/multinode/node.go +++ b/pkg/solana/client/multinode/node.go @@ -21,15 +21,15 @@ var errInvalidChainID = errors.New("invalid chain id") var ( promPoolRPCNodeVerifies = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_verifies", + Name: "solana_pool_rpc_node_verifies", Help: "The total number of chain ID verifications for the given RPC node", }, []string{"network", "chainID", "nodeName"}) promPoolRPCNodeVerifiesFailed = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_verifies_failed", + Name: "solana_pool_rpc_node_verifies_failed", Help: "The total number of failed chain ID verifications for the given RPC node", }, []string{"network", "chainID", "nodeName"}) promPoolRPCNodeVerifiesSuccess = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_verifies_success", + Name: "solana_pool_rpc_node_verifies_success", Help: "The total number of successful chain ID verifications for the given RPC node", }, 
[]string{"network", "chainID", "nodeName"}) ) diff --git a/pkg/solana/client/multinode/node_fsm.go b/pkg/solana/client/multinode/node_fsm.go index 981e325da..136910868 100644 --- a/pkg/solana/client/multinode/node_fsm.go +++ b/pkg/solana/client/multinode/node_fsm.go @@ -9,31 +9,31 @@ import ( var ( promPoolRPCNodeTransitionsToAlive = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_num_transitions_to_alive", + Name: "solana_pool_rpc_node_num_transitions_to_alive", Help: transitionString(NodeStateAlive), }, []string{"chainID", "nodeName"}) promPoolRPCNodeTransitionsToInSync = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_num_transitions_to_in_sync", + Name: "solana_pool_rpc_node_num_transitions_to_in_sync", Help: fmt.Sprintf("%s to %s", transitionString(NodeStateOutOfSync), NodeStateAlive), }, []string{"chainID", "nodeName"}) promPoolRPCNodeTransitionsToOutOfSync = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_num_transitions_to_out_of_sync", + Name: "solana_pool_rpc_node_num_transitions_to_out_of_sync", Help: transitionString(NodeStateOutOfSync), }, []string{"chainID", "nodeName"}) promPoolRPCNodeTransitionsToUnreachable = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_num_transitions_to_unreachable", + Name: "solana_pool_rpc_node_num_transitions_to_unreachable", Help: transitionString(NodeStateUnreachable), }, []string{"chainID", "nodeName"}) promPoolRPCNodeTransitionsToInvalidChainID = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_num_transitions_to_invalid_chain_id", + Name: "solana_pool_rpc_node_num_transitions_to_invalid_chain_id", Help: transitionString(NodeStateInvalidChainID), }, []string{"chainID", "nodeName"}) promPoolRPCNodeTransitionsToUnusable = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_num_transitions_to_unusable", + Name: "solana_pool_rpc_node_num_transitions_to_unusable", Help: transitionString(NodeStateUnusable), }, []string{"chainID", "nodeName"}) promPoolRPCNodeTransitionsToSyncing = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_num_transitions_to_syncing", + Name: "solana_pool_rpc_node_num_transitions_to_syncing", Help: transitionString(NodeStateSyncing), }, []string{"chainID", "nodeName"}) ) diff --git a/pkg/solana/client/multinode/node_lifecycle.go b/pkg/solana/client/multinode/node_lifecycle.go index 44203bf97..d6b150690 100644 --- a/pkg/solana/client/multinode/node_lifecycle.go +++ b/pkg/solana/client/multinode/node_lifecycle.go @@ -17,27 +17,27 @@ import ( var ( promPoolRPCNodeHighestSeenBlock = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Name: "pool_rpc_node_highest_seen_block", + Name: "solana_pool_rpc_node_highest_seen_block", Help: "The highest seen block for the given RPC node", }, []string{"chainID", "nodeName"}) promPoolRPCNodeHighestFinalizedBlock = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Name: "pool_rpc_node_highest_finalized_block", + Name: "solana_pool_rpc_node_highest_finalized_block", Help: "The highest seen finalized block for the given RPC node", }, []string{"chainID", "nodeName"}) promPoolRPCNodeNumSeenBlocks = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_num_seen_blocks", + Name: "solana_pool_rpc_node_num_seen_blocks", Help: "The total number of new blocks seen by the given RPC node", }, []string{"chainID", "nodeName"}) promPoolRPCNodePolls = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_polls_total", + Name: 
"solana_pool_rpc_node_polls_total", Help: "The total number of poll checks for the given RPC node", }, []string{"chainID", "nodeName"}) promPoolRPCNodePollsFailed = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_polls_failed", + Name: "solana_pool_rpc_node_polls_failed", Help: "The total number of failed poll checks for the given RPC node", }, []string{"chainID", "nodeName"}) promPoolRPCNodePollsSuccess = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "pool_rpc_node_polls_success", + Name: "solana_pool_rpc_node_polls_success", Help: "The total number of successful poll checks for the given RPC node", }, []string{"chainID", "nodeName"}) ) diff --git a/pkg/solana/client/multinode/transaction_sender.go b/pkg/solana/client/multinode/transaction_sender.go index d567e164f..71de153ae 100644 --- a/pkg/solana/client/multinode/transaction_sender.go +++ b/pkg/solana/client/multinode/transaction_sender.go @@ -19,7 +19,7 @@ import ( var ( // PromMultiNodeInvariantViolations reports violation of our assumptions PromMultiNodeInvariantViolations = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "multi_node_invariant_violations", + Name: "solana_multi_node_invariant_violations", Help: "The number of invariant violations", }, []string{"network", "chainId", "invariant"}) ) From 2cb4d77bf71db88ec70b1605ed6b5adacef3c763 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 5 Sep 2024 12:19:03 -0400 Subject: [PATCH 11/22] Add chain multinode flag --- pkg/solana/chain.go | 91 +++++++- pkg/solana/chain_multinode.go | 266 ------------------------ pkg/solana/cmd/chainlink-solana/main.go | 10 +- 3 files changed, 86 insertions(+), 281 deletions(-) delete mode 100644 pkg/solana/chain_multinode.go diff --git a/pkg/solana/chain.go b/pkg/solana/chain.go index bc2dd845a..7360f7c0c 100644 --- a/pkg/solana/chain.go +++ b/pkg/solana/chain.go @@ -70,7 +70,7 @@ func NewChain(cfg *config.TOMLConfig, opts ChainOpts) (Chain, error) { if !cfg.IsEnabled() { return nil, fmt.Errorf("cannot create new chain with ID %s: chain is disabled", *cfg.ChainID) } - c, err := newChain(*cfg.ChainID, cfg, opts.KeyStore, opts.Logger) + c, err := newChain(*cfg.ChainID, cfg, cfg.MultiNodeEnabled(), opts.KeyStore, opts.Logger) if err != nil { return nil, err } @@ -87,6 +87,11 @@ type chain struct { balanceMonitor services.Service lggr logger.Logger + // if multiNode is enabled, the clientCache will not be used + multiNodeEnabled bool + multiNode *mn.MultiNode[mn.StringID, *client.RPCClient] + txSender *mn.TransactionSender[*solanago.Transaction, mn.StringID, *client.RPCClient] + // tracking node chain id for verification clientCache map[string]*verifiedCachedClient // map URL -> {client, chainId} [mainnet/testnet/devnet/localnet] clientLock sync.RWMutex @@ -216,14 +221,70 @@ func (v *verifiedCachedClient) GetAccountInfoWithOpts(ctx context.Context, addr return v.ReaderWriter.GetAccountInfoWithOpts(ctx, addr, opts) } -func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.Logger) (*chain, error) { +func newChain(id string, cfg *config.TOMLConfig, multiNodeEnabled bool, ks loop.Keystore, lggr logger.Logger) (*chain, error) { lggr = logger.With(lggr, "chainID", id, "chain", "solana") var ch = chain{ - id: id, - cfg: cfg, - lggr: logger.Named(lggr, "Chain"), - clientCache: map[string]*verifiedCachedClient{}, + id: id, + cfg: cfg, + lggr: logger.Named(lggr, "Chain"), + multiNodeEnabled: multiNodeEnabled, + clientCache: map[string]*verifiedCachedClient{}, } + + if multiNodeEnabled { + 
chainFamily := "solana" + + mnCfg := cfg.MultiNodeConfig() + + var nodes []mn.Node[mn.StringID, *client.RPCClient] + + for i, nodeInfo := range cfg.ListNodes() { + // create client and check + rpcClient, err := client.NewRPCClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) + if err != nil { + lggr.Warnw("failed to create client", "name", *nodeInfo.Name, "solana-url", nodeInfo.URL.String(), "err", err.Error()) + continue + } + + newNode := mn.NewNode[mn.StringID, *client.Head, *client.RPCClient]( + mnCfg, mnCfg, lggr, *nodeInfo.URL.URL(), nil, *nodeInfo.Name, + int32(i), mn.StringID(id), 0, rpcClient, chainFamily) + + nodes = append(nodes, newNode) + } + + multiNode := mn.NewMultiNode[mn.StringID, *client.RPCClient]( + lggr, + mn.NodeSelectionModeRoundRobin, + time.Minute, // TODO: set lease duration + nodes, + []mn.SendOnlyNode[mn.StringID, *client.RPCClient]{}, + mn.StringID(id), + chainFamily, + mnCfg.DeathDeclarationDelay(), + ) + + // TODO: implement error classification + classifySendError := func(tx *solanago.Transaction, err error) mn.SendTxReturnCode { + return 0 // TODO ClassifySendError(err, clientErrors, logger.Sugared(logger.Nop()), tx, common.Address{}, false) + } + + txSender := mn.NewTransactionSender[*solanago.Transaction, mn.StringID, *client.RPCClient]( + lggr, + mn.StringID(id), + chainFamily, + multiNode, + classifySendError, + 0, // use the default value provided by the implementation + ) + + ch.multiNode = multiNode + ch.txSender = txSender + + // clientCache will not be used if multinode is enabled + ch.clientCache = nil + } + tc := func() (client.ReaderWriter, error) { return ch.getClient() } @@ -302,6 +363,10 @@ func (c *chain) ChainID() string { // getClient returns a client, randomly selecting one from available and valid nodes func (c *chain) getClient() (client.ReaderWriter, error) { + if c.multiNodeEnabled { + return c.multiNode.SelectRPC() + } + var node *config.Node var client client.ReaderWriter nodes := c.cfg.ListNodes() @@ -381,6 +446,13 @@ func (c *chain) Start(ctx context.Context) error { c.lggr.Debug("Starting txm") c.lggr.Debug("Starting balance monitor") var ms services.MultiStart + if c.multiNodeEnabled { + c.lggr.Debug("Starting multinode") + err := ms.Start(ctx, c.multiNode, c.txSender) + if err != nil { + return err + } + } return ms.Start(ctx, c.txm, c.balanceMonitor) }) } @@ -390,6 +462,13 @@ func (c *chain) Close() error { c.lggr.Debug("Stopping") c.lggr.Debug("Stopping txm") c.lggr.Debug("Stopping balance monitor") + if c.multiNodeEnabled { + c.lggr.Debug("Stopping multinode") + err := services.CloseAll(c.multiNode, c.txSender) + if err != nil { + return err + } + } return services.CloseAll(c.txm, c.balanceMonitor) }) } diff --git a/pkg/solana/chain_multinode.go b/pkg/solana/chain_multinode.go deleted file mode 100644 index d8e4f133c..000000000 --- a/pkg/solana/chain_multinode.go +++ /dev/null @@ -1,266 +0,0 @@ -package solana - -import ( - "context" - "errors" - "fmt" - "math/big" - "time" - - solanago "github.com/gagliardetto/solana-go" - "github.com/gagliardetto/solana-go/programs/system" - "github.com/smartcontractkit/chainlink-common/pkg/chains" - "github.com/smartcontractkit/chainlink-common/pkg/logger" - "github.com/smartcontractkit/chainlink-common/pkg/loop" - "github.com/smartcontractkit/chainlink-common/pkg/services" - relaytypes "github.com/smartcontractkit/chainlink-common/pkg/types" - - "github.com/smartcontractkit/chainlink-solana/pkg/solana/client" - mn 
"github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" - "github.com/smartcontractkit/chainlink-solana/pkg/solana/config" - "github.com/smartcontractkit/chainlink-solana/pkg/solana/monitor" - "github.com/smartcontractkit/chainlink-solana/pkg/solana/txm" -) - -func NewMultiNodeChain(cfg *config.TOMLConfig, opts ChainOpts) (Chain, error) { - if !cfg.IsEnabled() { - return nil, fmt.Errorf("cannot create new chain with ID %s: chain is disabled", *cfg.ChainID) - } - c, err := newMultiNodeChain(*cfg.ChainID, cfg, opts.KeyStore, opts.Logger) - if err != nil { - return nil, err - } - return c, nil -} - -var _ Chain = (*multiNodeChain)(nil) - -type multiNodeChain struct { - services.StateMachine - id string - cfg *config.TOMLConfig - multiNode *mn.MultiNode[mn.StringID, *client.RPCClient] - txSender *mn.TransactionSender[*solanago.Transaction, mn.StringID, *client.RPCClient] - txm *txm.Txm - balanceMonitor services.Service - lggr logger.Logger -} - -func newMultiNodeChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.Logger) (*multiNodeChain, error) { - lggr = logger.With(lggr, "chainID", id, "chain", "solana") - - chainFamily := "solana" - - cfg.BlockHistoryPollPeriod() - - mnCfg := cfg.MultiNodeConfig() - - var nodes []mn.Node[mn.StringID, *client.RPCClient] - - for i, nodeInfo := range cfg.ListNodes() { - // create client and check - rpcClient, err := client.NewRPCClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) - if err != nil { - lggr.Warnw("failed to create client", "name", *nodeInfo.Name, "solana-url", nodeInfo.URL.String(), "err", err.Error()) - continue - } - - newNode := mn.NewNode[mn.StringID, *client.Head, *client.RPCClient]( - mnCfg, mnCfg, lggr, *nodeInfo.URL.URL(), nil, *nodeInfo.Name, - int32(i), mn.StringID(id), 0, rpcClient, chainFamily) - - nodes = append(nodes, newNode) - } - - multiNode := mn.NewMultiNode[mn.StringID, *client.RPCClient]( - lggr, - mn.NodeSelectionModeRoundRobin, - time.Duration(0), // TODO: set lease duration - nodes, - []mn.SendOnlyNode[mn.StringID, *client.RPCClient]{}, // TODO: no send only nodes? 
- mn.StringID(id), - chainFamily, - time.Duration(0), // TODO: set deathDeclarationDelay - ) - - classifySendError := func(tx *solanago.Transaction, err error) mn.SendTxReturnCode { - return 0 // TODO ClassifySendError(err, clientErrors, logger.Sugared(logger.Nop()), tx, common.Address{}, false) - } - - txSender := mn.NewTransactionSender[*solanago.Transaction, mn.StringID, *client.RPCClient]( - lggr, - mn.StringID(id), - chainFamily, - multiNode, - classifySendError, - 0, // use the default value provided by the implementation - ) - - var ch = multiNodeChain{ - id: id, - cfg: cfg, - multiNode: multiNode, - txSender: txSender, - lggr: logger.Named(lggr, "Chain"), - } - - tc := func() (client.ReaderWriter, error) { - return ch.multiNode.SelectRPC() - } - - ch.txm = txm.NewTxm(ch.id, tc, cfg, ks, lggr) - bc := func() (monitor.BalanceClient, error) { - return ch.multiNode.SelectRPC() - } - ch.balanceMonitor = monitor.NewBalanceMonitor(ch.id, cfg, lggr, ks, bc) - return &ch, nil -} - -// ChainService interface -func (c *multiNodeChain) GetChainStatus(ctx context.Context) (relaytypes.ChainStatus, error) { - toml, err := c.cfg.TOMLString() - if err != nil { - return relaytypes.ChainStatus{}, err - } - return relaytypes.ChainStatus{ - ID: c.id, - Enabled: c.cfg.IsEnabled(), - Config: toml, - }, nil -} - -func (c *multiNodeChain) ListNodeStatuses(ctx context.Context, pageSize int32, pageToken string) (stats []relaytypes.NodeStatus, nextPageToken string, total int, err error) { - return chains.ListNodeStatuses(int(pageSize), pageToken, c.listNodeStatuses) -} - -func (c *multiNodeChain) Transact(ctx context.Context, from, to string, amount *big.Int, balanceCheck bool) error { - return c.sendTx(ctx, from, to, amount, balanceCheck) -} - -func (c *multiNodeChain) listNodeStatuses(start, end int) ([]relaytypes.NodeStatus, int, error) { - stats := make([]relaytypes.NodeStatus, 0) - total := len(c.cfg.Nodes) - if start >= total { - return stats, total, chains.ErrOutOfRange - } - if end > total { - end = total - } - nodes := c.cfg.Nodes[start:end] - for _, node := range nodes { - stat, err := config.NodeStatus(node, c.ChainID()) - if err != nil { - return stats, total, err - } - stats = append(stats, stat) - } - return stats, total, nil -} - -func (c *multiNodeChain) Name() string { - return c.lggr.Name() -} - -func (c *multiNodeChain) ID() string { - return c.id -} - -func (c *multiNodeChain) Config() config.Config { - return c.cfg -} - -func (c *multiNodeChain) TxManager() TxManager { - return c.txm -} - -func (c *multiNodeChain) Reader() (client.Reader, error) { - return c.multiNode.SelectRPC() -} - -func (c *multiNodeChain) ChainID() string { - return c.id -} - -func (c *multiNodeChain) Start(ctx context.Context) error { - return c.StartOnce("Chain", func() error { - c.lggr.Debug("Starting") - c.lggr.Debug("Starting txm") - c.lggr.Debug("Starting balance monitor") - var ms services.MultiStart - return ms.Start(ctx, c.txm, c.balanceMonitor) - }) -} - -func (c *multiNodeChain) Close() error { - return c.StopOnce("Chain", func() error { - c.lggr.Debug("Stopping") - c.lggr.Debug("Stopping txm") - c.lggr.Debug("Stopping balance monitor") - return services.CloseAll(c.txm, c.balanceMonitor) - }) -} - -func (c *multiNodeChain) Ready() error { - return errors.Join( - c.StateMachine.Ready(), - c.txm.Ready(), - ) -} - -func (c *multiNodeChain) HealthReport() map[string]error { - report := map[string]error{c.Name(): c.Healthy()} - services.CopyHealth(report, c.txm.HealthReport()) - return report -} - -func (c 
*multiNodeChain) sendTx(ctx context.Context, from, to string, amount *big.Int, balanceCheck bool) error { - reader, err := c.Reader() - if err != nil { - return fmt.Errorf("chain unreachable: %w", err) - } - - fromKey, err := solanago.PublicKeyFromBase58(from) - if err != nil { - return fmt.Errorf("failed to parse from key: %w", err) - } - toKey, err := solanago.PublicKeyFromBase58(to) - if err != nil { - return fmt.Errorf("failed to parse to key: %w", err) - } - if !amount.IsUint64() { - return fmt.Errorf("amount %s overflows uint64", amount) - } - amountI := amount.Uint64() - - blockhash, err := reader.LatestBlockhash() - if err != nil { - return fmt.Errorf("failed to get latest block hash: %w", err) - } - tx, err := solanago.NewTransaction( - []solanago.Instruction{ - system.NewTransferInstruction( - amountI, - fromKey, - toKey, - ).Build(), - }, - blockhash.Value.Blockhash, - solanago.TransactionPayer(fromKey), - ) - if err != nil { - return fmt.Errorf("failed to create tx: %w", err) - } - - if balanceCheck { - if err = solanaValidateBalance(reader, fromKey, amountI, tx.Message.ToBase64()); err != nil { - return fmt.Errorf("failed to validate balance: %w", err) - } - } - - txm := c.TxManager() - err = txm.Enqueue("", tx) - if err != nil { - return fmt.Errorf("transaction failed: %w", err) - } - return nil -} diff --git a/pkg/solana/cmd/chainlink-solana/main.go b/pkg/solana/cmd/chainlink-solana/main.go index 6a966a693..d65f6cbc9 100644 --- a/pkg/solana/cmd/chainlink-solana/main.go +++ b/pkg/solana/cmd/chainlink-solana/main.go @@ -67,15 +67,7 @@ func (c *pluginRelayer) NewRelayer(ctx context.Context, config string, keystore KeyStore: keystore, } - var chain solana.Chain - var err error - - if cfg.Solana.MultiNodeConfig().MultiNodeEnabled() { - chain, err = solana.NewMultiNodeChain(&cfg.Solana, opts) - } else { - chain, err = solana.NewChain(&cfg.Solana, opts) - } - + chain, err := solana.NewChain(&cfg.Solana, opts) if err != nil { return nil, fmt.Errorf("failed to create chain: %w", err) } From 0b33b1f9f8384de519a021af80e90c5b0e193c09 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Fri, 6 Sep 2024 12:07:39 -0400 Subject: [PATCH 12/22] Extend client --- pkg/solana/chain.go | 55 +++--- pkg/solana/client/client.go | 70 +++++++ pkg/solana/client/rpc_client.go | 312 -------------------------------- 3 files changed, 95 insertions(+), 342 deletions(-) delete mode 100644 pkg/solana/client/rpc_client.go diff --git a/pkg/solana/chain.go b/pkg/solana/chain.go index 7360f7c0c..9a6068f03 100644 --- a/pkg/solana/chain.go +++ b/pkg/solana/chain.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io" "math/big" "math/rand" "strings" @@ -70,7 +71,7 @@ func NewChain(cfg *config.TOMLConfig, opts ChainOpts) (Chain, error) { if !cfg.IsEnabled() { return nil, fmt.Errorf("cannot create new chain with ID %s: chain is disabled", *cfg.ChainID) } - c, err := newChain(*cfg.ChainID, cfg, cfg.MultiNodeEnabled(), opts.KeyStore, opts.Logger) + c, err := newChain(*cfg.ChainID, cfg, opts.KeyStore, opts.Logger) if err != nil { return nil, err } @@ -88,9 +89,8 @@ type chain struct { lggr logger.Logger // if multiNode is enabled, the clientCache will not be used - multiNodeEnabled bool - multiNode *mn.MultiNode[mn.StringID, *client.RPCClient] - txSender *mn.TransactionSender[*solanago.Transaction, mn.StringID, *client.RPCClient] + multiNode *mn.MultiNode[mn.StringID, *client.Client] + txSender *mn.TransactionSender[*solanago.Transaction, mn.StringID, *client.Client] // tracking node chain id for verification 
clientCache map[string]*verifiedCachedClient // map URL -> {client, chainId} [mainnet/testnet/devnet/localnet] @@ -221,44 +221,43 @@ func (v *verifiedCachedClient) GetAccountInfoWithOpts(ctx context.Context, addr return v.ReaderWriter.GetAccountInfoWithOpts(ctx, addr, opts) } -func newChain(id string, cfg *config.TOMLConfig, multiNodeEnabled bool, ks loop.Keystore, lggr logger.Logger) (*chain, error) { +func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.Logger) (*chain, error) { lggr = logger.With(lggr, "chainID", id, "chain", "solana") var ch = chain{ - id: id, - cfg: cfg, - lggr: logger.Named(lggr, "Chain"), - multiNodeEnabled: multiNodeEnabled, - clientCache: map[string]*verifiedCachedClient{}, + id: id, + cfg: cfg, + lggr: logger.Named(lggr, "Chain"), + clientCache: map[string]*verifiedCachedClient{}, } - if multiNodeEnabled { + if cfg.MultiNodeEnabled() { chainFamily := "solana" mnCfg := cfg.MultiNodeConfig() - var nodes []mn.Node[mn.StringID, *client.RPCClient] + var nodes []mn.Node[mn.StringID, *client.Client] for i, nodeInfo := range cfg.ListNodes() { // create client and check - rpcClient, err := client.NewRPCClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) + rpcClient, err := client.NewClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) if err != nil { lggr.Warnw("failed to create client", "name", *nodeInfo.Name, "solana-url", nodeInfo.URL.String(), "err", err.Error()) continue } - newNode := mn.NewNode[mn.StringID, *client.Head, *client.RPCClient]( + newNode := mn.NewNode[mn.StringID, *client.Head, *client.Client]( mnCfg, mnCfg, lggr, *nodeInfo.URL.URL(), nil, *nodeInfo.Name, int32(i), mn.StringID(id), 0, rpcClient, chainFamily) nodes = append(nodes, newNode) } - multiNode := mn.NewMultiNode[mn.StringID, *client.RPCClient]( + multiNode := mn.NewMultiNode[mn.StringID, *client.Client]( lggr, mn.NodeSelectionModeRoundRobin, time.Minute, // TODO: set lease duration nodes, - []mn.SendOnlyNode[mn.StringID, *client.RPCClient]{}, + []mn.SendOnlyNode[mn.StringID, *client.Client]{}, mn.StringID(id), chainFamily, mnCfg.DeathDeclarationDelay(), @@ -269,7 +268,7 @@ func newChain(id string, cfg *config.TOMLConfig, multiNodeEnabled bool, ks loop. return 0 // TODO ClassifySendError(err, clientErrors, logger.Sugared(logger.Nop()), tx, common.Address{}, false) } - txSender := mn.NewTransactionSender[*solanago.Transaction, mn.StringID, *client.RPCClient]( + txSender := mn.NewTransactionSender[*solanago.Transaction, mn.StringID, *client.Client]( lggr, mn.StringID(id), chainFamily, @@ -363,7 +362,7 @@ func (c *chain) ChainID() string { // getClient returns a client, randomly selecting one from available and valid nodes func (c *chain) getClient() (client.ReaderWriter, error) { - if c.multiNodeEnabled { + if c.cfg.MultiNodeEnabled() { return c.multiNode.SelectRPC() } @@ -446,14 +445,12 @@ func (c *chain) Start(ctx context.Context) error { c.lggr.Debug("Starting txm") c.lggr.Debug("Starting balance monitor") var ms services.MultiStart - if c.multiNodeEnabled { + startAll := []services.StartClose{c.txm, c.balanceMonitor} + if c.cfg.MultiNodeEnabled() { c.lggr.Debug("Starting multinode") - err := ms.Start(ctx, c.multiNode, c.txSender) - if err != nil { - return err - } + startAll = append(startAll, c.multiNode, c.txSender) } - return ms.Start(ctx, c.txm, c.balanceMonitor) + return ms.Start(ctx, startAll...) 
}) } @@ -462,14 +459,12 @@ func (c *chain) Close() error { c.lggr.Debug("Stopping") c.lggr.Debug("Stopping txm") c.lggr.Debug("Stopping balance monitor") - if c.multiNodeEnabled { + closeAll := []io.Closer{c.txm, c.balanceMonitor} + if c.cfg.MultiNodeEnabled() { c.lggr.Debug("Stopping multinode") - err := services.CloseAll(c.multiNode, c.txSender) - if err != nil { - return err - } + closeAll = append(closeAll, c.multiNode, c.txSender) } - return services.CloseAll(c.txm, c.balanceMonitor) + return services.CloseAll(closeAll...) }) } diff --git a/pkg/solana/client/client.go b/pkg/solana/client/client.go index d2294824d..21111fab4 100644 --- a/pkg/solana/client/client.go +++ b/pkg/solana/client/client.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "math/big" "time" mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" @@ -67,6 +68,25 @@ type Client struct { requestGroup *singleflight.Group } +type Head struct { + rpc.GetBlockResult +} + +func (h *Head) BlockNumber() int64 { + if h.BlockHeight == nil { + return 0 + } + return int64(*h.BlockHeight) +} + +func (h *Head) BlockDifficulty() *big.Int { + return nil +} + +func (h *Head) IsValid() bool { + return true +} + func NewClient(endpoint string, cfg config.Config, requestTimeout time.Duration, log logger.Logger) (*Client, error) { return &Client{ url: endpoint, @@ -81,6 +101,56 @@ func NewClient(endpoint string, cfg config.Config, requestTimeout time.Duration, }, nil } +var _ mn.RPCClient[mn.StringID, *Head] = (*Client)(nil) +var _ mn.SendTxRPCClient[*solana.Transaction] = (*Client)(nil) + +// TODO: BCI-4061: Implement Client for MultiNode + +func (c *Client) Dial(ctx context.Context) error { + //TODO implement me + panic("implement me") +} + +func (c *Client) SubscribeToHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { + //TODO implement me + panic("implement me") +} + +func (c *Client) SubscribeToFinalizedHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { + //TODO implement me + panic("implement me") +} + +func (c *Client) Ping(ctx context.Context) error { + //TODO implement me + panic("implement me") +} + +func (c *Client) IsSyncing(ctx context.Context) (bool, error) { + //TODO implement me + panic("implement me") +} + +func (c *Client) UnsubscribeAllExcept(subs ...mn.Subscription) { + //TODO implement me + panic("implement me") +} + +func (c *Client) Close() { + //TODO implement me + panic("implement me") +} + +func (c *Client) GetInterceptedChainInfo() (latest, highestUserObservations mn.ChainInfo) { + //TODO implement me + panic("implement me") +} + +func (c *Client) SendTransaction(ctx context.Context, tx *solana.Transaction) error { + // TODO: Implement + return nil +} + func (c *Client) latency(name string) func() { start := time.Now() return func() { diff --git a/pkg/solana/client/rpc_client.go b/pkg/solana/client/rpc_client.go deleted file mode 100644 index c9ceeab6a..000000000 --- a/pkg/solana/client/rpc_client.go +++ /dev/null @@ -1,312 +0,0 @@ -package client - -import ( - "context" - "errors" - "fmt" - "math/big" - "time" - - "github.com/gagliardetto/solana-go" - "github.com/gagliardetto/solana-go/rpc" - "golang.org/x/sync/singleflight" - - "github.com/smartcontractkit/chainlink-common/pkg/logger" - - mn "github.com/smartcontractkit/chainlink-solana/pkg/solana/client/multinode" - "github.com/smartcontractkit/chainlink-solana/pkg/solana/config" - "github.com/smartcontractkit/chainlink-solana/pkg/solana/monitor" -) - -var _ ReaderWriter = (*RPCClient)(nil) - 
-type Head struct { - rpc.GetBlockResult -} - -func (h *Head) BlockNumber() int64 { - if h.BlockHeight == nil { - return 0 - } - return int64(*h.BlockHeight) -} - -func (h *Head) BlockDifficulty() *big.Int { - return nil -} - -func (h *Head) IsValid() bool { - return true -} - -type RPCClient struct { - url string - rpc *rpc.Client - skipPreflight bool // to enable or disable preflight checks - commitment rpc.CommitmentType - maxRetries *uint - txTimeout time.Duration - contextDuration time.Duration - log logger.Logger - - // provides a duplicate function call suppression mechanism - requestGroup *singleflight.Group -} - -// TODO: BCI-4061: Implement RPC Client for MultiNode - -func (c *RPCClient) Dial(ctx context.Context) error { - //TODO implement me - panic("implement me") -} - -func (c *RPCClient) SubscribeToHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { - //TODO implement me - panic("implement me") -} - -func (c *RPCClient) SubscribeToFinalizedHeads(ctx context.Context) (<-chan *Head, mn.Subscription, error) { - //TODO implement me - panic("implement me") -} - -func (c *RPCClient) Ping(ctx context.Context) error { - //TODO implement me - panic("implement me") -} - -func (c *RPCClient) IsSyncing(ctx context.Context) (bool, error) { - //TODO implement me - panic("implement me") -} - -func (c *RPCClient) UnsubscribeAllExcept(subs ...mn.Subscription) { - //TODO implement me - panic("implement me") -} - -func (c *RPCClient) Close() { - //TODO implement me - panic("implement me") -} - -func (c *RPCClient) GetInterceptedChainInfo() (latest, highestUserObservations mn.ChainInfo) { - //TODO implement me - panic("implement me") -} - -func NewRPCClient(endpoint string, cfg config.Config, requestTimeout time.Duration, log logger.Logger) (*RPCClient, error) { - return &RPCClient{ - url: endpoint, - rpc: rpc.New(endpoint), - skipPreflight: cfg.SkipPreflight(), - commitment: cfg.Commitment(), - maxRetries: cfg.MaxRetries(), - txTimeout: cfg.TxTimeout(), - contextDuration: requestTimeout, - log: log, - requestGroup: &singleflight.Group{}, - }, nil -} - -func (c *RPCClient) latency(name string) func() { - start := time.Now() - return func() { - monitor.SetClientLatency(time.Since(start), name, c.url) - } -} - -func (c *RPCClient) Balance(addr solana.PublicKey) (uint64, error) { - done := c.latency("balance") - defer done() - - ctx, cancel := context.WithTimeout(context.Background(), c.contextDuration) - defer cancel() - - v, err, _ := c.requestGroup.Do(fmt.Sprintf("GetBalance(%s)", addr.String()), func() (interface{}, error) { - return c.rpc.GetBalance(ctx, addr, c.commitment) - }) - if err != nil { - return 0, err - } - res := v.(*rpc.GetBalanceResult) - return res.Value, err -} - -func (c *RPCClient) SlotHeight() (uint64, error) { - return c.SlotHeightWithCommitment(rpc.CommitmentProcessed) // get the latest slot height -} - -func (c *RPCClient) SlotHeightWithCommitment(commitment rpc.CommitmentType) (uint64, error) { - done := c.latency("slot_height") - defer done() - - ctx, cancel := context.WithTimeout(context.Background(), c.contextDuration) - defer cancel() - v, err, _ := c.requestGroup.Do("GetSlotHeight", func() (interface{}, error) { - return c.rpc.GetSlot(ctx, commitment) - }) - return v.(uint64), err -} - -func (c *RPCClient) GetAccountInfoWithOpts(ctx context.Context, addr solana.PublicKey, opts *rpc.GetAccountInfoOpts) (*rpc.GetAccountInfoResult, error) { - done := c.latency("account_info") - defer done() - - ctx, cancel := context.WithTimeout(ctx, c.contextDuration) 
- defer cancel() - opts.Commitment = c.commitment // overrides passed in value - use defined client commitment type - return c.rpc.GetAccountInfoWithOpts(ctx, addr, opts) -} - -func (c *RPCClient) LatestBlockhash() (*rpc.GetLatestBlockhashResult, error) { - done := c.latency("latest_blockhash") - defer done() - - ctx, cancel := context.WithTimeout(context.Background(), c.contextDuration) - defer cancel() - - v, err, _ := c.requestGroup.Do("GetLatestBlockhash", func() (interface{}, error) { - return c.rpc.GetLatestBlockhash(ctx, c.commitment) - }) - return v.(*rpc.GetLatestBlockhashResult), err -} - -func (c *RPCClient) ChainID(ctx context.Context) (mn.StringID, error) { - done := c.latency("chain_id") - defer done() - - ctx, cancel := context.WithTimeout(ctx, c.contextDuration) - defer cancel() - v, err, _ := c.requestGroup.Do("GetGenesisHash", func() (interface{}, error) { - return c.rpc.GetGenesisHash(ctx) - }) - if err != nil { - return "", err - } - hash := v.(solana.Hash) - - var network string - switch hash.String() { - case DevnetGenesisHash: - network = "devnet" - case TestnetGenesisHash: - network = "testnet" - case MainnetGenesisHash: - network = "mainnet" - default: - c.log.Warnf("unknown genesis hash - assuming solana chain is 'localnet'") - network = "localnet" - } - return mn.StringID(network), nil -} - -func (c *RPCClient) GetFeeForMessage(msg string) (uint64, error) { - done := c.latency("fee_for_message") - defer done() - - // msg is base58 encoded data - - ctx, cancel := context.WithTimeout(context.Background(), c.contextDuration) - defer cancel() - res, err := c.rpc.GetFeeForMessage(ctx, msg, c.commitment) - if err != nil { - return 0, fmt.Errorf("error in GetFeeForMessage: %w", err) - } - - if res == nil || res.Value == nil { - return 0, errors.New("nil pointer in GetFeeForMessage") - } - return *res.Value, nil -} - -// https://docs.solana.com/developing/clients/jsonrpc-api#getsignaturestatuses -func (c *RPCClient) SignatureStatuses(ctx context.Context, sigs []solana.Signature) ([]*rpc.SignatureStatusesResult, error) { - done := c.latency("signature_statuses") - defer done() - - ctx, cancel := context.WithTimeout(ctx, c.contextDuration) - defer cancel() - - // searchTransactionHistory = false - res, err := c.rpc.GetSignatureStatuses(ctx, false, sigs...) 
- if err != nil { - return nil, fmt.Errorf("error in GetSignatureStatuses: %w", err) - } - - if res == nil || res.Value == nil { - return nil, errors.New("nil pointer in GetSignatureStatuses") - } - return res.Value, nil -} - -// https://docs.solana.com/developing/clients/jsonrpc-api#simulatetransaction -// opts - (optional) use `nil` to use defaults -func (c *RPCClient) SimulateTx(ctx context.Context, tx *solana.Transaction, opts *rpc.SimulateTransactionOpts) (*rpc.SimulateTransactionResult, error) { - done := c.latency("simulate_tx") - defer done() - - ctx, cancel := context.WithTimeout(ctx, c.contextDuration) - defer cancel() - - if opts == nil { - opts = &rpc.SimulateTransactionOpts{ - SigVerify: true, // verify signature - Commitment: c.commitment, - } - } - - res, err := c.rpc.SimulateTransactionWithOpts(ctx, tx, opts) - if err != nil { - return nil, fmt.Errorf("error in SimulateTransactionWithOpts: %w", err) - } - - if res == nil || res.Value == nil { - return nil, errors.New("nil pointer in SimulateTransactionWithOpts") - } - - return res.Value, nil -} - -func (c *RPCClient) SendTransaction(ctx context.Context, tx *solana.Transaction) error { - // TODO: Implement - return nil -} - -func (c *RPCClient) SendTx(ctx context.Context, tx *solana.Transaction) (solana.Signature, error) { - done := c.latency("send_tx") - defer done() - - ctx, cancel := context.WithTimeout(ctx, c.txTimeout) - defer cancel() - - opts := rpc.TransactionOpts{ - SkipPreflight: c.skipPreflight, - PreflightCommitment: c.commitment, - MaxRetries: c.maxRetries, - } - - return c.rpc.SendTransactionWithOpts(ctx, tx, opts) -} - -func (c *RPCClient) GetLatestBlock() (*rpc.GetBlockResult, error) { - // get latest confirmed slot - slot, err := c.SlotHeightWithCommitment(c.commitment) - if err != nil { - return nil, fmt.Errorf("GetLatestBlock.SlotHeight: %w", err) - } - - // get block based on slot - done := c.latency("latest_block") - defer done() - ctx, cancel := context.WithTimeout(context.Background(), c.txTimeout) - defer cancel() - v, err, _ := c.requestGroup.Do("GetBlockWithOpts", func() (interface{}, error) { - version := uint64(0) // pull all tx types (legacy + v0) - return c.rpc.GetBlockWithOpts(ctx, slot, &rpc.GetBlockOpts{ - Commitment: c.commitment, - MaxSupportedTransactionVersion: &version, - }) - }) - return v.(*rpc.GetBlockResult), err -} From d8d312ccfad986ae3324bc4a29f1c5060ff30459 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Tue, 10 Sep 2024 11:38:24 -0400 Subject: [PATCH 13/22] Address comments --- pkg/solana/chain.go | 10 +++++++--- pkg/solana/client/client.go | 3 +++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pkg/solana/chain.go b/pkg/solana/chain.go index 9a6068f03..c7cc09e7f 100644 --- a/pkg/solana/chain.go +++ b/pkg/solana/chain.go @@ -238,11 +238,14 @@ func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.L var nodes []mn.Node[mn.StringID, *client.Client] for i, nodeInfo := range cfg.ListNodes() { + if nodeInfo == nil || nodeInfo.Name == nil || nodeInfo.URL == nil { + return nil, fmt.Errorf("node config contains nil: %+v", nodeInfo) + } // create client and check rpcClient, err := client.NewClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) if err != nil { lggr.Warnw("failed to create client", "name", *nodeInfo.Name, "solana-url", nodeInfo.URL.String(), "err", err.Error()) - continue + return nil, fmt.Errorf("failed to create client: %w", err) } newNode := mn.NewNode[mn.StringID, *client.Head, 
*client.Client]( @@ -255,7 +258,7 @@ func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.L multiNode := mn.NewMultiNode[mn.StringID, *client.Client]( lggr, mn.NodeSelectionModeRoundRobin, - time.Minute, // TODO: set lease duration + 0, nodes, []mn.SendOnlyNode[mn.StringID, *client.Client]{}, mn.StringID(id), @@ -263,7 +266,8 @@ func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.L mnCfg.DeathDeclarationDelay(), ) - // TODO: implement error classification + // TODO: implement error classification; move logic to separate file if large + // TODO: might be useful to reference anza-xyz/agave@master/sdk/src/transaction/error.rs classifySendError := func(tx *solanago.Transaction, err error) mn.SendTxReturnCode { return 0 // TODO ClassifySendError(err, clientErrors, logger.Sugared(logger.Nop()), tx, common.Address{}, false) } diff --git a/pkg/solana/client/client.go b/pkg/solana/client/client.go index 21111fab4..d2c4a6b08 100644 --- a/pkg/solana/client/client.go +++ b/pkg/solana/client/client.go @@ -84,6 +84,9 @@ func (h *Head) BlockDifficulty() *big.Int { } func (h *Head) IsValid() bool { + if h.BlockHeight == nil { + return false + } return true } From 3c3756e0b3301c8a524e84310ea7955fc91eb365 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 12 Sep 2024 13:07:08 -0400 Subject: [PATCH 14/22] lint --- .golangci.yml | 1 + pkg/solana/client/client.go | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 0e66a8650..02b479dbf 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -30,6 +30,7 @@ linters-settings: excludes: - G101 - G104 + - G115 # - G204 # - G304 # - G404 diff --git a/pkg/solana/client/client.go b/pkg/solana/client/client.go index d2c4a6b08..183ff7a92 100644 --- a/pkg/solana/client/client.go +++ b/pkg/solana/client/client.go @@ -84,10 +84,7 @@ func (h *Head) BlockDifficulty() *big.Int { } func (h *Head) IsValid() bool { - if h.BlockHeight == nil { - return false - } - return true + return h.BlockHeight != nil } func NewClient(endpoint string, cfg config.Config, requestTimeout time.Duration, log logger.Logger) (*Client, error) { From 2521670c879b95c28a786f71e9d9cc70d7f68a30 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 12 Sep 2024 13:24:45 -0400 Subject: [PATCH 15/22] Fix lint overflow issues --- .golangci.yml | 1 - pkg/solana/chain.go | 2 +- pkg/solana/client/client.go | 4 ++-- pkg/solana/client/multinode/node.go | 4 ++-- pkg/solana/client/multinode/node_fsm.go | 4 ++-- pkg/solana/client/multinode/node_lifecycle.go | 4 ++-- pkg/solana/client/multinode/node_selector_highest_head.go | 6 +----- pkg/solana/client/multinode/node_selector_priority_level.go | 4 ++-- pkg/solana/client/multinode/node_selector_round_robin.go | 4 ++-- pkg/solana/client/multinode/types.go | 6 +++--- 10 files changed, 17 insertions(+), 22 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 02b479dbf..0e66a8650 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -30,7 +30,6 @@ linters-settings: excludes: - G101 - G104 - - G115 # - G204 # - G304 # - G404 diff --git a/pkg/solana/chain.go b/pkg/solana/chain.go index 152e9e304..e2747014c 100644 --- a/pkg/solana/chain.go +++ b/pkg/solana/chain.go @@ -250,7 +250,7 @@ func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.L newNode := mn.NewNode[mn.StringID, *client.Head, *client.Client]( mnCfg, mnCfg, lggr, *nodeInfo.URL.URL(), nil, *nodeInfo.Name, - int32(i), mn.StringID(id), 0, rpcClient, chainFamily) + i, 
mn.StringID(id), 0, rpcClient, chainFamily) nodes = append(nodes, newNode) } diff --git a/pkg/solana/client/client.go b/pkg/solana/client/client.go index 183ff7a92..3d6e3fb98 100644 --- a/pkg/solana/client/client.go +++ b/pkg/solana/client/client.go @@ -72,11 +72,11 @@ type Head struct { rpc.GetBlockResult } -func (h *Head) BlockNumber() int64 { +func (h *Head) BlockNumber() uint64 { if h.BlockHeight == nil { return 0 } - return int64(*h.BlockHeight) + return *h.BlockHeight } func (h *Head) BlockDifficulty() *big.Int { diff --git a/pkg/solana/client/multinode/node.go b/pkg/solana/client/multinode/node.go index 7d2b02ce2..afdece741 100644 --- a/pkg/solana/client/multinode/node.go +++ b/pkg/solana/client/multinode/node.go @@ -89,7 +89,7 @@ type node[ services.StateMachine lfcLog logger.Logger name string - id int32 + id int chainID CHAIN_ID nodePoolCfg NodeConfig chainCfg ChainConfig @@ -124,7 +124,7 @@ func NewNode[ wsuri url.URL, httpuri *url.URL, name string, - id int32, + id int, chainID CHAIN_ID, nodeOrder int32, rpc RPC, diff --git a/pkg/solana/client/multinode/node_fsm.go b/pkg/solana/client/multinode/node_fsm.go index 136910868..5d0176c02 100644 --- a/pkg/solana/client/multinode/node_fsm.go +++ b/pkg/solana/client/multinode/node_fsm.go @@ -150,10 +150,10 @@ func (n *node[CHAIN_ID, HEAD, RPC]) isFinalizedBlockOutOfSync() bool { highestObservedByCaller := n.poolInfoProvider.HighestUserObservations() latest, _ := n.rpc.GetInterceptedChainInfo() if n.chainCfg.FinalityTagEnabled() { - return latest.FinalizedBlockNumber < highestObservedByCaller.FinalizedBlockNumber-int64(n.chainCfg.FinalizedBlockOffset()) + return latest.FinalizedBlockNumber < highestObservedByCaller.FinalizedBlockNumber-uint64(n.chainCfg.FinalizedBlockOffset()) } - return latest.BlockNumber < highestObservedByCaller.BlockNumber-int64(n.chainCfg.FinalizedBlockOffset()) + return latest.BlockNumber < highestObservedByCaller.BlockNumber-uint64(n.chainCfg.FinalizedBlockOffset()) } // StateAndLatest returns nodeState with the latest ChainInfo observed by Node during current lifecycle. 
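A note on the int64 -> uint64 switch in the hunk above: once these block-number fields are unsigned, expressions of the form "latest < observed - offset" wrap around whenever the offset exceeds the observed value, silently inverting the out-of-sync check; patch 17 later in this series reverts the fields to int64 and suppresses the G115 conversion warning with //nolint:gosec instead. A minimal Go sketch of the wraparound hazard, with made-up values (illustrative only, not part of any patch here):

package main

import "fmt"

func main() {
	// Unsigned block numbers: observed-offset wraps when offset > observed.
	var latest, observed, offset uint64 = 4, 5, 50
	fmt.Println(latest < observed-offset) // true, but only because 5-50 wrapped to a huge uint64

	// Signed arithmetic keeps the comparison meaningful (the form patch 17 returns to).
	fmt.Println(int64(latest) < int64(observed)-int64(offset)) // false, as intended
}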
diff --git a/pkg/solana/client/multinode/node_lifecycle.go b/pkg/solana/client/multinode/node_lifecycle.go index d6b150690..427ccb216 100644 --- a/pkg/solana/client/multinode/node_lifecycle.go +++ b/pkg/solana/client/multinode/node_lifecycle.go @@ -335,7 +335,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) onNewHead(lggr logger.SugaredLogger, chainIn chainInfo.BlockNumber = head.BlockNumber() if !n.chainCfg.FinalityTagEnabled() { - latestFinalizedBN := max(head.BlockNumber()-int64(n.chainCfg.FinalityDepth()), 0) + latestFinalizedBN := max(head.BlockNumber()-uint64(n.chainCfg.FinalityDepth()), 0) if latestFinalizedBN > chainInfo.FinalizedBlockNumber { promPoolRPCNodeHighestFinalizedBlock.WithLabelValues(n.chainID.String(), n.name).Set(float64(latestFinalizedBN)) chainInfo.FinalizedBlockNumber = latestFinalizedBN @@ -368,7 +368,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) isOutOfSyncWithPool(localState ChainInfo) (o mode := n.nodePoolCfg.SelectionMode() switch mode { case NodeSelectionModeHighestHead, NodeSelectionModeRoundRobin, NodeSelectionModePriorityLevel: - return localState.BlockNumber < ci.BlockNumber-int64(threshold), ln + return localState.BlockNumber < ci.BlockNumber-uint64(threshold), ln case NodeSelectionModeTotalDifficulty: bigThreshold := big.NewInt(int64(threshold)) return localState.TotalDifficulty.Cmp(bigmath.Sub(ci.TotalDifficulty, bigThreshold)) < 0, ln diff --git a/pkg/solana/client/multinode/node_selector_highest_head.go b/pkg/solana/client/multinode/node_selector_highest_head.go index 52188bbdf..c7d0d1e3d 100644 --- a/pkg/solana/client/multinode/node_selector_highest_head.go +++ b/pkg/solana/client/multinode/node_selector_highest_head.go @@ -1,9 +1,5 @@ package client -import ( - "math" -) - type highestHeadNodeSelector[ CHAIN_ID ID, RPC any, @@ -17,7 +13,7 @@ func NewHighestHeadNodeSelector[ } func (s highestHeadNodeSelector[CHAIN_ID, RPC]) Select() Node[CHAIN_ID, RPC] { - var highestHeadNumber int64 = math.MinInt64 + var highestHeadNumber uint64 var highestHeadNodes []Node[CHAIN_ID, RPC] for _, n := range s { state, currentChainInfo := n.StateAndLatest() diff --git a/pkg/solana/client/multinode/node_selector_priority_level.go b/pkg/solana/client/multinode/node_selector_priority_level.go index 3e171b98b..ead720976 100644 --- a/pkg/solana/client/multinode/node_selector_priority_level.go +++ b/pkg/solana/client/multinode/node_selector_priority_level.go @@ -41,8 +41,8 @@ func (s priorityLevelNodeSelector[CHAIN_ID, RPC]) Select() Node[CHAIN_ID, RPC] { priorityLevel := nodes[len(nodes)-1].priority // NOTE: Inc returns the number after addition, so we must -1 to get the "current" counter - count := s.roundRobinCount[priorityLevel].Add(1) - 1 - idx := int(count % uint32(len(nodes))) + count := int(s.roundRobinCount[priorityLevel].Add(1) - 1) + idx := count % len(nodes) return nodes[idx].node } diff --git a/pkg/solana/client/multinode/node_selector_round_robin.go b/pkg/solana/client/multinode/node_selector_round_robin.go index 52fa9d6c8..c5ed8d853 100644 --- a/pkg/solana/client/multinode/node_selector_round_robin.go +++ b/pkg/solana/client/multinode/node_selector_round_robin.go @@ -35,8 +35,8 @@ func (s *roundRobinSelector[CHAIN_ID, RPC]) Select() Node[CHAIN_ID, RPC] { } // NOTE: Inc returns the number after addition, so we must -1 to get the "current" counter - count := s.roundRobinCount.Add(1) - 1 - idx := int(count % uint32(nNodes)) + count := int(s.roundRobinCount.Add(1) - 1) + idx := count % nNodes return liveNodes[idx] } diff --git a/pkg/solana/client/multinode/types.go 
b/pkg/solana/client/multinode/types.go index 51b70e573..2c177a9dc 100644 --- a/pkg/solana/client/multinode/types.go +++ b/pkg/solana/client/multinode/types.go @@ -68,7 +68,7 @@ type RPCClient[ // Head is the interface required by the NodeClient type Head interface { - BlockNumber() int64 + BlockNumber() uint64 BlockDifficulty() *big.Int IsValid() bool } @@ -86,8 +86,8 @@ type PoolChainInfoProvider interface { // ChainInfo - defines RPC's or MultiNode's view on the chain type ChainInfo struct { - BlockNumber int64 - FinalizedBlockNumber int64 + BlockNumber uint64 + FinalizedBlockNumber uint64 TotalDifficulty *big.Int } From 5b5cfd671379471d512233c835fdb38c072f232d Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 12 Sep 2024 13:33:00 -0400 Subject: [PATCH 16/22] Update transaction_sender.go --- pkg/solana/client/multinode/transaction_sender.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/solana/client/multinode/transaction_sender.go b/pkg/solana/client/multinode/transaction_sender.go index 71de153ae..fbd5acca5 100644 --- a/pkg/solana/client/multinode/transaction_sender.go +++ b/pkg/solana/client/multinode/transaction_sender.go @@ -145,7 +145,7 @@ func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) SendTransaction(ctx contex }() if err != nil { - return 0, err + return Retryable, err } txSender.wg.Add(1) @@ -212,7 +212,7 @@ func aggregateTxResults(resultsByCode sendTxResults) (returnCode SendTxReturnCod func (txSender *TransactionSender[TX, CHAIN_ID, RPC]) collectTxResults(ctx context.Context, tx TX, healthyNodesNum int, txResults <-chan sendTxResult) (SendTxReturnCode, error) { if healthyNodesNum == 0 { - return 0, ErroringNodeError + return Retryable, ErroringNodeError } requiredResults := int(math.Ceil(float64(healthyNodesNum) * sendTxQuorum)) errorsByCode := sendTxResults{} @@ -223,7 +223,7 @@ loop: select { case <-ctx.Done(): txSender.lggr.Debugw("Failed to collect of the results before context was done", "tx", tx, "errorsByCode", errorsByCode) - return 0, ctx.Err() + return Retryable, ctx.Err() case result := <-txResults: errorsByCode[result.ResultCode] = append(errorsByCode[result.ResultCode], result.Err) resultsCount++ From 690f8124b9c17d25b01f0696f3c648bb3df8ab31 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 12 Sep 2024 13:53:28 -0400 Subject: [PATCH 17/22] Fix lint --- pkg/solana/client/client.go | 5 +++-- pkg/solana/client/multinode/node_fsm.go | 4 ++-- pkg/solana/client/multinode/node_lifecycle.go | 4 ++-- pkg/solana/client/multinode/node_selector_highest_head.go | 4 +++- pkg/solana/client/multinode/types.go | 6 +++--- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pkg/solana/client/client.go b/pkg/solana/client/client.go index 3d6e3fb98..35ab96e62 100644 --- a/pkg/solana/client/client.go +++ b/pkg/solana/client/client.go @@ -72,11 +72,12 @@ type Head struct { rpc.GetBlockResult } -func (h *Head) BlockNumber() uint64 { +func (h *Head) BlockNumber() int64 { if h.BlockHeight == nil { return 0 } - return *h.BlockHeight + //nolint:gosec + return int64(*h.BlockHeight) } func (h *Head) BlockDifficulty() *big.Int { diff --git a/pkg/solana/client/multinode/node_fsm.go b/pkg/solana/client/multinode/node_fsm.go index 5d0176c02..136910868 100644 --- a/pkg/solana/client/multinode/node_fsm.go +++ b/pkg/solana/client/multinode/node_fsm.go @@ -150,10 +150,10 @@ func (n *node[CHAIN_ID, HEAD, RPC]) isFinalizedBlockOutOfSync() bool { highestObservedByCaller := n.poolInfoProvider.HighestUserObservations() latest, _ := 
n.rpc.GetInterceptedChainInfo() if n.chainCfg.FinalityTagEnabled() { - return latest.FinalizedBlockNumber < highestObservedByCaller.FinalizedBlockNumber-uint64(n.chainCfg.FinalizedBlockOffset()) + return latest.FinalizedBlockNumber < highestObservedByCaller.FinalizedBlockNumber-int64(n.chainCfg.FinalizedBlockOffset()) } - return latest.BlockNumber < highestObservedByCaller.BlockNumber-uint64(n.chainCfg.FinalizedBlockOffset()) + return latest.BlockNumber < highestObservedByCaller.BlockNumber-int64(n.chainCfg.FinalizedBlockOffset()) } // StateAndLatest returns nodeState with the latest ChainInfo observed by Node during current lifecycle. diff --git a/pkg/solana/client/multinode/node_lifecycle.go b/pkg/solana/client/multinode/node_lifecycle.go index 427ccb216..d6b150690 100644 --- a/pkg/solana/client/multinode/node_lifecycle.go +++ b/pkg/solana/client/multinode/node_lifecycle.go @@ -335,7 +335,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) onNewHead(lggr logger.SugaredLogger, chainIn chainInfo.BlockNumber = head.BlockNumber() if !n.chainCfg.FinalityTagEnabled() { - latestFinalizedBN := max(head.BlockNumber()-uint64(n.chainCfg.FinalityDepth()), 0) + latestFinalizedBN := max(head.BlockNumber()-int64(n.chainCfg.FinalityDepth()), 0) if latestFinalizedBN > chainInfo.FinalizedBlockNumber { promPoolRPCNodeHighestFinalizedBlock.WithLabelValues(n.chainID.String(), n.name).Set(float64(latestFinalizedBN)) chainInfo.FinalizedBlockNumber = latestFinalizedBN @@ -368,7 +368,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) isOutOfSyncWithPool(localState ChainInfo) (o mode := n.nodePoolCfg.SelectionMode() switch mode { case NodeSelectionModeHighestHead, NodeSelectionModeRoundRobin, NodeSelectionModePriorityLevel: - return localState.BlockNumber < ci.BlockNumber-uint64(threshold), ln + return localState.BlockNumber < ci.BlockNumber-int64(threshold), ln case NodeSelectionModeTotalDifficulty: bigThreshold := big.NewInt(int64(threshold)) return localState.TotalDifficulty.Cmp(bigmath.Sub(ci.TotalDifficulty, bigThreshold)) < 0, ln diff --git a/pkg/solana/client/multinode/node_selector_highest_head.go b/pkg/solana/client/multinode/node_selector_highest_head.go index c7d0d1e3d..68901cba3 100644 --- a/pkg/solana/client/multinode/node_selector_highest_head.go +++ b/pkg/solana/client/multinode/node_selector_highest_head.go @@ -1,5 +1,7 @@ package client +import "math" + type highestHeadNodeSelector[ CHAIN_ID ID, RPC any, @@ -13,7 +15,7 @@ func NewHighestHeadNodeSelector[ } func (s highestHeadNodeSelector[CHAIN_ID, RPC]) Select() Node[CHAIN_ID, RPC] { - var highestHeadNumber uint64 + var highestHeadNumber int64 = math.MinInt64 var highestHeadNodes []Node[CHAIN_ID, RPC] for _, n := range s { state, currentChainInfo := n.StateAndLatest() diff --git a/pkg/solana/client/multinode/types.go b/pkg/solana/client/multinode/types.go index 2c177a9dc..51b70e573 100644 --- a/pkg/solana/client/multinode/types.go +++ b/pkg/solana/client/multinode/types.go @@ -68,7 +68,7 @@ type RPCClient[ // Head is the interface required by the NodeClient type Head interface { - BlockNumber() uint64 + BlockNumber() int64 BlockDifficulty() *big.Int IsValid() bool } @@ -86,8 +86,8 @@ type PoolChainInfoProvider interface { // ChainInfo - defines RPC's or MultiNode's view on the chain type ChainInfo struct { - BlockNumber uint64 - FinalizedBlockNumber uint64 + BlockNumber int64 + FinalizedBlockNumber int64 TotalDifficulty *big.Int } From fd3823bfb9c35c6b7e49e675419fcff04da05a75 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 12 Sep 2024 14:17:14 -0400 Subject: 
[PATCH 18/22] Validate node config --- pkg/solana/chain.go | 4 ---- pkg/solana/config/toml.go | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/pkg/solana/chain.go b/pkg/solana/chain.go index e2747014c..d63b6ab3b 100644 --- a/pkg/solana/chain.go +++ b/pkg/solana/chain.go @@ -238,10 +238,6 @@ func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.L var nodes []mn.Node[mn.StringID, *client.Client] for i, nodeInfo := range cfg.ListNodes() { - if nodeInfo == nil || nodeInfo.Name == nil || nodeInfo.URL == nil { - return nil, fmt.Errorf("node config contains nil: %+v", nodeInfo) - } - // create client and check rpcClient, err := client.NewClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) if err != nil { lggr.Warnw("failed to create client", "name", *nodeInfo.Name, "solana-url", nodeInfo.URL.String(), "err", err.Error()) diff --git a/pkg/solana/config/toml.go b/pkg/solana/config/toml.go index b1cdfc7f5..a39b9f297 100644 --- a/pkg/solana/config/toml.go +++ b/pkg/solana/config/toml.go @@ -192,6 +192,23 @@ func (c *TOMLConfig) ValidateConfig() (err error) { if len(c.Nodes) == 0 { err = errors.Join(err, config.ErrMissing{Name: "Nodes", Msg: "must have at least one node"}) } + + for _, node := range c.Nodes { + if node == nil { + err = errors.Join(err, config.ErrMissing{Name: "Node", Msg: "required for all nodes"}) + } + if node.Name == nil { + err = errors.Join(err, config.ErrMissing{Name: "Name", Msg: "required for all nodes"}) + } else if *node.Name == "" { + err = errors.Join(err, config.ErrEmpty{Name: "Name", Msg: "required for all nodes"}) + } + if node.URL == nil { + err = errors.Join(err, config.ErrMissing{Name: "URL", Msg: "required for all nodes"}) + } else if (*url.URL)(node.URL) == nil { + err = errors.Join(err, config.ErrEmpty{Name: "URL", Msg: "required for all nodes"}) + } + } + return } From 4bf96b7f8763e471cb1ab5076040b2e94c79fb24 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 12 Sep 2024 14:25:10 -0400 Subject: [PATCH 19/22] Update toml.go --- pkg/solana/config/toml.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/pkg/solana/config/toml.go b/pkg/solana/config/toml.go index a39b9f297..599729ec5 100644 --- a/pkg/solana/config/toml.go +++ b/pkg/solana/config/toml.go @@ -194,9 +194,6 @@ func (c *TOMLConfig) ValidateConfig() (err error) { } for _, node := range c.Nodes { - if node == nil { - err = errors.Join(err, config.ErrMissing{Name: "Node", Msg: "required for all nodes"}) - } if node.Name == nil { err = errors.Join(err, config.ErrMissing{Name: "Name", Msg: "required for all nodes"}) } else if *node.Name == "" { From c1b83a5a3ae779790f64b6a9f5711760bf268da1 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Wed, 18 Sep 2024 10:14:04 -0400 Subject: [PATCH 20/22] Add SendOnly nodes --- pkg/solana/chain.go | 9 +++++++-- pkg/solana/client/client.go | 5 +++-- pkg/solana/config/config.go | 5 +++-- pkg/solana/config/toml.go | 15 +-------------- 4 files changed, 14 insertions(+), 20 deletions(-) diff --git a/pkg/solana/chain.go b/pkg/solana/chain.go index d63b6ab3b..8b4ad5787 100644 --- a/pkg/solana/chain.go +++ b/pkg/solana/chain.go @@ -236,6 +236,7 @@ func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.L mnCfg := cfg.MultiNodeConfig() var nodes []mn.Node[mn.StringID, *client.Client] + var sendOnlyNodes []mn.SendOnlyNode[mn.StringID, *client.Client] for i, nodeInfo := range cfg.ListNodes() { rpcClient, err := 
client.NewClient(nodeInfo.URL.String(), cfg, DefaultRequestTimeout, logger.Named(lggr, "Client."+*nodeInfo.Name)) @@ -248,7 +249,11 @@ func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.L mnCfg, mnCfg, lggr, *nodeInfo.URL.URL(), nil, *nodeInfo.Name, i, mn.StringID(id), 0, rpcClient, chainFamily) - nodes = append(nodes, newNode) + if nodeInfo.SendOnly { + sendOnlyNodes = append(sendOnlyNodes, newNode) + } else { + nodes = append(nodes, newNode) + } } multiNode := mn.NewMultiNode[mn.StringID, *client.Client]( @@ -256,7 +261,7 @@ func newChain(id string, cfg *config.TOMLConfig, ks loop.Keystore, lggr logger.L mn.NodeSelectionModeRoundRobin, 0, nodes, - []mn.SendOnlyNode[mn.StringID, *client.Client]{}, + sendOnlyNodes, mn.StringID(id), chainFamily, mnCfg.DeathDeclarationDelay(), diff --git a/pkg/solana/client/client.go b/pkg/solana/client/client.go index 35ab96e62..785e7e508 100644 --- a/pkg/solana/client/client.go +++ b/pkg/solana/client/client.go @@ -73,10 +73,11 @@ type Head struct { } func (h *Head) BlockNumber() int64 { - if h.BlockHeight == nil { + if !h.IsValid() { return 0 } - //nolint:gosec + // nolint:gosec + // G115: integer overflow conversion uint64 -> int64 return int64(*h.BlockHeight) } diff --git a/pkg/solana/config/config.go b/pkg/solana/config/config.go index 9d5cdc5a9..28698c7c3 100644 --- a/pkg/solana/config/config.go +++ b/pkg/solana/config/config.go @@ -146,8 +146,9 @@ func (c *Chain) SetDefaults() { } type Node struct { - Name *string - URL *config.URL + Name *string + URL *config.URL + SendOnly bool } func (n *Node) ValidateConfig() (err error) { diff --git a/pkg/solana/config/toml.go b/pkg/solana/config/toml.go index 599729ec5..90657fd2c 100644 --- a/pkg/solana/config/toml.go +++ b/pkg/solana/config/toml.go @@ -105,6 +105,7 @@ func setFromNode(n, f *Node) { if f.URL != nil { n.URL = f.URL } + n.SendOnly = f.SendOnly } type TOMLConfig struct { @@ -192,20 +193,6 @@ func (c *TOMLConfig) ValidateConfig() (err error) { if len(c.Nodes) == 0 { err = errors.Join(err, config.ErrMissing{Name: "Nodes", Msg: "must have at least one node"}) } - - for _, node := range c.Nodes { - if node.Name == nil { - err = errors.Join(err, config.ErrMissing{Name: "Name", Msg: "required for all nodes"}) - } else if *node.Name == "" { - err = errors.Join(err, config.ErrEmpty{Name: "Name", Msg: "required for all nodes"}) - } - if node.URL == nil { - err = errors.Join(err, config.ErrMissing{Name: "URL", Msg: "required for all nodes"}) - } else if (*url.URL)(node.URL) == nil { - err = errors.Join(err, config.ErrEmpty{Name: "URL", Msg: "required for all nodes"}) - } - } - return } From 8aa39f61386a169b22b86796de40ba79e7834389 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 19 Sep 2024 11:59:27 -0400 Subject: [PATCH 21/22] Use test context --- pkg/solana/chain_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/solana/chain_test.go b/pkg/solana/chain_test.go index 3f1fdaf23..c5ccc7307 100644 --- a/pkg/solana/chain_test.go +++ b/pkg/solana/chain_test.go @@ -1,9 +1,9 @@ package solana import ( - "context" "errors" "fmt" + "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" "io" "net/http" "net/http/httptest" @@ -175,7 +175,7 @@ func TestSolanaChain_VerifiedClient(t *testing.T) { testChain.id = "incorrect" c, err = testChain.verifiedClient(node) assert.NoError(t, err) - _, err = c.ChainID(context.Background()) + _, err = c.ChainID(tests.Context(t)) // expect error from id mismatch (even if using a cached client) when performing 
RPC calls assert.Error(t, err) assert.Equal(t, fmt.Sprintf("client returned mismatched chain id (expected: %s, got: %s): %s", "incorrect", "devnet", node.URL), err.Error()) From 0a016db415e0596e48688adafbc9f63bab536110 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 19 Sep 2024 12:03:15 -0400 Subject: [PATCH 22/22] lint --- pkg/solana/chain_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/solana/chain_test.go b/pkg/solana/chain_test.go index c5ccc7307..6fb966740 100644 --- a/pkg/solana/chain_test.go +++ b/pkg/solana/chain_test.go @@ -3,7 +3,6 @@ package solana import ( "errors" "fmt" - "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" "io" "net/http" "net/http/httptest" @@ -17,6 +16,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/config" "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" "github.com/smartcontractkit/chainlink-solana/pkg/solana/client" solcfg "github.com/smartcontractkit/chainlink-solana/pkg/solana/config"
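For reference, with the SendOnly flag introduced in patch 20 a node can be marked as broadcast-only in the chain's TOML configuration. The snippet below is illustrative only: names and URLs are placeholders, and the SendOnly key assumes the new struct field maps straight to TOML the same way Name and URL already do.

[[Solana]]
ChainID = 'mainnet'

[[Solana.Nodes]]
Name = 'primary'
URL = 'http://solana.rpc:8899'

[[Solana.Nodes]]
Name = 'broadcast-backup'
URL = 'http://solana.sendonly:8899'
SendOnly = true

Entries flagged this way are collected into sendOnlyNodes and handed to NewMultiNode as send-only nodes, so they can take part in transaction broadcast but are not candidates when SelectRPC picks a node for reads.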