From fefb1008d25ead44c3f0c0c0a64016cbff3462f9 Mon Sep 17 00:00:00 2001 From: iamsofonias Date: Fri, 15 Dec 2023 13:52:10 -0500 Subject: [PATCH 01/37] Add BLS Keystore --- lib/bls_keystore.go | 147 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 lib/bls_keystore.go diff --git a/lib/bls_keystore.go b/lib/bls_keystore.go new file mode 100644 index 000000000..b1356da6c --- /dev/null +++ b/lib/bls_keystore.go @@ -0,0 +1,147 @@ +package lib + +import ( + "github.com/deso-protocol/core/bls" + "github.com/deso-protocol/core/consensus" + "github.com/pkg/errors" +) + +// BLSSigner is a wrapper for the bls.PrivateKey type, which abstracts away the private key +// and only exposes protected methods for signing a select set of message types needed for +// Proof of Stake. It allows signing for: +// - PoS Validator Votes Messages +// - PoS Validator Timeout Messages +// - PoS Block Proposals +// - PoS Validator Connection Handshakes +// - PoS Random Seed Signature +// +// TODO: We will likely need to associate individual op-codes for each message type that can be signed, +// so that there is no risk of signature collisions between different message types. Ex: the payload +// signed per message type must be made up of the following tuples: +// - Validator Vote: (0x01, view uint64, blockHash consensus.BlockHash) +// - Validator Timeout: (0x02, view uint64, highQCView uint64) +// - PoS Block Proposal: (0x03, view uint64, blockHash consensus.BlockHash) +// - PoS Validator Handshake: (0x04, peer's random nonce, our node's random nonce) +// - PoS Random Seed Signature: (previous block's random seed hash) + +type BLSSignatureOpCode byte + +const ( + BLSSignatureOpCodeValidatorVote BLSSignatureOpCode = 0 + BLSSignatureOpCodeValidatorTimeout BLSSignatureOpCode = 1 + BLSSignatureOpCodePoSBlockProposal BLSSignatureOpCode = 2 + BLSSignatureOpCodePoSValidatorHandshake BLSSignatureOpCode = 3 +) + +func (opCode BLSSignatureOpCode) Bytes() []byte { + return []byte{byte(opCode)} +} + +////////////////////////////////////////////////////////// +// BLSKeystore +////////////////////////////////////////////////////////// + +type BLSKeystore struct { + signer *BLSSigner +} + +func NewBLSKeystore(seed string) (*BLSKeystore, error) { + privateKey, err := bls.NewPrivateKey() + if err != nil { + return nil, errors.Wrapf(err, "NewBLSKeystore: Problem generating private key from seed: %s", seed) + } + if _, err = privateKey.FromString(seed); err != nil { + return nil, errors.Wrapf(err, "NewBLSKeystore: Problem retrieving private key from seed: %s", seed) + } + + signer, err := NewBLSSigner(privateKey) + if err != nil { + return nil, err + } + return &BLSKeystore{signer: signer}, nil +} + +func (keystore *BLSKeystore) GetSigner() *BLSSigner { + return keystore.signer +} + +////////////////////////////////////////////////////////// +// BLSSigner +////////////////////////////////////////////////////////// + +type BLSSigner struct { + privateKey *bls.PrivateKey +} + +func NewBLSSigner(privateKey *bls.PrivateKey) (*BLSSigner, error) { + if privateKey == nil { + return nil, errors.New("NewBLSSigner: privateKey cannot be nil") + } + return &BLSSigner{privateKey: privateKey}, nil +} + +func (signer *BLSSigner) sign(opCode BLSSignatureOpCode, payload []byte) (*bls.Signature, error) { + newPayload := append(opCode.Bytes(), payload...) 
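+ // Prepending the op-code byte gives each message type its own signing domain, so a
+ // signature produced for one message type can never collide with, or be replayed as,
+ // a signature over a different message type with identical payload bytes.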
+ return signer.privateKey.Sign(newPayload) +} + +func (signer *BLSSigner) GetPublicKey() *bls.PublicKey { + return signer.privateKey.PublicKey() +} + +func (signer *BLSSigner) SignBlockProposal(view uint64, blockHash consensus.BlockHash) (*bls.Signature, error) { + // A block proposer's signature on a block is just its partial vote signature. This allows us to aggregate + // signatures from the proposer and validators into a single aggregated signature to build a QC. + return signer.SignValidatorVote(view, blockHash) +} + +func (signer *BLSSigner) SignValidatorVote(view uint64, blockHash consensus.BlockHash) (*bls.Signature, error) { + payload := consensus.GetVoteSignaturePayload(view, blockHash) + return signer.sign(BLSSignatureOpCodeValidatorVote, payload[:]) +} + +func (signer *BLSSigner) SignValidatorTimeout(view uint64, highQCView uint64) (*bls.Signature, error) { + payload := consensus.GetTimeoutSignaturePayload(view, highQCView) + return signer.sign(BLSSignatureOpCodeValidatorTimeout, payload[:]) +} + +func (signer *BLSSigner) SignRandomSeedHash(randomSeedHash *RandomSeedHash) (*bls.Signature, error) { + return SignRandomSeedHash(signer.privateKey, randomSeedHash) +} + +// TODO: Add signing function for PoS blocks + +func (signer *BLSSigner) SignPoSValidatorHandshake(nonceSent uint64, nonceReceived uint64, tstampMicro uint64) (*bls.Signature, error) { + // FIXME + payload := []byte{} + return signer.sign(BLSSignatureOpCodePoSValidatorHandshake, payload[:]) +} + +////////////////////////////////////////////////////////// +// BLS Verification +////////////////////////////////////////////////////////// + +func _blsVerify(opCode BLSSignatureOpCode, payload []byte, signature *bls.Signature, publicKey *bls.PublicKey) (bool, error) { + newPayload := append(opCode.Bytes(), payload...) + return publicKey.Verify(signature, newPayload) +} + +func BLSVerifyValidatorVote(view uint64, blockHash consensus.BlockHash, signature *bls.Signature, publicKey *bls.PublicKey) (bool, error) { + payload := consensus.GetVoteSignaturePayload(view, blockHash) + return _blsVerify(BLSSignatureOpCodeValidatorVote, payload[:], signature, publicKey) +} + +func BLSVerifyValidatorTimeout(view uint64, highQCView uint64, signature *bls.Signature, publicKey *bls.PublicKey) (bool, error) { + payload := consensus.GetTimeoutSignaturePayload(view, highQCView) + return _blsVerify(BLSSignatureOpCodeValidatorTimeout, payload[:], signature, publicKey) +} + +// TODO: Add Verifier function for PoS blocks + +func BLSVerifyPoSValidatorHandshake(nonceSent uint64, nonceReceived uint64, tstampMicro uint64, + signature *bls.Signature, publicKey *bls.PublicKey) (bool, error) { + + // FIXME + payload := []byte{} + return _blsVerify(BLSSignatureOpCodePoSValidatorHandshake, payload[:], signature, publicKey) +} From a7edc6c55cbbf84ff3659e89e43d51ad531b0519 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:52:42 +0100 Subject: [PATCH 02/37] PoS Validator Constants and Network Changes (#876) * PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. 
* Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment * Constants and network changes * Test MsgDeSoVerack encoding * Fix snapshot hack * Revert "Remove constants/network" This reverts commit b467ddbcd034c2e8d2728a7e77f4b714b686a760. * Fix compilation errors * Address review comments --- lib/connection_manager.go | 9 +- lib/constants.go | 38 ++++++- lib/network.go | 234 +++++++++++++++++++++++++++++++------- lib/network_test.go | 46 +++++++- lib/peer.go | 16 +-- lib/server.go | 18 +-- lib/snapshot.go | 8 +- 7 files changed, 294 insertions(+), 75 deletions(-) diff --git a/lib/connection_manager.go b/lib/connection_manager.go index a14742c8b..4cefe9527 100644 --- a/lib/connection_manager.go +++ b/lib/connection_manager.go @@ -891,13 +891,6 @@ func (cmgr *ConnectionManager) Start() { // Start the peer's message loop. pp.Start() - - // Signal the server about the new Peer in case it wants to do something with it. - cmgr.serverMessageQueue <- &ServerMessage{ - Peer: pp, - Msg: &MsgDeSoNewPeer{}, - } - } case pp := <-cmgr.donePeerChan: { @@ -920,7 +913,7 @@ func (cmgr *ConnectionManager) Start() { // with it. cmgr.serverMessageQueue <- &ServerMessage{ Peer: pp, - Msg: &MsgDeSoDonePeer{}, + Msg: &MsgDeSoDisconnectedPeer{}, } } } diff --git a/lib/constants.go b/lib/constants.go index 308b08e27..eccf1c582 100644 --- a/lib/constants.go +++ b/lib/constants.go @@ -478,6 +478,22 @@ func GetEncoderMigrationHeightsList(forkHeights *ForkHeights) ( return migrationHeightsList } +type ProtocolVersionType uint64 + +const ( + ProtocolVersion0 ProtocolVersionType = 0 + ProtocolVersion1 ProtocolVersionType = 1 + ProtocolVersion2 ProtocolVersionType = 2 +) + +func NewProtocolVersionType(version uint64) ProtocolVersionType { + return ProtocolVersionType(version) +} + +func (pvt ProtocolVersionType) ToUint64() uint64 { + return uint64(pvt) +} + // DeSoParams defines the full list of possible parameters for the // DeSo network. type DeSoParams struct { @@ -486,7 +502,7 @@ type DeSoParams struct { // Set to true when we're running in regtest mode. This is useful for testing. ExtraRegtestParamUpdaterKeys map[PkMapKey]bool // The current protocol version we're running. - ProtocolVersion uint64 + ProtocolVersion ProtocolVersionType // The minimum protocol version we'll allow a peer we connect to // to have. MinProtocolVersion uint64 @@ -545,6 +561,9 @@ type DeSoParams struct { // The amount of time we wait to receive a version message from a peer. VersionNegotiationTimeout time.Duration + // The maximum number of addresses to broadcast to peers. + MaxAddressesToBroadcast uint32 + // The genesis block to use as the base of our chain. GenesisBlock *MsgDeSoBlock // The expected hash of the genesis block. Should align with what one @@ -713,6 +732,9 @@ type DeSoParams struct { // in BMF calculations. DefaultFailingTransactionBMFMultiplierBasisPoints uint64 + // HandshakeTimeoutMicroSeconds is the timeout for the peer handshake certificate. The default value is 15 minutes. 
+ HandshakeTimeoutMicroSeconds uint64 + ForkHeights ForkHeights EncoderMigrationHeights *EncoderMigrationHeights @@ -917,7 +939,7 @@ var MainnetForkHeights = ForkHeights{ // DeSoMainnetParams defines the DeSo parameters for the mainnet. var DeSoMainnetParams = DeSoParams{ NetworkType: NetworkType_MAINNET, - ProtocolVersion: 1, + ProtocolVersion: ProtocolVersion1, MinProtocolVersion: 1, UserAgent: "Architect", DNSSeeds: []string{ @@ -1000,6 +1022,8 @@ var DeSoMainnetParams = DeSoParams{ DialTimeout: 30 * time.Second, VersionNegotiationTimeout: 30 * time.Second, + MaxAddressesToBroadcast: 10, + BlockRewardMaturity: time.Hour * 3, V1DifficultyAdjustmentFactor: 10, @@ -1126,6 +1150,9 @@ var DeSoMainnetParams = DeSoParams{ // The rate of the failing transaction's fee used in BMF calculations. DefaultFailingTransactionBMFMultiplierBasisPoints: uint64(2500), + // The peer handshake certificate timeout. + HandshakeTimeoutMicroSeconds: uint64(900000000), + ForkHeights: MainnetForkHeights, EncoderMigrationHeights: GetEncoderMigrationHeights(&MainnetForkHeights), EncoderMigrationHeightsList: GetEncoderMigrationHeightsList(&MainnetForkHeights), @@ -1222,7 +1249,7 @@ var TestnetForkHeights = ForkHeights{ // DeSoTestnetParams defines the DeSo parameters for the testnet. var DeSoTestnetParams = DeSoParams{ NetworkType: NetworkType_TESTNET, - ProtocolVersion: 0, + ProtocolVersion: ProtocolVersion0, MinProtocolVersion: 0, UserAgent: "Architect", DNSSeeds: []string{ @@ -1266,6 +1293,8 @@ var DeSoTestnetParams = DeSoParams{ DialTimeout: 30 * time.Second, VersionNegotiationTimeout: 30 * time.Second, + MaxAddressesToBroadcast: 10, + GenesisBlock: &GenesisBlock, GenesisBlockHashHex: GenesisBlockHashHex, @@ -1393,6 +1422,9 @@ var DeSoTestnetParams = DeSoParams{ // The rate of the failing transaction's fee used in BMF calculations. DefaultFailingTransactionBMFMultiplierBasisPoints: uint64(2500), + // The peer handshake certificate timeout. + HandshakeTimeoutMicroSeconds: uint64(900000000), + ForkHeights: TestnetForkHeights, EncoderMigrationHeights: GetEncoderMigrationHeights(&TestnetForkHeights), EncoderMigrationHeightsList: GetEncoderMigrationHeightsList(&TestnetForkHeights), diff --git a/lib/network.go b/lib/network.go index 15b230a83..bb7fcae4e 100644 --- a/lib/network.go +++ b/lib/network.go @@ -17,11 +17,10 @@ import ( "strings" "time" - "github.com/deso-protocol/core/collections/bitset" - "github.com/golang/glog" - "github.com/decred/dcrd/dcrec/secp256k1/v4" + "github.com/golang/glog" + "github.com/deso-protocol/core/collections/bitset" "github.com/deso-protocol/core/consensus" "github.com/btcsuite/btcd/btcec" @@ -111,11 +110,12 @@ const ( // TODO: Should probably split these out into a separate channel in the server to // make things more parallelized. 
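+ // Control messages like these are only passed between goroutines inside the node. They
+ // are never serialized onto the wire, which is why the ToBytes/FromBytes implementations
+ // for these message types intentionally return errors.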
- MsgTypeQuit MsgType = ControlMessagesStart - MsgTypeNewPeer MsgType = ControlMessagesStart + 1 - MsgTypeDonePeer MsgType = ControlMessagesStart + 2 - MsgTypeBlockAccepted MsgType = ControlMessagesStart + 3 - MsgTypeBitcoinManagerUpdate MsgType = ControlMessagesStart + 4 // Deprecated + MsgTypeQuit MsgType = ControlMessagesStart + MsgTypeDisconnectedPeer MsgType = ControlMessagesStart + 1 + MsgTypeBlockAccepted MsgType = ControlMessagesStart + 2 + MsgTypeBitcoinManagerUpdate MsgType = ControlMessagesStart + 3 // Deprecated + MsgTypePeerHandshakeComplete MsgType = ControlMessagesStart + 4 + MsgTypeNewConnection MsgType = ControlMessagesStart + 5 // NEXT_TAG = 7 ) @@ -173,14 +173,16 @@ func (msgType MsgType) String() string { return "GET_ADDR" case MsgTypeQuit: return "QUIT" - case MsgTypeNewPeer: - return "NEW_PEER" - case MsgTypeDonePeer: + case MsgTypeDisconnectedPeer: return "DONE_PEER" case MsgTypeBlockAccepted: return "BLOCK_ACCEPTED" case MsgTypeBitcoinManagerUpdate: return "BITCOIN_MANAGER_UPDATE" + case MsgTypePeerHandshakeComplete: + return "PEER_HANDSHAKE_COMPLETE" + case MsgTypeNewConnection: + return "NEW_CONNECTION" case MsgTypeGetSnapshot: return "GET_SNAPSHOT" case MsgTypeSnapshotData: @@ -837,34 +839,64 @@ func (msg *MsgDeSoQuit) FromBytes(data []byte) error { return fmt.Errorf("MsgDeSoQuit.FromBytes not implemented") } -type MsgDeSoNewPeer struct { +type MsgDeSoDisconnectedPeer struct { } -func (msg *MsgDeSoNewPeer) GetMsgType() MsgType { - return MsgTypeNewPeer +func (msg *MsgDeSoDisconnectedPeer) GetMsgType() MsgType { + return MsgTypeDisconnectedPeer } -func (msg *MsgDeSoNewPeer) ToBytes(preSignature bool) ([]byte, error) { - return nil, fmt.Errorf("MsgDeSoNewPeer.ToBytes: Not implemented") +func (msg *MsgDeSoDisconnectedPeer) ToBytes(preSignature bool) ([]byte, error) { + return nil, fmt.Errorf("MsgDeSoDisconnectedPeer.ToBytes: Not implemented") } -func (msg *MsgDeSoNewPeer) FromBytes(data []byte) error { - return fmt.Errorf("MsgDeSoNewPeer.FromBytes not implemented") +func (msg *MsgDeSoDisconnectedPeer) FromBytes(data []byte) error { + return fmt.Errorf("MsgDeSoDisconnectedPeer.FromBytes not implemented") } -type MsgDeSoDonePeer struct { +// MsgDeSoPeerHandshakeComplete is a control message that is used to internally signal when a peer has +// connected and completed the Version + Verack handshake and authentication process. 
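+// Like the other control messages, it is never sent over the wire, so its ToBytes and
+// FromBytes methods are intentionally left unimplemented.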
+type MsgDeSoPeerHandshakeComplete struct { } -func (msg *MsgDeSoDonePeer) GetMsgType() MsgType { - return MsgTypeDonePeer +func (msg *MsgDeSoPeerHandshakeComplete) GetMsgType() MsgType { + return MsgTypePeerHandshakeComplete } -func (msg *MsgDeSoDonePeer) ToBytes(preSignature bool) ([]byte, error) { - return nil, fmt.Errorf("MsgDeSoDonePeer.ToBytes: Not implemented") +func (msg *MsgDeSoPeerHandshakeComplete) ToBytes(preSignature bool) ([]byte, error) { + return nil, fmt.Errorf("MsgDeSoPeerHandshakeComplete.ToBytes: Not implemented") } -func (msg *MsgDeSoDonePeer) FromBytes(data []byte) error { - return fmt.Errorf("MsgDeSoDonePeer.FromBytes not implemented") +func (msg *MsgDeSoPeerHandshakeComplete) FromBytes(data []byte) error { + return fmt.Errorf("MsgDeSoPeerHandshakeComplete.FromBytes not implemented") +} + +type ConnectionType uint8 + +const ( + ConnectionTypeOutbound ConnectionType = iota + ConnectionTypeInbound +) + +type Connection interface { + GetConnectionType() ConnectionType + Close() +} + +type MsgDeSoNewConnection struct { + Connection Connection +} + +func (msg *MsgDeSoNewConnection) GetMsgType() MsgType { + return MsgTypeNewConnection +} + +func (msg *MsgDeSoNewConnection) ToBytes(preSignature bool) ([]byte, error) { + return nil, fmt.Errorf("MsgDeSoNewConnection.ToBytes: Not implemented") +} + +func (msg *MsgDeSoNewConnection) FromBytes(data []byte) error { + return fmt.Errorf("MsgDeSoNewConnection.FromBytes not implemented") } // ================================================================== @@ -1513,12 +1545,14 @@ type ServiceFlag uint64 const ( // SFFullNodeDeprecated is deprecated, and set on all nodes by default // now. We basically split it into SFHyperSync and SFArchivalMode. - SFFullNodeDeprecated ServiceFlag = 1 << iota + SFFullNodeDeprecated ServiceFlag = 1 << 0 // SFHyperSync is a flag used to indicate that the peer supports hyper sync. - SFHyperSync + SFHyperSync ServiceFlag = 1 << 1 // SFArchivalNode is a flag complementary to SFHyperSync. If node is a hypersync node then // it might not be able to support block sync anymore, unless it has archival mode turned on. - SFArchivalNode + SFArchivalNode ServiceFlag = 1 << 2 + // SFPosValidator is a flag used to indicate that the peer is running a PoS validator. + SFPosValidator ServiceFlag = 1 << 3 ) type MsgDeSoVersion struct { @@ -1864,34 +1898,148 @@ func (msg *MsgDeSoGetAddr) GetMsgType() MsgType { // VERACK Message // ================================================================== -// VERACK messages have no payload. +type VerackVersion uint64 + +func NewVerackVersion(version uint64) VerackVersion { + return VerackVersion(version) +} + +const ( + VerackVersion0 VerackVersion = 0 + VerackVersion1 VerackVersion = 1 +) + +func (vv VerackVersion) ToUint64() uint64 { + return uint64(vv) +} + type MsgDeSoVerack struct { - // A verack message must contain the nonce the peer received in the - // initial version message. This ensures the peer that is communicating - // with us actually controls the address she says she does similar to - // "SYN Cookie" DDOS protection. - Nonce uint64 + // The VerackVersion0 message contains only the NonceReceived field, which is the nonce the sender received in the + // initial version message from the peer. This ensures the sender controls the network address, similarly to the + // "SYN Cookie" DDOS protection. The Version field in the VerackVersion0 message is implied, based on the msg length. 
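+ // Since a V0 payload is a single uvarint (at most MaxVarintLen64 bytes), FromBytes can
+ // infer the version from the total message length alone.
+ // Note: judging by TestVerackV1, the V1 signature is computed over the SHA3-256 hash of
+ // the concatenated uvarint encodings of (NonceReceived, NonceSent, TstampMicro).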
+ // + // The VerackVersion1 message contains the tuple of which correspond to the + // received and sent nonces in the version message from the sender's perspective, as well as a recent timestamp. + // The VerackVersion1 message is used in context of Proof of Stake, where validators register their BLS public keys + // as part of their validator entry. The sender of this message must be a registered validator, and he must attach + // their public key to the message, along with a BLS signature of the tuple. + Version VerackVersion + + NonceReceived uint64 + NonceSent uint64 + TstampMicro uint64 + + PublicKey *bls.PublicKey + Signature *bls.Signature } func (msg *MsgDeSoVerack) ToBytes(preSignature bool) ([]byte, error) { + switch msg.Version { + case VerackVersion0: + return msg.EncodeVerackV0() + case VerackVersion1: + return msg.EncodeVerackV1() + default: + return nil, fmt.Errorf("MsgDeSoVerack.ToBytes: Unrecognized version: %v", msg.Version) + } +} + +func (msg *MsgDeSoVerack) EncodeVerackV0() ([]byte, error) { retBytes := []byte{} // Nonce - retBytes = append(retBytes, UintToBuf(msg.Nonce)...) + retBytes = append(retBytes, UintToBuf(msg.NonceReceived)...) + return retBytes, nil +} + +func (msg *MsgDeSoVerack) EncodeVerackV1() ([]byte, error) { + if msg.PublicKey == nil || msg.Signature == nil { + return nil, fmt.Errorf("MsgDeSoVerack.EncodeVerackV1: PublicKey and Signature must be set for V1 message") + } + + retBytes := []byte{} + + // Version + retBytes = append(retBytes, UintToBuf(msg.Version.ToUint64())...) + // Nonce Received + retBytes = append(retBytes, UintToBuf(msg.NonceReceived)...) + // Nonce Sent + retBytes = append(retBytes, UintToBuf(msg.NonceSent)...) + // Tstamp Micro + retBytes = append(retBytes, UintToBuf(msg.TstampMicro)...) + // PublicKey + retBytes = append(retBytes, EncodeBLSPublicKey(msg.PublicKey)...) + // Signature + retBytes = append(retBytes, EncodeBLSSignature(msg.Signature)...) + return retBytes, nil } func (msg *MsgDeSoVerack) FromBytes(data []byte) error { rr := bytes.NewReader(data) - retMsg := NewMessage(MsgTypeVerack).(*MsgDeSoVerack) - { - nonce, err := ReadUvarint(rr) - if err != nil { - return errors.Wrapf(err, "MsgDeSoVerack.FromBytes: Problem reading Nonce") - } - retMsg.Nonce = nonce + // The V0 verack message is determined from the message length. The V0 message will only contain the NonceReceived field. 
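+ // A uvarint encodes a 64-bit value in at most MaxVarintLen64 bytes (10 in the standard
+ // uvarint encoding), so any V0 message fits within this bound, while a V1 message is
+ // always strictly longer.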
+ if len(data) <= MaxVarintLen64 { + return msg.FromBytesV0(data) + } + + version, err := ReadUvarint(rr) + if err != nil { + return errors.Wrapf(err, "MsgDeSoVerack.FromBytes: Problem reading Version") + } + msg.Version = NewVerackVersion(version) + switch msg.Version { + case VerackVersion0: + return fmt.Errorf("MsgDeSoVerack.FromBytes: Outdated Version=0 used for new encoding") + case VerackVersion1: + return msg.FromBytesV1(data) + default: + return fmt.Errorf("MsgDeSoVerack.FromBytes: Unrecognized version: %v", msg.Version) + } +} + +func (msg *MsgDeSoVerack) FromBytesV0(data []byte) error { + var err error + rr := bytes.NewReader(data) + msg.NonceReceived, err = ReadUvarint(rr) + if err != nil { + return errors.Wrapf(err, "MsgDeSoVerack.FromBytes: Problem reading Nonce") + } + return nil +} + +func (msg *MsgDeSoVerack) FromBytesV1(data []byte) error { + var err error + rr := bytes.NewReader(data) + version, err := ReadUvarint(rr) + if err != nil { + return errors.Wrapf(err, "MsgDeSoVerack.FromBytes: Problem reading Version") + } + msg.Version = NewVerackVersion(version) + + msg.NonceReceived, err = ReadUvarint(rr) + if err != nil { + return errors.Wrapf(err, "MsgDeSoVerack.FromBytes: Problem reading Nonce Received") + } + + msg.NonceSent, err = ReadUvarint(rr) + if err != nil { + return errors.Wrapf(err, "MsgDeSoVerack.FromBytes: Problem reading Nonce Sent") + } + + msg.TstampMicro, err = ReadUvarint(rr) + if err != nil { + return errors.Wrapf(err, "MsgDeSoVerack.FromBytes: Problem reading Tstamp Micro") + } + + msg.PublicKey, err = DecodeBLSPublicKey(rr) + if err != nil { + return errors.Wrapf(err, "MsgDeSoVerack.FromBytes: Problem reading PublicKey") + } + + msg.Signature, err = DecodeBLSSignature(rr) + if err != nil { + return errors.Wrapf(err, "MsgDeSoVerack.FromBytes: Problem reading Signature") } - *msg = *retMsg return nil } diff --git a/lib/network_test.go b/lib/network_test.go index fbd9932e2..e9d6e6e5e 100644 --- a/lib/network_test.go +++ b/lib/network_test.go @@ -5,6 +5,8 @@ package lib import ( "bytes" "encoding/hex" + "github.com/deso-protocol/core/bls" + "golang.org/x/crypto/sha3" "math/big" "math/rand" "reflect" @@ -68,7 +70,7 @@ func TestVersionConversion(t *testing.T) { "works, add the new field to the test case, and fix this error.") } -func TestVerack(t *testing.T) { +func TestVerackV0(t *testing.T) { assert := assert.New(t) require := require.New(t) _ = assert @@ -78,13 +80,51 @@ func TestVerack(t *testing.T) { var buf bytes.Buffer nonce := uint64(12345678910) - _, err := WriteMessage(&buf, &MsgDeSoVerack{Nonce: nonce}, networkType) + _, err := WriteMessage(&buf, &MsgDeSoVerack{Version: VerackVersion0, NonceReceived: nonce}, networkType) require.NoError(err) verBytes := buf.Bytes() testMsg, _, err := ReadMessage(bytes.NewReader(verBytes), networkType) require.NoError(err) - require.Equal(&MsgDeSoVerack{Nonce: nonce}, testMsg) + require.Equal(&MsgDeSoVerack{Version: VerackVersion0, NonceReceived: nonce}, testMsg) +} + +func TestVerackV1(t *testing.T) { + require := require.New(t) + + networkType := NetworkType_MAINNET + var buf bytes.Buffer + + nonceReceived := uint64(12345678910) + nonceSent := nonceReceived + 1 + tstamp := uint64(2345678910) + // First, test that nil public key and signature are not allowed. 
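+ // EncodeVerackV1 returns an error when PublicKey or Signature is nil, so this
+ // WriteMessage call is expected to fail.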
+ msg := &MsgDeSoVerack{ + Version: VerackVersion1, + NonceReceived: nonceReceived, + NonceSent: nonceSent, + TstampMicro: tstamp, + PublicKey: nil, + Signature: nil, + } + _, err := WriteMessage(&buf, msg, networkType) + require.Error(err) + payload := append(UintToBuf(nonceReceived), UintToBuf(nonceSent)...) + payload = append(payload, UintToBuf(tstamp)...) + hash := sha3.Sum256(payload) + + priv, err := bls.NewPrivateKey() + require.NoError(err) + msg.PublicKey = priv.PublicKey() + msg.Signature, err = priv.Sign(hash[:]) + require.NoError(err) + _, err = WriteMessage(&buf, msg, networkType) + require.NoError(err) + + verBytes := buf.Bytes() + testMsg, _, err := ReadMessage(bytes.NewReader(verBytes), networkType) + require.NoError(err) + require.Equal(msg, testMsg) } // Creates fully formatted a PoS block header with random signatures diff --git a/lib/peer.go b/lib/peer.go index 654a62ada..0eea6317a 100644 --- a/lib/peer.go +++ b/lib/peer.go @@ -1153,7 +1153,7 @@ out: // Measure the ping time when we receive a pong. pp.HandlePongMsg(msg) - case *MsgDeSoNewPeer, *MsgDeSoDonePeer, *MsgDeSoQuit: + case *MsgDeSoDisconnectedPeer, *MsgDeSoQuit: // We should never receive control messages from a Peer. Disconnect if we do. glog.Errorf("Peer.inHandler: Received control message of type %v from "+ @@ -1284,7 +1284,7 @@ func (pp *Peer) ReadDeSoMessage() (DeSoMessage, error) { func (pp *Peer) NewVersionMessage(params *DeSoParams) *MsgDeSoVersion { ver := NewMessage(MsgTypeVersion).(*MsgDeSoVersion) - ver.Version = params.ProtocolVersion + ver.Version = params.ProtocolVersion.ToUint64() ver.TstampSecs = time.Now().Unix() // We use an int64 instead of a uint64 for convenience but // this should be fine since we're just looking to generate a @@ -1324,7 +1324,7 @@ func (pp *Peer) sendVerack() error { verackMsg := NewMessage(MsgTypeVerack) // Include the nonce we received in the peer's version message so // we can validate that we actually control our IP address. - verackMsg.(*MsgDeSoVerack).Nonce = pp.VersionNonceReceived + verackMsg.(*MsgDeSoVerack).NonceReceived = pp.VersionNonceReceived if err := pp.WriteDeSoMessage(verackMsg); err != nil { return errors.Wrap(err, "sendVerack: ") } @@ -1343,10 +1343,10 @@ func (pp *Peer) readVerack() error { msg.GetMsgType().String()) } verackMsg := msg.(*MsgDeSoVerack) - if verackMsg.Nonce != pp.VersionNonceSent { + if verackMsg.NonceReceived != pp.VersionNonceSent { return fmt.Errorf( "readVerack: Received VERACK message with nonce %d but expected nonce %d", - verackMsg.Nonce, pp.VersionNonceSent) + verackMsg.NonceReceived, pp.VersionNonceSent) } return nil @@ -1407,10 +1407,10 @@ func (pp *Peer) readVersion() error { pp.serviceFlags = verMsg.Services pp.advertisedProtocolVersion = verMsg.Version negotiatedVersion := pp.Params.ProtocolVersion - if pp.advertisedProtocolVersion < pp.Params.ProtocolVersion { - negotiatedVersion = pp.advertisedProtocolVersion + if pp.advertisedProtocolVersion < pp.Params.ProtocolVersion.ToUint64() { + negotiatedVersion = NewProtocolVersionType(pp.advertisedProtocolVersion) } - pp.negotiatedProtocolVersion = negotiatedVersion + pp.negotiatedProtocolVersion = negotiatedVersion.ToUint64() pp.PeerInfoMtx.Unlock() // Set the stats-related fields. diff --git a/lib/server.go b/lib/server.go index 95ef2be74..4260f65fb 100644 --- a/lib/server.go +++ b/lib/server.go @@ -703,11 +703,13 @@ func (srv *Server) GetSnapshot(pp *Peer) { } // If operationQueueSemaphore is full, we are already storing too many chunks in memory. 
Block the thread while // we wait for the queue to clear up. - srv.snapshot.operationQueueSemaphore <- struct{}{} - // Now send a message to the peer to fetch the snapshot chunk. - pp.AddDeSoMessage(&MsgDeSoGetSnapshot{ - SnapshotStartKey: lastReceivedKey, - }, false) + go func() { + srv.snapshot.operationQueueSemaphore <- struct{}{} + // Now send a message to the peer to fetch the snapshot chunk. + pp.AddDeSoMessage(&MsgDeSoGetSnapshot{ + SnapshotStartKey: lastReceivedKey, + }, false) + }() glog.V(2).Infof("Server.GetSnapshot: Sending a GetSnapshot message to peer (%v) "+ "with Prefix (%v) and SnapshotStartEntry (%v)", pp, prefix, lastReceivedKey) @@ -1144,6 +1146,8 @@ func (srv *Server) _handleSnapshot(pp *Peer, msg *MsgDeSoSnapshotData) { "<%v>, Last entry: <%v>), (number of entries: %v), metadata (%v), and isEmpty (%v), from Peer %v", msg.SnapshotChunk[0].Key, msg.SnapshotChunk[len(msg.SnapshotChunk)-1].Key, len(msg.SnapshotChunk), msg.SnapshotMetadata, msg.SnapshotChunk[0].IsEmpty(), pp))) + // Free up a slot in the operationQueueSemaphore, now that a chunk has been processed. + srv.snapshot.FreeOperationQueueSemaphore() // There is a possibility that during hypersync the network entered a new snapshot epoch. We handle this case by // restarting the node and starting hypersync from scratch. @@ -2217,9 +2221,7 @@ func (srv *Server) _handleGetAddrMessage(pp *Peer, msg *MsgDeSoGetAddr) { func (srv *Server) _handleControlMessages(serverMessage *ServerMessage) (_shouldQuit bool) { switch serverMessage.Msg.(type) { // Control messages used internally to signal to the server. - case *MsgDeSoNewPeer: - srv._handleNewPeer(serverMessage.Peer) - case *MsgDeSoDonePeer: + case *MsgDeSoDisconnectedPeer: srv._handleDonePeer(serverMessage.Peer) case *MsgDeSoQuit: return true diff --git a/lib/snapshot.go b/lib/snapshot.go index 9d9eda085..6ab97631a 100644 --- a/lib/snapshot.go +++ b/lib/snapshot.go @@ -471,8 +471,6 @@ func (snap *Snapshot) Run() { operation.blockHeight); err != nil { glog.Errorf("Snapshot.Run: Problem adding snapshot chunk to the db") } - // Free up a slot in the operationQueueSemaphore, now that a chunk has been processed. - <-snap.operationQueueSemaphore case SnapshotOperationChecksumAdd: if err := snap.Checksum.AddOrRemoveBytesWithMigrations(operation.checksumKey, operation.checksumValue, @@ -1348,6 +1346,12 @@ func (snap *Snapshot) SetSnapshotChunk(mainDb *badger.DB, mainDbMutex *deadlock. return nil } +func (snap *Snapshot) FreeOperationQueueSemaphore() { + if len(snap.operationQueueSemaphore) > 0 { + <-snap.operationQueueSemaphore + } +} + // ------------------------------------------------------------------------------------- // StateChecksum // ------------------------------------------------------------------------------------- From b2b113c7f379c1108de0303cfda3afb00a73ee6c Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Fri, 29 Dec 2023 23:58:18 +0100 Subject: [PATCH 03/37] PoS Validator Connect/Disconnect Flow (#768) * PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. 
* Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment * Another review round * gofmt * Comment change --- lib/connection_manager.go | 577 ++++++++++++--------------------- lib/network_connection.go | 212 ++++++++++++ lib/network_connection_test.go | 167 ++++++++++ lib/peer.go | 32 +- 4 files changed, 603 insertions(+), 385 deletions(-) create mode 100644 lib/network_connection.go create mode 100644 lib/network_connection_test.go diff --git a/lib/connection_manager.go b/lib/connection_manager.go index 4cefe9527..38924bdf9 100644 --- a/lib/connection_manager.go +++ b/lib/connection_manager.go @@ -5,6 +5,7 @@ import ( "math" "net" "strconv" + "sync" "sync/atomic" "time" @@ -12,7 +13,6 @@ import ( chainlib "github.com/btcsuite/btcd/blockchain" "github.com/btcsuite/btcd/wire" "github.com/decred/dcrd/lru" - "github.com/deso-protocol/go-deadlock" "github.com/golang/glog" "github.com/pkg/errors" ) @@ -80,17 +80,26 @@ type ConnectionManager struct { // concurrently by many goroutines to figure out if outbound connections // should be made to particular addresses. - mtxOutboundConnIPGroups deadlock.Mutex + mtxOutboundConnIPGroups sync.Mutex outboundConnIPGroups map[string]int // The peer maps map peer ID to peers for various types of peer connections. // // A persistent peer is typically one we got through a commandline argument. // The reason it's called persistent is because we maintain a connection to // it, and retry the connection if it fails. - mtxPeerMaps deadlock.RWMutex + mtxPeerMaps sync.RWMutex persistentPeers map[uint64]*Peer outboundPeers map[uint64]*Peer inboundPeers map[uint64]*Peer + connectedPeers map[uint64]*Peer + + mtxConnectionAttempts sync.Mutex + // outboundConnectionAttempts keeps track of the outbound connections, mapping attemptId [uint64] -> connection attempt. + outboundConnectionAttempts map[uint64]*OutboundConnectionAttempt + // outboundConnectionChan is used to signal successful outbound connections to the connection manager. + outboundConnectionChan chan *outboundConnection + // inboundConnectionChan is used to signal successful inbound connections to the connection manager. + inboundConnectionChan chan *inboundConnection // Track the number of outbound peers we have so that this value can // be accessed concurrently when deciding whether or not to add more // outbound peers. @@ -102,11 +111,9 @@ type ConnectionManager struct { // avoid choosing them in the address manager. We need a mutex on this // guy because many goroutines will be querying the address manager // at once. - mtxConnectedOutboundAddrs deadlock.RWMutex - connectedOutboundAddrs map[string]bool - - // Used to set peer ids. Must be incremented atomically. - peerIndex uint64 + mtxAddrsMaps sync.RWMutex + connectedOutboundAddrs map[string]bool + attemptedOutboundAddrs map[string]bool serverMessageQueue chan *ServerMessage @@ -156,15 +163,19 @@ func NewConnectionManager( //newestBlock: _newestBlock, // Initialize the peer data structures. 
- outboundConnIPGroups: make(map[string]int), - persistentPeers: make(map[uint64]*Peer), - outboundPeers: make(map[uint64]*Peer), - inboundPeers: make(map[uint64]*Peer), - connectedOutboundAddrs: make(map[string]bool), + outboundConnIPGroups: make(map[string]int), + persistentPeers: make(map[uint64]*Peer), + outboundPeers: make(map[uint64]*Peer), + inboundPeers: make(map[uint64]*Peer), + connectedPeers: make(map[uint64]*Peer), + outboundConnectionAttempts: make(map[uint64]*OutboundConnectionAttempt), + connectedOutboundAddrs: make(map[string]bool), + attemptedOutboundAddrs: make(map[string]bool), // Initialize the channels. - newPeerChan: make(chan *Peer), - donePeerChan: make(chan *Peer), + newPeerChan: make(chan *Peer, 100), + donePeerChan: make(chan *Peer, 100), + outboundConnectionChan: make(chan *outboundConnection, 100), targetOutboundPeers: _targetOutboundPeers, maxInboundPeers: _maxInboundPeers, @@ -177,13 +188,8 @@ func NewConnectionManager( } } -func (cmgr *ConnectionManager) GetAddrManager() *addrmgr.AddrManager { - return cmgr.AddrMgr -} - -// Check if the address passed shares a group with any addresses already in our -// data structures. -func (cmgr *ConnectionManager) isRedundantGroupKey(na *wire.NetAddress) bool { +// Check if the address passed shares a group with any addresses already in our data structures. +func (cmgr *ConnectionManager) IsFromRedundantOutboundIPAddress(na *wire.NetAddress) bool { groupKey := addrmgr.GroupKey(na) cmgr.mtxOutboundConnIPGroups.Lock() @@ -191,7 +197,7 @@ func (cmgr *ConnectionManager) isRedundantGroupKey(na *wire.NetAddress) bool { cmgr.mtxOutboundConnIPGroups.Unlock() if numGroupsForKey != 0 && numGroupsForKey != 1 { - glog.V(2).Infof("isRedundantGroupKey: Found numGroupsForKey != (0 or 1). Is (%d) "+ + glog.V(2).Infof("IsFromRedundantOutboundIPAddress: Found numGroupsForKey != (0 or 1). Is (%d) "+ "instead for addr (%s) and group key (%s). This "+ "should never happen.", numGroupsForKey, na.IP.String(), groupKey) } @@ -220,25 +226,25 @@ func (cmgr *ConnectionManager) subFromGroupKey(na *wire.NetAddress) { func (cmgr *ConnectionManager) getRandomAddr() *wire.NetAddress { for tries := 0; tries < 100; tries++ { - // Lock the address map since multiple threads will be trying to read - // and modify it at the same time. - cmgr.mtxConnectedOutboundAddrs.RLock() addr := cmgr.AddrMgr.GetAddress() - cmgr.mtxConnectedOutboundAddrs.RUnlock() - if addr == nil { glog.V(2).Infof("ConnectionManager.getRandomAddr: addr from GetAddressWithExclusions was nil") break } - if cmgr.connectedOutboundAddrs[addrmgr.NetAddressKey(addr.NetAddress())] { + // Lock the address map since multiple threads will be trying to read + // and modify it at the same time. + cmgr.mtxAddrsMaps.RLock() + ok := cmgr.connectedOutboundAddrs[addrmgr.NetAddressKey(addr.NetAddress())] + cmgr.mtxAddrsMaps.RUnlock() + if ok { glog.V(2).Infof("ConnectionManager.getRandomAddr: Not choosing already connected address %v:%v", addr.NetAddress().IP, addr.NetAddress().Port) continue } // We can only have one outbound address per /16. This is similar to // Bitcoin and we do it to prevent Sybil attacks. 
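+ // addrmgr.GroupKey collapses routable IPv4 addresses to their /16 prefix, so two
+ // candidate peers in the same /16 share a group key and only one of them is dialed.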
- if cmgr.isRedundantGroupKey(addr.NetAddress()) { + if cmgr.IsFromRedundantOutboundIPAddress(addr.NetAddress()) { glog.V(2).Infof("ConnectionManager.getRandomAddr: Not choosing address due to redundant group key %v:%v", addr.NetAddress().IP, addr.NetAddress().Port) continue } @@ -252,14 +258,13 @@ func (cmgr *ConnectionManager) getRandomAddr() *wire.NetAddress { return nil } -func _delayRetry(retryCount int, persistentAddrForLogging *wire.NetAddress) { +func _delayRetry(retryCount uint64, persistentAddrForLogging *wire.NetAddress, unit time.Duration) (_retryDuration time.Duration) { // No delay if we haven't tried yet or if the number of retries isn't positive. if retryCount <= 0 { - time.Sleep(time.Second) - return + return 0 } numSecs := int(math.Pow(2.0, float64(retryCount))) - retryDelay := time.Duration(numSecs) * time.Second + retryDelay := time.Duration(numSecs) * unit if persistentAddrForLogging != nil { glog.V(1).Infof("Retrying connection to outbound persistent peer: "+ @@ -268,7 +273,7 @@ func _delayRetry(retryCount int, persistentAddrForLogging *wire.NetAddress) { } else { glog.V(2).Infof("Retrying connection to outbound non-persistent peer in (%d) seconds.", numSecs) } - time.Sleep(retryDelay) + return retryDelay } func (cmgr *ConnectionManager) enoughOutboundPeers() bool { @@ -286,85 +291,6 @@ func (cmgr *ConnectionManager) enoughOutboundPeers() bool { return false } -// Chooses a random address and tries to connect to it. Repeats this process until -// it finds a peer that can pass version negotiation. -func (cmgr *ConnectionManager) _getOutboundConn(persistentAddr *wire.NetAddress) net.Conn { - // If a persistentAddr was provided then the connection is a persistent - // one. - isPersistent := (persistentAddr != nil) - retryCount := 0 - for { - if atomic.LoadInt32(&cmgr.shutdown) != 0 { - glog.Info("_getOutboundConn: Ignoring connection due to shutdown") - return nil - } - // We want to start backing off exponentially once we've gone through enough - // unsuccessful retries. However, we want to give more slack to non-persistent - // peers before we start backing off, which is why it's not as cut and dry as - // just delaying based on the raw number of retries. - adjustedRetryCount := retryCount - if !isPersistent { - // If the address is not persistent, only start backing off once there - // has been a large number of failed attempts in a row as this likely indicates - // that there's a connection issue we need to wait out. - adjustedRetryCount = retryCount - 5 - } - _delayRetry(adjustedRetryCount, persistentAddr) - retryCount++ - - // If the connection manager is saturated with non-persistent - // outbound peers, no need to keep trying non-persistent outbound - // connections. - if !isPersistent && cmgr.enoughOutboundPeers() { - glog.V(1).Infof("Dropping connection request to non-persistent outbound " + - "peer because we have enough of them.") - return nil - } - - // If we don't have a persistentAddr, pick one from our addrmgr. - ipNetAddr := persistentAddr - if ipNetAddr == nil { - ipNetAddr = cmgr.getRandomAddr() - } - if ipNetAddr == nil { - // This should never happen but if it does, sleep a bit and try again. - glog.V(1).Infof("_getOutboundConn: No valid addresses to connect to.") - time.Sleep(time.Second) - continue - } - - netAddr := net.TCPAddr{ - IP: ipNetAddr.IP, - Port: int(ipNetAddr.Port), - } - - // If the peer is not persistent, update the addrmgr. 
- glog.V(1).Infof("Attempting to connect to addr: %v", netAddr) - if !isPersistent { - cmgr.AddrMgr.Attempt(ipNetAddr) - } - var err error - conn, err := net.DialTimeout(netAddr.Network(), netAddr.String(), cmgr.params.DialTimeout) - if err != nil { - // If we failed to connect to this peer, get a new address and try again. - glog.V(1).Infof("Connection to addr (%v) failed: %v", netAddr, err) - continue - } - - // We were able to dial successfully so we'll break out now. - glog.V(1).Infof("Connected to addr: %v", netAddr) - - // If this was a non-persistent outbound connection, mark the address as - // connected in the addrmgr. - if !isPersistent { - cmgr.AddrMgr.Connected(ipNetAddr) - } - - // We made a successful outbound connection so return. - return conn - } -} - func IPToNetAddr(ipStr string, addrMgr *addrmgr.AddrManager, params *DeSoParams) (*wire.NetAddress, error) { port := params.DefaultSocketPort host, portstr, err := net.SplitHostPort(ipStr) @@ -386,159 +312,102 @@ func IPToNetAddr(ipStr string, addrMgr *addrmgr.AddrManager, params *DeSoParams) return netAddr, nil } -// ConnectPeer connects either an INBOUND or OUTBOUND peer. If Conn == nil, -// then we will set up an OUTBOUND peer. Otherwise we will use the Conn to -// create an INBOUND peer. If the connection is OUTBOUND and the persistentAddr -// is set, then we will connect only to that addr. Otherwise, we will use -// the addrmgr to randomly select addrs and create OUTBOUND connections -// with them until we find a worthy peer. -func (cmgr *ConnectionManager) ConnectPeer(conn net.Conn, persistentAddr *wire.NetAddress) { - // If we don't have a connection object then we will try and make an - // outbound connection to a peer to get one. - isOutbound := false - if conn == nil { - isOutbound = true - } - isPersistent := (persistentAddr != nil) - retryCount := 0 - for { - // If the peer is persistent use exponential back off delay before retrying. - if isPersistent { - _delayRetry(retryCount, persistentAddr) - } - retryCount++ - - // If this is an outbound peer, create an outbound connection. - if isOutbound { - conn = cmgr._getOutboundConn(persistentAddr) - } - - if conn == nil { - // Conn should only be nil if this is a non-persistent outbound peer. - if isPersistent { - glog.Errorf("ConnectPeer: Got a nil connection for a persistent peer. This should never happen: (%s)", persistentAddr.IP.String()) - } +func (cmgr *ConnectionManager) IsConnectedOutboundIpAddress(netAddr *wire.NetAddress) bool { + cmgr.mtxAddrsMaps.RLock() + defer cmgr.mtxAddrsMaps.RUnlock() + return cmgr.connectedOutboundAddrs[addrmgr.NetAddressKey(netAddr)] +} - // If we end up without a connection object, it implies we had enough - // outbound peers so just return. - return - } +func (cmgr *ConnectionManager) IsAttemptedOutboundIpAddress(netAddr *wire.NetAddress) bool { + cmgr.mtxAddrsMaps.RLock() + defer cmgr.mtxAddrsMaps.RUnlock() + return cmgr.attemptedOutboundAddrs[addrmgr.NetAddressKey(netAddr)] +} - // At this point Conn is set so create a peer object to do - // a version negotiation. 
- na, err := IPToNetAddr(conn.RemoteAddr().String(), cmgr.AddrMgr, cmgr.params) - if err != nil { - glog.Errorf("ConnectPeer: Problem calling ipToNetAddr for addr: (%s) err: (%v)", conn.RemoteAddr().String(), err) +func (cmgr *ConnectionManager) AddAttemptedOutboundAddrs(netAddr *wire.NetAddress) { + cmgr.mtxAddrsMaps.Lock() + defer cmgr.mtxAddrsMaps.Unlock() + cmgr.attemptedOutboundAddrs[addrmgr.NetAddressKey(netAddr)] = true +} - // If we get an error in the conversion and this is an - // outbound connection, keep trying it. Otherwise, just return. - if isOutbound { - continue - } - return - } - peer := NewPeer(conn, isOutbound, na, isPersistent, - cmgr.stallTimeoutSeconds, - cmgr.minFeeRateNanosPerKB, - cmgr.params, - cmgr.srv.incomingMessages, cmgr, cmgr.srv, cmgr.SyncType) - - if err := peer.NegotiateVersion(cmgr.params.VersionNegotiationTimeout); err != nil { - glog.Errorf("ConnectPeer: Problem negotiating version with peer with addr: (%s) err: (%v)", conn.RemoteAddr().String(), err) - - // If we have an error in the version negotiation we disconnect - // from this peer. - peer.Conn.Close() - - // If the connection is outbound, then - // we try a new connection until we get one that works. Otherwise - // we break. - if isOutbound { - continue - } - return - } - peer._logVersionSuccess() +func (cmgr *ConnectionManager) RemoveAttemptedOutboundAddrs(netAddr *wire.NetAddress) { + cmgr.mtxAddrsMaps.Lock() + defer cmgr.mtxAddrsMaps.Unlock() + delete(cmgr.attemptedOutboundAddrs, addrmgr.NetAddressKey(netAddr)) +} - // If the version negotiation worked and we have an outbound non-persistent - // connection, mark the address as good in the addrmgr. - if isOutbound && !isPersistent { - cmgr.AddrMgr.Good(na) - } +// DialPersistentOutboundConnection attempts to connect to a persistent peer. +func (cmgr *ConnectionManager) DialPersistentOutboundConnection(persistentAddr *wire.NetAddress, attemptId uint64) (_attemptId uint64) { + glog.V(2).Infof("ConnectionManager.DialPersistentOutboundConnection: Connecting to peer %v", persistentAddr.IP.String()) + return cmgr._dialOutboundConnection(persistentAddr, attemptId, true) +} - // We connected to the peer and it passed its version negotiation. - // Handle the next steps in the main loop. - cmgr.newPeerChan <- peer +// DialOutboundConnection attempts to connect to a non-persistent peer. +func (cmgr *ConnectionManager) DialOutboundConnection(addr *wire.NetAddress, attemptId uint64) { + glog.V(2).Infof("ConnectionManager.ConnectOutboundConnection: Connecting to peer %v", addr.IP.String()) + cmgr._dialOutboundConnection(addr, attemptId, false) +} - // Once we've successfully connected to a valid peer we're done. The connection - // manager will handle starting the peer and, if this is an outbound peer and - // the peer later disconnects, - // it will potentially try and reconnect the peer or replace the peer with - // a new one so that we always maintain a fixed number of outbound peers. - return +// CloseAttemptedConnection closes an ongoing connection attempt. 
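+// Calling it with an attemptId that has already completed or been removed is a no-op,
+// since the map lookup below simply misses.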
+func (cmgr *ConnectionManager) CloseAttemptedConnection(attemptId uint64) { + glog.V(2).Infof("ConnectionManager.CloseAttemptedConnection: Closing connection attempt %d", attemptId) + cmgr.mtxConnectionAttempts.Lock() + defer cmgr.mtxConnectionAttempts.Unlock() + if attempt, exists := cmgr.outboundConnectionAttempts[attemptId]; exists { + attempt.Stop() + delete(cmgr.outboundConnectionAttempts, attemptId) } } -func (cmgr *ConnectionManager) _initiateOutboundConnections() { - // This is a hack to make outbound connections go away. - if cmgr.targetOutboundPeers == 0 { - return - } - if len(cmgr.connectIps) > 0 { - // Connect to addresses passed via the --connect-ips flag. These addresses - // are persistent in the sense that if we disconnect from one, we will - // try to reconnect to the same one. - for _, connectIp := range cmgr.connectIps { - ipNetAddr, err := IPToNetAddr(connectIp, cmgr.AddrMgr, cmgr.params) - if err != nil { - glog.Error(errors.Errorf("Couldn't connect to IP %v: %v", connectIp, err)) - continue - } +// _dialOutboundConnection is the internal method that spawns and initiates an OutboundConnectionAttempt, which handles the +// connection attempt logic. It returns the attemptId of the attempt that was created. +func (cmgr *ConnectionManager) _dialOutboundConnection(addr *wire.NetAddress, attemptId uint64, isPersistent bool) (_attemptId uint64) { + connectionAttempt := NewOutboundConnectionAttempt(attemptId, addr, isPersistent, + cmgr.params.DialTimeout, cmgr.outboundConnectionChan) + cmgr.mtxConnectionAttempts.Lock() + cmgr.outboundConnectionAttempts[connectionAttempt.attemptId] = connectionAttempt + cmgr.mtxConnectionAttempts.Unlock() + cmgr.AddAttemptedOutboundAddrs(addr) + + connectionAttempt.Start() + return attemptId +} - go func(na *wire.NetAddress) { - cmgr.ConnectPeer(nil, na) - }(ipNetAddr) - } - return - } - // Only connect to addresses from the addrmgr if we don't specify --connect-ips. - // These addresses are *not* persistent, meaning if we disconnect from one we'll - // try a different one. - // - // TODO: We should try more addresses than we need initially to increase the - // speed at which we saturate our outbound connections. The ConnectionManager - // will handle the disconnection from peers once we have enough outbound - // connections. I had this as the logic before but removed it because it caused - // contention of the AddrMgr's lock. - for ii := 0; ii < int(cmgr.targetOutboundPeers); ii++ { - go cmgr.ConnectPeer(nil, nil) - } +// ConnectPeer connects either an INBOUND or OUTBOUND peer. If Conn == nil, +// then we will set up an OUTBOUND peer. Otherwise we will use the Conn to +// create an INBOUND peer. If the connection is OUTBOUND and the persistentAddr +// is set, then we will connect only to that addr. Otherwise, we will use +// the addrmgr to randomly select addrs and create OUTBOUND connections +// with them until we find a worthy peer. +func (cmgr *ConnectionManager) ConnectPeer(id uint64, conn net.Conn, na *wire.NetAddress, isOutbound bool, + isPersistent bool) *Peer { + + // At this point Conn is set so create a peer object to do a version negotiation. + peer := NewPeer(id, conn, isOutbound, na, isPersistent, + cmgr.stallTimeoutSeconds, + cmgr.minFeeRateNanosPerKB, + cmgr.params, + cmgr.srv.incomingMessages, cmgr, cmgr.srv, cmgr.SyncType, + cmgr.newPeerChan, cmgr.donePeerChan) + + // Now we can add the peer to our data structures. + peer._logAddPeer() + cmgr.addPeer(peer) + + // Start the peer's message loop. 
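+ // Note that, unlike the old ConnectPeer flow, no version negotiation happens here; the
+ // handshake is now driven outside the ConnectionManager, with its completion presumably
+ // signaled via the new MsgTypePeerHandshakeComplete control message.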
+ peer.Start() + + return peer } -func (cmgr *ConnectionManager) _isFromRedundantInboundIPAddress(addrToCheck net.Addr) bool { +func (cmgr *ConnectionManager) IsFromRedundantInboundIPAddress(netAddr *wire.NetAddress) bool { cmgr.mtxPeerMaps.RLock() defer cmgr.mtxPeerMaps.RUnlock() // Loop through all the peers to see if any have the same IP // address. This map is normally pretty small so doing this // every time a Peer connects should be fine. - netAddr, err := IPToNetAddr(addrToCheck.String(), cmgr.AddrMgr, cmgr.params) - if err != nil { - // Return true in case we have an error. We do this because it - // will result in the peer connection not being accepted, which - // is desired in this case. - glog.Warningf(errors.Wrapf(err, - "ConnectionManager._isFromRedundantInboundIPAddress: Problem parsing "+ - "net.Addr to wire.NetAddress so marking as redundant and not "+ - "making connection").Error()) - return true - } - if netAddr == nil { - glog.Warningf("ConnectionManager._isFromRedundantInboundIPAddress: " + - "address was nil after parsing so marking as redundant and not " + - "making connection") - return true - } + // If the IP is a localhost IP let it slide. This is useful for testing fake // nodes on a local machine. // TODO: Should this be a flag? @@ -578,38 +447,9 @@ func (cmgr *ConnectionManager) _handleInboundConnections() { continue } - // As a quick check, reject the peer if we have too many already. Note that - // this check isn't perfect but we have a later check at the end after doing - // a version negotiation that will properly reject the peer if this check - // messes up e.g. due to a concurrency issue. - // - // TODO: We should instead have eviction logic here to prevent - // someone from monopolizing a node's inbound connections. - numInboundPeers := atomic.LoadUint32(&cmgr.numInboundPeers) - if numInboundPeers > cmgr.maxInboundPeers { - - glog.Infof("Rejecting INBOUND peer (%s) due to max inbound peers (%d) hit.", - conn.RemoteAddr().String(), cmgr.maxInboundPeers) - conn.Close() - - continue - } - - // If we want to limit inbound connections to one per IP address, check to - // make sure this address isn't already connected. - if cmgr.limitOneInboundConnectionPerIP && - cmgr._isFromRedundantInboundIPAddress(conn.RemoteAddr()) { - - glog.Infof("Rejecting INBOUND peer (%s) due to already having an "+ - "inbound connection from the same IP with "+ - "limit_one_inbound_connection_per_ip set.", - conn.RemoteAddr().String()) - conn.Close() - - continue + cmgr.inboundConnectionChan <- &inboundConnection{ + connection: conn, } - - go cmgr.ConnectPeer(conn, nil) } }(outerListener) } @@ -622,13 +462,7 @@ func (cmgr *ConnectionManager) GetAllPeers() []*Peer { defer cmgr.mtxPeerMaps.RUnlock() allPeers := []*Peer{} - for _, pp := range cmgr.persistentPeers { - allPeers = append(allPeers, pp) - } - for _, pp := range cmgr.outboundPeers { - allPeers = append(allPeers, pp) - } - for _, pp := range cmgr.inboundPeers { + for _, pp := range cmgr.connectedPeers { allPeers = append(allPeers, pp) } @@ -689,9 +523,9 @@ func (cmgr *ConnectionManager) addPeer(pp *Peer) { cmgr.addToGroupKey(pp.netAddr) atomic.AddUint32(&cmgr.numOutboundPeers, 1) - cmgr.mtxConnectedOutboundAddrs.Lock() + cmgr.mtxAddrsMaps.Lock() cmgr.connectedOutboundAddrs[addrmgr.NetAddressKey(pp.netAddr)] = true - cmgr.mtxConnectedOutboundAddrs.Unlock() + cmgr.mtxAddrsMaps.Unlock() } } else { // This is an inbound peer. 
@@ -700,10 +534,45 @@ func (cmgr *ConnectionManager) addPeer(pp *Peer) { } peerList[pp.ID] = pp + cmgr.connectedPeers[pp.ID] = pp +} + +func (cmgr *ConnectionManager) getPeer(id uint64) *Peer { + cmgr.mtxPeerMaps.RLock() + defer cmgr.mtxPeerMaps.RUnlock() + + if peer, ok := cmgr.connectedPeers[id]; ok { + return peer + } + return nil +} + +func (cmgr *ConnectionManager) SendMessage(msg DeSoMessage, peerId uint64) error { + peer := cmgr.getPeer(peerId) + if peer == nil { + return fmt.Errorf("SendMessage: Peer with ID %d not found", peerId) + } + glog.V(1).Infof("SendMessage: Sending message %v to peer %d", msg.GetMsgType().String(), peerId) + peer.AddDeSoMessage(msg, false) + return nil +} + +func (cmgr *ConnectionManager) CloseConnection(peerId uint64) { + glog.V(2).Infof("ConnectionManager.CloseConnection: Closing connection to peer (id= %v)", peerId) + + var peer *Peer + var ok bool + cmgr.mtxPeerMaps.Lock() + peer, ok = cmgr.connectedPeers[peerId] + cmgr.mtxPeerMaps.Unlock() + if !ok { + return + } + peer.Disconnect() } // Update our data structures to remove this peer. -func (cmgr *ConnectionManager) RemovePeer(pp *Peer) { +func (cmgr *ConnectionManager) removePeer(pp *Peer) { // Acquire the mtxPeerMaps lock for writing. cmgr.mtxPeerMaps.Lock() defer cmgr.mtxPeerMaps.Unlock() @@ -724,9 +593,9 @@ func (cmgr *ConnectionManager) RemovePeer(pp *Peer) { cmgr.subFromGroupKey(pp.netAddr) atomic.AddUint32(&cmgr.numOutboundPeers, Uint32Dec) - cmgr.mtxConnectedOutboundAddrs.Lock() + cmgr.mtxAddrsMaps.Lock() delete(cmgr.connectedOutboundAddrs, addrmgr.NetAddressKey(pp.netAddr)) - cmgr.mtxConnectedOutboundAddrs.Unlock() + cmgr.mtxAddrsMaps.Unlock() } } else { // This is an inbound peer. @@ -737,25 +606,12 @@ func (cmgr *ConnectionManager) RemovePeer(pp *Peer) { // Update the last seen time before we finish removing the peer. // TODO: Really, we call 'Connected()' on removing a peer? // I can't find a Disconnected() but seems odd. - cmgr.AddrMgr.Connected(pp.netAddr) + // FIXME: Move this to Done Peer + //cmgr.AddrMgr.Connected(pp.netAddr) // Remove the peer from our data structure. delete(peerList, pp.ID) -} - -func (cmgr *ConnectionManager) _maybeReplacePeer(pp *Peer) { - // If the peer was outbound, replace her with a - // new peer to maintain a fixed number of outbound connections. - if pp.isOutbound { - // If the peer is not persistent then we don't want to pass an - // address to connectPeer. The lack of an address will cause it - // to choose random addresses from the addrmgr until one works. 
- na := pp.netAddr - if !pp.isPersistent { - na = nil - } - go cmgr.ConnectPeer(nil, na) - } + delete(cmgr.connectedPeers, pp.ID) } func (cmgr *ConnectionManager) _logOutboundPeerData() { @@ -775,12 +631,30 @@ func (cmgr *ConnectionManager) _logOutboundPeerData() { cmgr.mtxOutboundConnIPGroups.Unlock() } +func (cmgr *ConnectionManager) AddTimeSample(addrStr string, timeSample time.Time) { + cmgr.timeSource.AddTimeSample(addrStr, timeSample) +} + +func (cmgr *ConnectionManager) GetNumInboundPeers() uint32 { + return atomic.LoadUint32(&cmgr.numInboundPeers) +} + +func (cmgr *ConnectionManager) GetNumOutboundPeers() uint32 { + return atomic.LoadUint32(&cmgr.numOutboundPeers) +} + func (cmgr *ConnectionManager) Stop() { + cmgr.mtxPeerMaps.Lock() + defer cmgr.mtxPeerMaps.Unlock() + if atomic.AddInt32(&cmgr.shutdown, 1) != 1 { glog.Warningf("ConnectionManager.Stop is already in the process of " + "shutting down") return } + for id := range cmgr.outboundConnectionAttempts { + cmgr.CloseAttemptedConnection(id) + } glog.Infof("ConnectionManager: Stopping, number of inbound peers (%v), number of outbound "+ "peers (%v), number of persistent peers (%v).", len(cmgr.inboundPeers), len(cmgr.outboundPeers), len(cmgr.persistentPeers)) @@ -823,10 +697,6 @@ func (cmgr *ConnectionManager) Start() { // - Have the peer enter a switch statement listening for all kinds of messages. // - Send addr and getaddr messages as appropriate. - // Initiate outbound connections with peers either using the --connect-ips passed - // in or using the addrmgr. - cmgr._initiateOutboundConnections() - // Accept inbound connections from peers on our listeners. cmgr._handleInboundConnections() @@ -837,60 +707,24 @@ func (cmgr *ConnectionManager) Start() { cmgr._logOutboundPeerData() select { - case pp := <-cmgr.newPeerChan: - { - // We have successfully connected to a peer and it passed its version - // negotiation. - - // if this is a non-persistent outbound peer and we already have enough - // outbound peers, then don't bother adding this one. - if !pp.isPersistent && pp.isOutbound && cmgr.enoughOutboundPeers() { - // TODO: Make this less verbose - glog.V(1).Infof("Dropping peer because we already have enough outbound peer connections.") - pp.Conn.Close() - continue - } - - // If this is a non-persistent outbound peer and the group key - // overlaps with another peer we're already connected to then - // abort mission. We only connect to one peer per IP group in - // order to prevent Sybil attacks. - if pp.isOutbound && - !pp.isPersistent && - cmgr.isRedundantGroupKey(pp.netAddr) { - - // TODO: Make this less verbose - glog.Infof("Rejecting OUTBOUND NON-PERSISTENT peer (%v) with "+ - "redundant group key (%s).", - pp, addrmgr.GroupKey(pp.netAddr)) - - pp.Conn.Close() - cmgr._maybeReplacePeer(pp) - continue - } - - // Check that we have not exceeded the maximum number of inbound - // peers allowed. - // - // TODO: We should instead have eviction logic to prevent - // someone from monopolizing a node's inbound connections. - numInboundPeers := atomic.LoadUint32(&cmgr.numInboundPeers) - if !pp.isOutbound && numInboundPeers > cmgr.maxInboundPeers { - - // TODO: Make this less verbose - glog.Infof("Rejecting INBOUND peer (%v) due to max inbound peers (%d) hit.", - pp, cmgr.maxInboundPeers) - - pp.Conn.Close() - continue - } - - // Now we can add the peer to our data structures. - pp._logAddPeer() - cmgr.addPeer(pp) - - // Start the peer's message loop. 
- pp.Start() + case oc := <-cmgr.outboundConnectionChan: + glog.V(2).Infof("ConnectionManager.Start: Successfully established an outbound connection with "+ + "(addr= %v)", oc.connection.RemoteAddr()) + delete(cmgr.outboundConnectionAttempts, oc.attemptId) + cmgr.serverMessageQueue <- &ServerMessage{ + Peer: nil, + Msg: &MsgDeSoNewConnection{ + Connection: oc, + }, + } + case ic := <-cmgr.inboundConnectionChan: + glog.V(2).Infof("ConnectionManager.Start: Successfully received an inbound connection from "+ + "(addr= %v)", ic.connection.RemoteAddr()) + cmgr.serverMessageQueue <- &ServerMessage{ + Peer: nil, + Msg: &MsgDeSoNewConnection{ + Connection: ic, + }, } case pp := <-cmgr.donePeerChan: { @@ -900,14 +734,11 @@ func (cmgr *ConnectionManager) Start() { glog.V(1).Infof("Done with peer (%v).", pp) - if !pp.PeerManuallyRemovedFromConnectionManager { - // Remove the peer from our data structures. - cmgr.RemovePeer(pp) + // Remove the peer from our data structures. + cmgr.removePeer(pp) - // Potentially replace the peer. For example, if the Peer was an outbound Peer - // then we want to find a new peer in order to maintain our TargetOutboundPeers. - cmgr._maybeReplacePeer(pp) - } + // Potentially replace the peer. For example, if the Peer was an outbound Peer + // then we want to find a new peer in order to maintain our TargetOutboundPeers. // Signal the server about the Peer being done in case it wants to do something // with it. diff --git a/lib/network_connection.go b/lib/network_connection.go new file mode 100644 index 000000000..eb6d4ab55 --- /dev/null +++ b/lib/network_connection.go @@ -0,0 +1,212 @@ +package lib + +import ( + "github.com/btcsuite/btcd/wire" + "github.com/golang/glog" + "net" + "sync" + "time" +) + +// outboundConnection is used to store an established connection with a peer. It can also be used to signal that the +// connection was unsuccessful, in which case the failed flag is set to true. outboundConnection is created after an +// OutboundConnectionAttempt concludes. outboundConnection implements the Connection interface. +type outboundConnection struct { + mtx sync.Mutex + terminated bool + + attemptId uint64 + address *wire.NetAddress + connection net.Conn + isPersistent bool + failed bool +} + +func (oc *outboundConnection) GetConnectionType() ConnectionType { + return ConnectionTypeOutbound +} + +func (oc *outboundConnection) Close() { + oc.mtx.Lock() + defer oc.mtx.Unlock() + + if oc.terminated { + return + } + oc.connection.Close() + oc.terminated = true +} + +// inboundConnection is used to store an established connection with a peer. inboundConnection is created after +// an external peer connects to the node. inboundConnection implements the Connection interface. +type inboundConnection struct { + mtx sync.Mutex + terminated bool + + connection net.Conn +} + +func (ic *inboundConnection) GetConnectionType() ConnectionType { + return ConnectionTypeInbound +} + +func (ic *inboundConnection) Close() { + ic.mtx.Lock() + defer ic.mtx.Unlock() + + if ic.terminated { + return + } + + ic.connection.Close() + ic.terminated = true +} + +// OutboundConnectionAttempt is used to store the state of an outbound connection attempt. It is used to initiate +// an outbound connection to a peer, and manage the lifecycle of the connection attempt. +type OutboundConnectionAttempt struct { + mtx sync.Mutex + + // attemptId is used to identify the connection attempt. It will later be the id of the peer, + // if the connection is successful. 
+ attemptId uint64 + + // netAddr is the address of the peer we are attempting to connect to. + netAddr *wire.NetAddress + // isPersistent is used to indicate whether we should retry connecting to the peer if the connection attempt fails. + // If isPersistent is true, we will retry connecting to the peer until we are successful. Each time such connection + // fails, we will sleep according to exponential backoff. Otherwise, we will only attempt to connect to the peer once. + isPersistent bool + // dialTimeout is the amount of time we will wait before timing out an individual connection attempt. + dialTimeout time.Duration + // timeoutUnit is the unit of time we will use to calculate the exponential backoff delay. The initial timeout is + // calculated as timeoutUnit * 2^0, the second timeout is calculated as timeoutUnit * 2^1, and so on. + timeoutUnit time.Duration + // retryCount is the number of times we have attempted to connect to the peer. + retryCount uint64 + // connectionChan is used to send the result of the connection attempt to the caller thread. + connectionChan chan *outboundConnection + + exitChan chan bool + status outboundConnectionAttemptStatus +} + +type outboundConnectionAttemptStatus int + +const ( + outboundConnectionAttemptInitialized outboundConnectionAttemptStatus = 0 + outboundConnectionAttemptRunning outboundConnectionAttemptStatus = 1 + outboundConnectionAttemptTerminated outboundConnectionAttemptStatus = 2 +) + +func NewOutboundConnectionAttempt(attemptId uint64, netAddr *wire.NetAddress, isPersistent bool, + dialTimeout time.Duration, connectionChan chan *outboundConnection) *OutboundConnectionAttempt { + + return &OutboundConnectionAttempt{ + attemptId: attemptId, + netAddr: netAddr, + isPersistent: isPersistent, + dialTimeout: dialTimeout, + timeoutUnit: time.Second, + exitChan: make(chan bool), + connectionChan: connectionChan, + status: outboundConnectionAttemptInitialized, + } +} + +func (oca *OutboundConnectionAttempt) Start() { + oca.mtx.Lock() + defer oca.mtx.Unlock() + + if oca.status != outboundConnectionAttemptInitialized { + return + } + + go oca.start() + oca.status = outboundConnectionAttemptRunning +} + +func (oca *OutboundConnectionAttempt) start() { + oca.retryCount = 0 + +out: + for { + sleepDuration := 0 * time.Second + // for persistent peers, calculate the exponential backoff delay. + if oca.isPersistent { + sleepDuration = _delayRetry(oca.retryCount, oca.netAddr, oca.timeoutUnit) + } + + select { + case <-oca.exitChan: + break out + case <-time.After(sleepDuration): + // If the peer is persistent use exponential back off delay before retrying. + // We want to start backing off exponentially once we've gone through enough + // unsuccessful retries. 
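+			// _delayRetry is defined elsewhere in this codebase; assuming it follows
+			// the timeoutUnit * 2^retryCount schedule described above, a one-second
+			// timeoutUnit yields delays of roughly 1s, 2s, 4s, 8s, ... per retry.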
+			if oca.isPersistent {
+				oca.retryCount++
+			}
+
+			conn := oca.attemptOutboundConnection()
+			if conn == nil && oca.isPersistent {
+				break
+			}
+			if conn == nil {
+				break out
+			}
+
+			oca.connectionChan <- &outboundConnection{
+				attemptId:    oca.attemptId,
+				address:      oca.netAddr,
+				connection:   conn,
+				isPersistent: oca.isPersistent,
+				failed:       false,
+			}
+			return
+		}
+	}
+	oca.connectionChan <- &outboundConnection{
+		attemptId:    oca.attemptId,
+		address:      oca.netAddr,
+		connection:   nil,
+		isPersistent: oca.isPersistent,
+		failed:       true,
+	}
+}
+
+func (oca *OutboundConnectionAttempt) Stop() {
+	oca.mtx.Lock()
+	defer oca.mtx.Unlock()
+
+	if oca.status == outboundConnectionAttemptTerminated {
+		return
+	}
+	close(oca.exitChan)
+	oca.status = outboundConnectionAttemptTerminated
+}
+
+func (oca *OutboundConnectionAttempt) SetTimeoutUnit(timeoutUnit time.Duration) {
+	oca.timeoutUnit = timeoutUnit
+}
+
+// attemptOutboundConnection dials the peer. If the connection attempt is successful, it will return the connection.
+// Otherwise, it will return nil.
+func (oca *OutboundConnectionAttempt) attemptOutboundConnection() net.Conn {
+	// Dial the peer's address directly, timing out after dialTimeout.
+	glog.V(1).Infof("Attempting to connect to addr: %v", oca.netAddr.IP.String())
+
+	var err error
+	tcpAddr := net.TCPAddr{
+		IP:   oca.netAddr.IP,
+		Port: int(oca.netAddr.Port),
+	}
+	conn, err := net.DialTimeout(tcpAddr.Network(), tcpAddr.String(), oca.dialTimeout)
+	if err != nil {
+		// If we failed to connect to this peer, the caller either retries
+		// (persistent attempts) or gives up (non-persistent attempts).
+		glog.V(2).Infof("Connection to addr (%v) failed: %v", tcpAddr, err)
+		return nil
+	}
+
+	return conn
+}
diff --git a/lib/network_connection_test.go b/lib/network_connection_test.go
new file mode 100644
index 000000000..5d3008f72
--- /dev/null
+++ b/lib/network_connection_test.go
@@ -0,0 +1,167 @@
+package lib
+
+import (
+	"fmt"
+	"github.com/btcsuite/btcd/addrmgr"
+	"github.com/btcsuite/btcd/wire"
+	"github.com/stretchr/testify/require"
+	"net"
+	"sync"
+	"testing"
+	"time"
+)
+
+type simpleListener struct {
+	t      *testing.T
+	ll     net.Listener
+	addr   *wire.NetAddress
+	closed bool
+
+	connectionChan chan Connection
+
+	exitChan   chan struct{}
+	startGroup sync.WaitGroup
+	stopGroup  sync.WaitGroup
+}
+
+func newSimpleListener(t *testing.T) *simpleListener {
+	require := require.New(t)
+	ll, err := net.Listen("tcp", "127.0.0.1:0")
+	require.NoError(err)
+	params := &DeSoTestnetParams
+	addr := ll.Addr()
+	addrMgr := addrmgr.New("", net.LookupIP)
+	na, err := IPToNetAddr(addr.String(), addrMgr, params)
+	require.NoError(err)
+
+	return &simpleListener{
+		t:              t,
+		ll:             ll,
+		addr:           na,
+		closed:         false,
+		connectionChan: make(chan Connection, 100),
+		exitChan:       make(chan struct{}),
+	}
+}
+
+func (sl *simpleListener) start() {
+	require := require.New(sl.t)
+	if sl.closed {
+		ll, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%v", sl.addr.Port))
+		require.NoError(err)
+		sl.ll = ll
+		sl.connectionChan = make(chan Connection, 100)
+		sl.exitChan = make(chan struct{})
+		sl.closed = false
+	}
+	sl.startGroup.Add(1)
+	sl.stopGroup.Add(1)
+
+	go func() {
+		sl.startGroup.Done()
+		defer sl.stopGroup.Done()
+		for {
+			select {
+			case <-sl.exitChan:
+				return
+			default:
+				conn, err := sl.ll.Accept()
+				if err != nil {
+					fmt.Println("simpleListener.start: ll.Accept:", err)
+					return
+				}
+				sl.connectionChan <- &inboundConnection{
+					connection: conn,
+				}
+			}
+		}
+	}()
+	sl.startGroup.Wait()
+}
+
+func (sl *simpleListener) stop() {
+	sl.ll.Close()
+	sl.closed = true
+	close(sl.exitChan)
+	close(sl.connectionChan)
+
+	sl.stopGroup.Wait()
+	fmt.Println("simpleListener.stop: stopped")
+}
+
+func (sl *simpleListener) getTCPAddr() *net.TCPAddr {
+	return sl.ll.Addr().(*net.TCPAddr)
+}
+
+func verifyOutboundConnection(t *testing.T, conn *outboundConnection, sl *simpleListener, attemptId uint64, isPersistent bool, failed bool) {
+	require := require.New(t)
+	require.Equal(attemptId, conn.attemptId)
+	require.Equal(isPersistent, conn.isPersistent)
+	require.Equal(failed, conn.failed)
+	if failed {
+		require.Nil(conn.connection)
+		return
+	}
+
+	require.Equal(conn.address.IP.String(), sl.getTCPAddr().IP.String())
+	require.Equal(conn.address.Port, uint16(sl.getTCPAddr().Port))
+}
+
+func verifyOutboundConnectionSelect(t *testing.T, connectionChan chan *outboundConnection, timeoutDuration time.Duration,
+	sl *simpleListener, attemptId uint64, isPersistent bool, failed bool) {
+
+	select {
+	case conn := <-connectionChan:
+		verifyOutboundConnection(t, conn, sl, attemptId, isPersistent, failed)
+	case <-time.After(2 * timeoutDuration):
+		t.Fatal("Timed out waiting for outbound connection.")
+	}
+}
+
+func TestOutboundConnectionAttempt(t *testing.T) {
+	require := require.New(t)
+	timeoutDuration := 100 * time.Millisecond
+
+	sl := newSimpleListener(t)
+	sl.start()
+
+	connectionChan := make(chan *outboundConnection, 100)
+	attempt := NewOutboundConnectionAttempt(0, sl.addr, false, timeoutDuration, connectionChan)
+	attempt.Start()
+	verifyOutboundConnectionSelect(t, connectionChan, 2*timeoutDuration, sl, 0, false, false)
+	t.Log("TestOutboundConnectionAttempt #1 | Happy path, non-persistent | PASS")
+
+	sl.stop()
+	attemptFailed := NewOutboundConnectionAttempt(1, sl.addr, false, timeoutDuration, connectionChan)
+	attemptFailed.Start()
+	verifyOutboundConnectionSelect(t, connectionChan, 2*timeoutDuration, sl, 1, false, true)
+	t.Log("TestOutboundConnectionAttempt #2 | Failed connection, non-persistent | PASS")
+
+	sl2 := newSimpleListener(t)
+	sl2.start()
+
+	attemptPersistent := NewOutboundConnectionAttempt(2, sl2.addr, true, timeoutDuration, connectionChan)
+	attemptPersistent.Start()
+	verifyOutboundConnectionSelect(t, connectionChan, 2*timeoutDuration, sl2, 2, true, false)
+	t.Log("TestOutboundConnectionAttempt #3 | Happy path, persistent | PASS")
+
+	sl2.stop()
+	attemptPersistentDelay := NewOutboundConnectionAttempt(3, sl2.addr, true, timeoutDuration, connectionChan)
+	attemptPersistentDelay.SetTimeoutUnit(timeoutDuration)
+	attemptPersistentDelay.Start()
+	time.Sleep(timeoutDuration)
+	sl2.start()
+	verifyOutboundConnectionSelect(t, connectionChan, 2*timeoutDuration, sl2, 3, true, false)
+	require.Greater(attemptPersistentDelay.retryCount, uint64(0))
+	t.Log("TestOutboundConnectionAttempt #4 | Failed connection, persistent, delayed | PASS")
+
+	sl2.stop()
+	attemptPersistentCancel := NewOutboundConnectionAttempt(4, sl2.addr, true, timeoutDuration, connectionChan)
+	attemptPersistentCancel.Start()
+	time.Sleep(timeoutDuration)
+	attemptPersistentCancel.Stop()
+	verifyOutboundConnectionSelect(t, connectionChan, 2*timeoutDuration, sl2, 4, true, true)
+	require.Greater(attemptPersistentCancel.retryCount, uint64(0))
+	t.Log("TestOutboundConnectionAttempt #5 | Failed connection, persistent, delayed, canceled | PASS")
+}
diff --git a/lib/peer.go b/lib/peer.go
index 0eea6317a..2c6d76e4d 100644
--- a/lib/peer.go
+++ b/lib/peer.go
@@ -63,9 +63,6 @@ type Peer struct {
stallTimeoutSeconds uint64 Params *DeSoParams MessageChan chan *ServerMessage - // A hack to make it so that we can allow an API endpoint to manually - // delete a peer. - PeerManuallyRemovedFromConnectionManager bool // In order to complete a version negotiation successfully, the peer must // reply to the initial version message we send them with a verack message @@ -104,6 +101,8 @@ type Peer struct { // Output queue for messages that need to be sent to the peer. outputQueueChan chan DeSoMessage + newPeerChan chan *Peer + donePeerChan chan *Peer // Set to zero until Disconnect has been called on the Peer. Used to make it // so that the logic in Disconnect will only be executed once. @@ -611,15 +610,18 @@ func (pp *Peer) StartDeSoMessageProcessor() { } // NewPeer creates a new Peer object. -func NewPeer(_conn net.Conn, _isOutbound bool, _netAddr *wire.NetAddress, +func NewPeer(_id uint64, _conn net.Conn, _isOutbound bool, _netAddr *wire.NetAddress, _isPersistent bool, _stallTimeoutSeconds uint64, _minFeeRateNanosPerKB uint64, params *DeSoParams, messageChan chan *ServerMessage, _cmgr *ConnectionManager, _srv *Server, - _syncType NodeSyncType) *Peer { + _syncType NodeSyncType, + newPeerChan chan *Peer, + donePeerChan chan *Peer) *Peer { pp := Peer{ + ID: _id, cmgr: _cmgr, srv: _srv, Conn: _conn, @@ -628,6 +630,8 @@ func NewPeer(_conn net.Conn, _isOutbound bool, _netAddr *wire.NetAddress, isOutbound: _isOutbound, isPersistent: _isPersistent, outputQueueChan: make(chan DeSoMessage), + newPeerChan: newPeerChan, + donePeerChan: donePeerChan, quit: make(chan interface{}), knownInventory: lru.NewCache(maxKnownInventory), blocksToSend: make(map[BlockHash]bool), @@ -639,9 +643,6 @@ func NewPeer(_conn net.Conn, _isOutbound bool, _netAddr *wire.NetAddress, requestedBlocks: make(map[BlockHash]bool), syncType: _syncType, } - if _cmgr != nil { - pp.ID = atomic.AddUint64(&_cmgr.peerIndex, 1) - } // TODO: Before, we would give each Peer its own Logger object. Now we // have a much better way of debugging which is that we include a nonce @@ -784,6 +785,10 @@ func (pp *Peer) Address() string { return pp.addrStr } +func (pp *Peer) NetAddress() *wire.NetAddress { + return pp.netAddr +} + func (pp *Peer) IP() string { return pp.netAddr.IP.String() } @@ -796,6 +801,10 @@ func (pp *Peer) IsOutbound() bool { return pp.isOutbound } +func (pp *Peer) IsPersistent() bool { + return pp.isPersistent +} + func (pp *Peer) QueueMessage(desoMessage DeSoMessage) { // If the peer is disconnected, don't queue anything. if !pp.Connected() { @@ -1495,10 +1504,11 @@ func (pp *Peer) NegotiateVersion(versionNegotiationTimeout time.Duration) error func (pp *Peer) Disconnect() { // Only run the logic the first time Disconnect is called. glog.V(1).Infof(CLog(Yellow, "Peer.Disconnect: Starting")) - if atomic.AddInt32(&pp.disconnected, 1) != 1 { + if atomic.LoadInt32(&pp.disconnected) != 0 { glog.V(1).Infof("Peer.Disconnect: Disconnect call ignored since it was already called before for Peer %v", pp) return } + atomic.AddInt32(&pp.disconnected, 1) glog.V(1).Infof("Peer.Disconnect: Running Disconnect for the first time for Peer %v", pp) @@ -1510,9 +1520,7 @@ func (pp *Peer) Disconnect() { // Add the Peer to donePeers so that the ConnectionManager and Server can do any // cleanup they need to do. 
-	if pp.cmgr != nil && atomic.LoadInt32(&pp.cmgr.shutdown) == 0 && pp.cmgr.donePeerChan != nil {
-		pp.cmgr.donePeerChan <- pp
-	}
+	pp.donePeerChan <- pp
 }
 
 func (pp *Peer) _logVersionSuccess() {

From 43d8e369b2e09fd8f72c9851c475a02607e71c16 Mon Sep 17 00:00:00 2001
From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com>
Date: Sat, 6 Jan 2024 01:35:42 +0100
Subject: [PATCH 04/37] PoS RemoteNode and RemoteNodeId (#857)

* RemoteNode and RemoteNodeId

* Add HandshakeController

PoS Block Producer: TxnConnectStatusByIndex (#672)

* TransactionConnectStatus and ConnectFailingTransaction

* Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"

This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing
changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6.

* Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions""

This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce.

* Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"

This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing
changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242.

* TransactionConnectStatus and ConnectFailingTransaction

* Initial _connectFailingTransaction

* ConnectFailingTransaction and GlobalParamsEntry updates

* Fix merge conflicts

* gofmt

* Fix merge conflicts

* Fix blockheight

* Fix merge conflicts

* gofmt

* Revert connect failing transaction

* Add TxnStatusConnectedIndex to block and header

* Fix naming

* Fix tests; remove asserts

* Update comment

Integration testing updates

RemoteNode and RemoteNodeId

Initial remote node manager tests

remote node tests

Better connection testing framework

Add validator integration test

Fix validator-validator connection test; Add nonValidator-validator test

* Review round

* Final pass
---
 cmd/config.go                            |   6 +
 cmd/node.go                              |  29 +-
 cmd/run.go                               |   4 +
 integration_testing/connection_bridge.go |   4 +-
 lib/bls_keystore.go                      |   6 +-
 lib/network.go                           |  17 +-
 lib/network_test.go                      |   2 +-
 lib/peer.go                              | 229 +-------
 lib/remote_node.go                       | 651 +++++++++++++++++++++++
 lib/server.go                            |  22 +-
 10 files changed, 713 insertions(+), 257 deletions(-)
 create mode 100644 lib/remote_node.go

diff --git a/cmd/config.go b/cmd/config.go
index 18da983eb..692795865 100644
--- a/cmd/config.go
+++ b/cmd/config.go
@@ -17,6 +17,7 @@ type Config struct {
 	TXIndex     bool
 	Regtest     bool
 	PostgresURI string
+	PosValidatorSeed string
 
 	// Peers
 	ConnectIPs []string
@@ -103,6 +104,7 @@ func LoadConfig() *Config {
 	config.TXIndex = viper.GetBool("txindex")
 	config.Regtest = viper.GetBool("regtest")
 	config.PostgresURI = viper.GetString("postgres-uri")
+	config.PosValidatorSeed = viper.GetString("pos-validator-seed")
 	config.HyperSync = viper.GetBool("hypersync")
 	config.ForceChecksum = viper.GetBool("force-checksum")
 	config.SyncType = lib.NodeSyncType(viper.GetString("sync-type"))
@@ -176,6 +178,10 @@ func (config *Config) Print() {
 		glog.Infof("Postgres URI: %s", config.PostgresURI)
 	}
 
+	if config.PosValidatorSeed != "" {
+		glog.Infof(lib.CLog(lib.Blue, "PoS Validator: ON"))
+	}
+
 	if config.HyperSync {
 		glog.Infof("HyperSync: ON")
 	}
diff --git a/cmd/node.go b/cmd/node.go
index 5f59d2c2c..2a7e82ecf 100644
--- a/cmd/node.go
+++ b/cmd/node.go
@@ -27,12 +27,13 @@ import (
 )
 
 type Node struct {
-	Server   *lib.Server
-	ChainDB  *badger.DB
-	TXIndex  *lib.TXIndex
-	Params   *lib.DeSoParams
-	Config   *Config
-	Postgres *lib.Postgres
+	Server    *lib.Server
+	
ChainDB *badger.DB + TXIndex *lib.TXIndex + Params *lib.DeSoParams + Config *Config + Postgres *lib.Postgres + Listeners []net.Listener // IsRunning is false when a NewNode is created, set to true on Start(), set to false // after Stop() is called. Mainly used in testing. @@ -117,8 +118,7 @@ func (node *Node) Start(exitChannels ...*chan struct{}) { // This just gets localhost listening addresses on the protocol port. // Such as [{127.0.0.1 18000 } {::1 18000 }], and associated listener structs. - listeningAddrs, listeners := GetAddrsToListenOn(node.Config.ProtocolPort) - _ = listeningAddrs + _, node.Listeners = GetAddrsToListenOn(node.Config.ProtocolPort) // If --connect-ips is not passed, we will connect the addresses from // --add-ips, DNSSeeds, and DNSSeedGenerators. @@ -202,13 +202,21 @@ func (node *Node) Start(exitChannels ...*chan struct{}) { // Setup eventManager eventManager := lib.NewEventManager() + var blsKeystore *lib.BLSKeystore + if node.Config.PosValidatorSeed != "" { + blsKeystore, err = lib.NewBLSKeystore(node.Config.PosValidatorSeed) + if err != nil { + panic(err) + } + } + // Setup the server. ShouldRestart is used whenever we detect an issue and should restart the node after a recovery // process, just in case. These issues usually arise when the node was shutdown unexpectedly mid-operation. The node // performs regular health checks to detect whenever this occurs. shouldRestart := false node.Server, err, shouldRestart = lib.NewServer( node.Params, - listeners, + node.Listeners, desoAddrMgr, node.Config.ConnectIPs, node.ChainDB, @@ -243,7 +251,8 @@ func (node *Node) Start(exitChannels ...*chan struct{}) { node.nodeMessageChan, node.Config.ForceChecksum, node.Config.StateChangeDir, - node.Config.HypersyncMaxQueueSize) + node.Config.HypersyncMaxQueueSize, + blsKeystore) if err != nil { // shouldRestart can be true if, on the previous run, we did not finish flushing all ancestral // records to the DB. In this case, the snapshot is corrupted and needs to be computed. See the diff --git a/cmd/run.go b/cmd/run.go index e6231fdd8..212acaa8f 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -58,6 +58,10 @@ func SetupRunFlags(cmd *cobra.Command) { cmd.PersistentFlags().String("postgres-uri", "", "BETA: Use Postgres as the backing store for chain data."+ "When enabled, most data is stored in postgres although badger is still currently used for some state. Run your "+ "Postgres instance on the same machine as your node for optimal performance.") + cmd.PersistentFlags().String("pos-validator-seed", "", "The private key of the Proof of Stake validator. "+ + "The private key should be passed as hex, optionally prefixed with a '0x', and map to a valid BLS12_381 private key. "+ + "The private key must be 32 bytes, or 64 characters, in length (excluding the '0x' prefix). 
Setting this flag automatically "+
		"makes the node run as a Proof of Stake validator.")
	cmd.PersistentFlags().Uint32("max-sync-block-height", 0, "Max sync block height")

	// Hyper Sync
diff --git a/integration_testing/connection_bridge.go b/integration_testing/connection_bridge.go
index 1d0228467..4c3b28dde 100644
--- a/integration_testing/connection_bridge.go
+++ b/integration_testing/connection_bridge.go
@@ -172,7 +172,7 @@ func (bridge *ConnectionBridge) getVersionMessage(node *cmd.Node) *lib.MsgDeSoVe
 	}
 
 	if node.Server != nil {
-		ver.StartBlockHeight = uint32(node.Server.GetBlockchain().BlockTip().Header.Height)
+		ver.LatestBlockHeight = uint64(node.Server.GetBlockchain().BlockTip().Header.Height)
 	}
 	ver.MinFeeRateNanosPerKB = node.Config.MinFeerate
 	return ver
@@ -186,7 +186,7 @@ func (bridge *ConnectionBridge) startConnection(connection *lib.Peer, otherNode
 	connection.VersionNonceSent = versionMessage.Nonce
 
 	// Send the version message.
-	fmt.Println("Sending version message:", versionMessage, versionMessage.StartBlockHeight)
+	fmt.Println("Sending version message:", versionMessage, versionMessage.LatestBlockHeight)
 	if err := connection.WriteDeSoMessage(versionMessage); err != nil {
 		return err
 	}
diff --git a/lib/bls_keystore.go b/lib/bls_keystore.go
index b1356da6c..325c4661f 100644
--- a/lib/bls_keystore.go
+++ b/lib/bls_keystore.go
@@ -112,8 +112,7 @@ func (signer *BLSSigner) SignRandomSeedHash(randomSeedHash *RandomSeedHash) (*bl
 
 // TODO: Add signing function for PoS blocks
 
 func (signer *BLSSigner) SignPoSValidatorHandshake(nonceSent uint64, nonceReceived uint64, tstampMicro uint64) (*bls.Signature, error) {
-	// FIXME
-	payload := []byte{}
+	payload := GetVerackHandshakePayload(nonceSent, nonceReceived, tstampMicro)
 	return signer.sign(BLSSignatureOpCodePoSValidatorHandshake, payload[:])
 }
 
@@ -141,7 +140,6 @@ func BLSVerifyValidatorTimeout(view uint64, highQCView uint64, signature *bls.Si
 func BLSVerifyPoSValidatorHandshake(nonceSent uint64, nonceReceived uint64, tstampMicro uint64,
 	signature *bls.Signature, publicKey *bls.PublicKey) (bool, error) {
 
-	// FIXME
-	payload := []byte{}
+	payload := GetVerackHandshakePayload(nonceSent, nonceReceived, tstampMicro)
 	return _blsVerify(BLSSignatureOpCodePoSValidatorHandshake, payload[:], signature, publicKey)
 }
diff --git a/lib/network.go b/lib/network.go
index bb7fcae4e..80d412c4f 100644
--- a/lib/network.go
+++ b/lib/network.go
@@ -1578,8 +1578,7 @@ type MsgDeSoVersion struct {
 	// The height of the last block on the main chain for
 	// this node.
 	//
-	// TODO: We need to update this to uint64
-	StartBlockHeight uint32
+	LatestBlockHeight uint64
 
 	// MinFeeRateNanosPerKB is the minimum feerate that a peer will
 	// accept from other peers when validating transactions.
@@ -1611,11 +1610,11 @@ func (msg *MsgDeSoVersion) ToBytes(preSignature bool) ([]byte, error) {
 	retBytes = append(retBytes, UintToBuf(uint64(len(msg.UserAgent)))...)
 	retBytes = append(retBytes, msg.UserAgent...)
 
-	// StartBlockHeight
-	retBytes = append(retBytes, UintToBuf(uint64(msg.StartBlockHeight))...)
+	// LatestBlockHeight
+	retBytes = append(retBytes, UintToBuf(msg.LatestBlockHeight)...)
 
 	// MinFeeRateNanosPerKB
-	retBytes = append(retBytes, UintToBuf(uint64(msg.MinFeeRateNanosPerKB))...)
+	retBytes = append(retBytes, UintToBuf(msg.MinFeeRateNanosPerKB)...)
 
 	// JSONAPIPort - deprecated
 	retBytes = append(retBytes, UintToBuf(uint64(0))...)
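
A quick sketch for readers of the wire change above. UintToBuf and ReadUvarint are defined elsewhere in this codebase; assuming they follow standard unsigned-varint encoding (which the symmetric ToBytes/FromBytes usage suggests), the round-trip for the new uint64 LatestBlockHeight behaves like this standalone program using the standard library equivalents:

    package main

    import (
    	"encoding/binary"
    	"fmt"
    )

    func main() {
    	// Encode a uint64 block height as an unsigned varint, then decode it back.
    	buf := make([]byte, binary.MaxVarintLen64)
    	n := binary.PutUvarint(buf, 123456789) // hypothetical height, not from the patch
    	height, _ := binary.Uvarint(buf[:n])
    	fmt.Println(height) // prints 123456789
    }

Note that the FromBytes path below still rejects decoded heights above math.MaxUint32 even though the field itself is now a uint64.
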
@@ -1689,13 +1688,13 @@ func (msg *MsgDeSoVersion) FromBytes(data []byte) error { retVer.UserAgent = string(userAgent) } - // StartBlockHeight + // LatestBlockHeight { - lastBlockHeight, err := ReadUvarint(rr) - if err != nil || lastBlockHeight > math.MaxUint32 { + latestBlockHeight, err := ReadUvarint(rr) + if err != nil || latestBlockHeight > math.MaxUint32 { return errors.Wrapf(err, "MsgDeSoVersion.FromBytes: Problem converting msg.LatestBlockHeight") } - retVer.StartBlockHeight = uint32(lastBlockHeight) + retVer.LatestBlockHeight = latestBlockHeight } // MinFeeRateNanosPerKB diff --git a/lib/network_test.go b/lib/network_test.go index e9d6e6e5e..8a971f75a 100644 --- a/lib/network_test.go +++ b/lib/network_test.go @@ -43,7 +43,7 @@ var expectedVer = &MsgDeSoVersion{ TstampSecs: 2, Nonce: uint64(0xffffffffffffffff), UserAgent: "abcdef", - StartBlockHeight: 4, + LatestBlockHeight: 4, MinFeeRateNanosPerKB: 10, } diff --git a/lib/peer.go b/lib/peer.go index 2c6d76e4d..98d2c135e 100644 --- a/lib/peer.go +++ b/lib/peer.go @@ -3,7 +3,6 @@ package lib import ( "fmt" "github.com/decred/dcrd/lru" - "math" "net" "sort" "sync/atomic" @@ -143,6 +142,10 @@ type Peer struct { syncType NodeSyncType } +func (pp *Peer) GetId() uint64 { + return pp.ID +} + func (pp *Peer) AddDeSoMessage(desoMessage DeSoMessage, inbound bool) { // Don't add any more messages if the peer is disconnected if pp.disconnected != 0 { @@ -1140,20 +1143,6 @@ out: // This switch actually processes the message. For most messages, we just // pass them onto the Server. switch msg := rmsg.(type) { - case *MsgDeSoVersion: - // We always receive the VERSION from the Peer before starting this select - // statement, so getting one here is an error. - - glog.Errorf("Peer.inHandler: Already received 'version' from peer %v -- disconnecting", pp) - break out - - case *MsgDeSoVerack: - // We always receive the VERACK from the Peer before starting this select - // statement, so getting one here is an error. - - glog.Errorf("Peer.inHandler: Already received 'verack' from peer %v -- disconnecting", pp) - break out - case *MsgDeSoPing: // Respond to a ping with a pong. pp.HandlePingMsg(msg) @@ -1290,216 +1279,6 @@ func (pp *Peer) ReadDeSoMessage() (DeSoMessage, error) { return msg, nil } -func (pp *Peer) NewVersionMessage(params *DeSoParams) *MsgDeSoVersion { - ver := NewMessage(MsgTypeVersion).(*MsgDeSoVersion) - - ver.Version = params.ProtocolVersion.ToUint64() - ver.TstampSecs = time.Now().Unix() - // We use an int64 instead of a uint64 for convenience but - // this should be fine since we're just looking to generate a - // unique value. - ver.Nonce = uint64(RandInt64(math.MaxInt64)) - ver.UserAgent = params.UserAgent - // TODO: Right now all peers are full nodes. Later on we'll want to change this, - // at which point we'll need to do a little refactoring. - ver.Services = SFFullNodeDeprecated - if pp.cmgr != nil && pp.cmgr.HyperSync { - ver.Services |= SFHyperSync - } - if pp.srv.blockchain.archivalMode { - ver.Services |= SFArchivalNode - } - - // When a node asks you for what height you have, you should reply with - // the height of the latest actual block you have. This makes it so that - // peers who have up-to-date headers but missing blocks won't be considered - // for initial block download. - // - // TODO: This is ugly. It would be nice if the Peer required zero knowledge of the - // Server and the Blockchain. 
- if pp.srv != nil { - ver.StartBlockHeight = uint32(pp.srv.blockchain.blockTip().Header.Height) - } else { - ver.StartBlockHeight = uint32(0) - } - - // Set the minimum fee rate the peer will accept. - ver.MinFeeRateNanosPerKB = pp.minTxFeeRateNanosPerKB - - return ver -} - -func (pp *Peer) sendVerack() error { - verackMsg := NewMessage(MsgTypeVerack) - // Include the nonce we received in the peer's version message so - // we can validate that we actually control our IP address. - verackMsg.(*MsgDeSoVerack).NonceReceived = pp.VersionNonceReceived - if err := pp.WriteDeSoMessage(verackMsg); err != nil { - return errors.Wrap(err, "sendVerack: ") - } - - return nil -} - -func (pp *Peer) readVerack() error { - msg, err := pp.ReadDeSoMessage() - if err != nil { - return errors.Wrap(err, "readVerack: ") - } - if msg.GetMsgType() != MsgTypeVerack { - return fmt.Errorf( - "readVerack: Received message with type %s but expected type VERACK. ", - msg.GetMsgType().String()) - } - verackMsg := msg.(*MsgDeSoVerack) - if verackMsg.NonceReceived != pp.VersionNonceSent { - return fmt.Errorf( - "readVerack: Received VERACK message with nonce %d but expected nonce %d", - verackMsg.NonceReceived, pp.VersionNonceSent) - } - - return nil -} - -func (pp *Peer) sendVersion() error { - // For an outbound peer, we send a version message and then wait to - // hear back for one. - verMsg := pp.NewVersionMessage(pp.Params) - - // Record the nonce of this version message before we send it so we can - // detect self connections and so we can validate that the peer actually - // controls the IP she's supposedly communicating to us from. - pp.VersionNonceSent = verMsg.Nonce - if pp.cmgr != nil { - pp.cmgr.sentNonces.Add(pp.VersionNonceSent) - } - - if err := pp.WriteDeSoMessage(verMsg); err != nil { - return errors.Wrap(err, "sendVersion: ") - } - - return nil -} - -func (pp *Peer) readVersion() error { - msg, err := pp.ReadDeSoMessage() - if err != nil { - return errors.Wrap(err, "readVersion: ") - } - - verMsg, ok := msg.(*MsgDeSoVersion) - if !ok { - return fmt.Errorf( - "readVersion: Received message with type %s but expected type VERSION. "+ - "The VERSION message must preceed all others", msg.GetMsgType().String()) - } - if verMsg.Version < pp.Params.MinProtocolVersion { - return fmt.Errorf("readVersion: Peer's protocol version too low: %d (min: %v)", - verMsg.Version, pp.Params.MinProtocolVersion) - } - - // If we've sent this nonce before then return an error since this is - // a connection from ourselves. - msgNonce := verMsg.Nonce - if pp.cmgr != nil { - if pp.cmgr.sentNonces.Contains(msgNonce) { - pp.cmgr.sentNonces.Delete(msgNonce) - return fmt.Errorf("readVersion: Rejecting connection to self") - } - } - // Save the version nonce so we can include it in our verack message. - pp.VersionNonceReceived = msgNonce - - // Set the peer info-related fields. - pp.PeerInfoMtx.Lock() - pp.userAgent = verMsg.UserAgent - pp.serviceFlags = verMsg.Services - pp.advertisedProtocolVersion = verMsg.Version - negotiatedVersion := pp.Params.ProtocolVersion - if pp.advertisedProtocolVersion < pp.Params.ProtocolVersion.ToUint64() { - negotiatedVersion = NewProtocolVersionType(pp.advertisedProtocolVersion) - } - pp.negotiatedProtocolVersion = negotiatedVersion.ToUint64() - pp.PeerInfoMtx.Unlock() - - // Set the stats-related fields. 
- pp.StatsMtx.Lock() - pp.startingHeight = verMsg.StartBlockHeight - pp.minTxFeeRateNanosPerKB = verMsg.MinFeeRateNanosPerKB - pp.TimeConnected = time.Unix(verMsg.TstampSecs, 0) - pp.TimeOffsetSecs = verMsg.TstampSecs - time.Now().Unix() - pp.StatsMtx.Unlock() - - // Update the timeSource now that we've gotten a version message from the - // peer. - if pp.cmgr != nil { - pp.cmgr.timeSource.AddTimeSample(pp.addrStr, pp.TimeConnected) - } - - return nil -} - -func (pp *Peer) ReadWithTimeout(readFunc func() error, readTimeout time.Duration) error { - errChan := make(chan error) - go func() { - errChan <- readFunc() - }() - select { - case err := <-errChan: - { - return err - } - case <-time.After(readTimeout): - { - return fmt.Errorf("ReadWithTimeout: Timed out reading message from peer: (%v)", pp) - } - } -} - -func (pp *Peer) NegotiateVersion(versionNegotiationTimeout time.Duration) error { - if pp.isOutbound { - // Write a version message. - if err := pp.sendVersion(); err != nil { - return errors.Wrapf(err, "negotiateVersion: Problem sending version to Peer %v", pp) - } - // Read the peer's version. - if err := pp.ReadWithTimeout( - pp.readVersion, - versionNegotiationTimeout); err != nil { - - return errors.Wrapf(err, "negotiateVersion: Problem reading OUTBOUND peer version for Peer %v", pp) - } - } else { - // Read the version first since this is an inbound peer. - if err := pp.ReadWithTimeout( - pp.readVersion, - versionNegotiationTimeout); err != nil { - - return errors.Wrapf(err, "negotiateVersion: Problem reading INBOUND peer version for Peer %v", pp) - } - if err := pp.sendVersion(); err != nil { - return errors.Wrapf(err, "negotiateVersion: Problem sending version to Peer %v", pp) - } - } - - // After sending and receiving a compatible version, complete the - // negotiation by sending and receiving a verack message. - if err := pp.sendVerack(); err != nil { - return errors.Wrapf(err, "negotiateVersion: Problem sending verack to Peer %v", pp) - } - if err := pp.ReadWithTimeout( - pp.readVerack, - versionNegotiationTimeout); err != nil { - - return errors.Wrapf(err, "negotiateVersion: Problem reading VERACK message from Peer %v", pp) - } - pp.VersionNegotiated = true - - // At this point we have sent a version and validated our peer's - // version. So the negotiation should be complete. - return nil -} - // Disconnect closes a peer's network connection. func (pp *Peer) Disconnect() { // Only run the logic the first time Disconnect is called. 
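
The Peer-level version negotiation deleted above is superseded by the RemoteNode state machine that lib/remote_node.go introduces below. As a rough orientation sketch (illustrative only, not part of the patch; the srv, cmgr, keystore, conn, and nonce wiring is assumed to be supplied by the server), the outbound happy path looks like:

    // Illustrative sketch, using only the RemoteNode API shown in the file below.
    func exampleOutboundHandshake(srv *Server, cmgr *ConnectionManager, keystore *BLSKeystore,
    	params *DeSoParams, netAddr *wire.NetAddress, conn net.Conn, nonce uint64) error {

    	rn := NewRemoteNode(NewRemoteNodeId(1), srv, cmgr, keystore, params,
    		1000 /* minTxFeeRateNanosPerKB */, 0 /* latestBlockHeight */, SFFullNodeDeprecated)

    	// Dial: NotConnected -> Attempted. The ConnectionManager later reports the
    	// established connection to the server via MsgDeSoNewConnection.
    	if err := rn.DialOutboundConnection(netAddr); err != nil {
    		return err
    	}
    	// Attach the established connection: Attempted -> Connected.
    	if err := rn.AttachOutboundConnection(conn, netAddr, false /* isPersistent */); err != nil {
    		return err
    	}
    	// Outbound peers initiate the handshake by sending Version.
    	if err := rn.InitiateHandshake(nonce); err != nil {
    		return err
    	}
    	// The peer's Version and Verack replies are then routed into
    	// rn.HandleVersionMessage(...) and rn.HandleVerackMessage(...); once both
    	// succeed, rn.IsHandshakeCompleted() returns true.
    	return nil
    }
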
diff --git a/lib/remote_node.go b/lib/remote_node.go new file mode 100644 index 000000000..6da6bf408 --- /dev/null +++ b/lib/remote_node.go @@ -0,0 +1,651 @@ +package lib + +import ( + "encoding/binary" + "fmt" + "github.com/btcsuite/btcd/wire" + "github.com/deso-protocol/core/bls" + "github.com/golang/glog" + "github.com/pkg/errors" + "golang.org/x/crypto/sha3" + "net" + "sync" + "time" +) + +type RemoteNodeStatus int + +const ( + RemoteNodeStatus_NotConnected RemoteNodeStatus = 0 + RemoteNodeStatus_Connected RemoteNodeStatus = 1 + RemoteNodeStatus_HandshakeCompleted RemoteNodeStatus = 2 + RemoteNodeStatus_Attempted RemoteNodeStatus = 3 + RemoteNodeStatus_Terminated RemoteNodeStatus = 4 +) + +type HandshakeStage uint8 + +const ( + HandshakeStage_NotStarted HandshakeStage = 0 + HandshakeStage_VersionSent HandshakeStage = 1 + HandshakeStage_VerackSent HandshakeStage = 2 + HandshakeStage_Completed HandshakeStage = 3 +) + +type RemoteNodeId uint64 + +func NewRemoteNodeId(id uint64) RemoteNodeId { + return RemoteNodeId(id) +} + +func (id RemoteNodeId) ToUint64() uint64 { + return uint64(id) +} + +// RemoteNode is a consensus-aware wrapper around the network Peer object. It is used to manage the lifecycle of a peer +// and to store consensus-related metadata about the peer. The RemoteNode can wrap around either an inbound or outbound +// peer connection. For outbound peers, the RemoteNode is created prior to the connection being established. In this case, +// the RemoteNode will be first used to initiate an OutboundConnectionAttempt, and then store the resulting connected peer. +// For inbound peers, the RemoteNode is created after the connection is established in ConnectionManager. +// +// Once the RemoteNode's peer is set, the RemoteNode is used to manage the handshake with the peer. The handshake involves +// rounds of Version and Verack messages being sent between our node and the peer. The handshake is complete when both +// nodes have sent and received a Version and Verack message. Once the handshake is successful, the RemoteNode will +// emit a MsgDeSoPeerHandshakeComplete control message via the Server. +// +// In steady state, i.e. after the handshake is complete, the RemoteNode can be used to send a message to the peer, +// retrieve the peer's handshake metadata, and close the connection with the peer. The RemoteNode has a single-use +// lifecycle. Once the RemoteNode is terminated, it will be disposed of, and a new RemoteNode must be created if we +// wish to reconnect to the peer in the future. +type RemoteNode struct { + mtx sync.Mutex + + peer *Peer + // The id is the unique identifier of this RemoteNode. For outbound connections, the id will be the same as the + // attemptId of the OutboundConnectionAttempt, and the subsequent id of the outbound peer. For inbound connections, + // the id will be the same as the inbound peer's id. + id RemoteNodeId + connectionStatus RemoteNodeStatus + + params *DeSoParams + srv *Server + cmgr *ConnectionManager + + // minTxFeeRateNanosPerKB is the minimum transaction fee rate in nanos per KB that our node will accept. + minTxFeeRateNanosPerKB uint64 + // latestBlockHeight is the block height of our node's block tip. + latestBlockHeight uint64 + // nodeServices is a bitfield that indicates the services supported by our node. + nodeServices ServiceFlag + + // handshakeMetadata is used to store the information received from the peer during the handshake. 
+ handshakeMetadata *HandshakeMetadata + // keystore is a reference to the node's BLS private key storage. In the context of a RemoteNode, the keystore is + // used in the Verack message for validator nodes to prove ownership of the validator BLS public key. + keystore *BLSKeystore + + // versionTimeExpected is the latest time by which we expect to receive a Version message from the peer. + // If the Version message is not received by this time, the connection will be terminated. + versionTimeExpected *time.Time + // verackTimeExpected is the latest time by which we expect to receive a Verack message from the peer. + // If the Verack message is not received by this time, the connection will be terminated. + verackTimeExpected *time.Time +} + +// HandshakeMetadata stores the information received from the peer during the Version and Verack exchange. +type HandshakeMetadata struct { + // ### The following fields are populated during the MsgDeSoVersion exchange. + // versionNonceSent is the nonce sent in the Version message to the peer. + versionNonceSent uint64 + // versionNonceReceived is the nonce received in the Version message from the peer. + versionNonceReceived uint64 + // userAgent is a meta level label that can be used to analyze the network. + userAgent string + // serviceFlag is a bitfield that indicates the services supported by the peer. + serviceFlag ServiceFlag + // latestBlockHeight is the block height of the peer's block tip during the Version exchange. + latestBlockHeight uint64 + // minTxFeeRateNanosPerKB is the minimum transaction fee rate in nanos per KB that the peer will accept. + minTxFeeRateNanosPerKB uint64 + // advertisedProtocolVersion is the protocol version advertised by the peer. + advertisedProtocolVersion ProtocolVersionType + // negotiatedProtocolVersion is the protocol version negotiated between the peer and our node. This is the minimum + // of the advertised protocol version and our node's protocol version. + negotiatedProtocolVersion ProtocolVersionType + // timeConnected is the unix timestamp of the peer, measured when the peer sent their Version message. + timeConnected *time.Time + // versionNegotiated is true if the peer passed the version negotiation step. + versionNegotiated bool + // timeOffsetSecs is the time offset between our node and the peer, measured by taking the difference between the + // peer's unix timestamp and our node's unix timestamp. + timeOffsetSecs uint64 + + // ### The following fields are populated during the MsgDeSoVerack exchange. + // validatorPublicKey is the BLS public key of the peer, if the peer is a validator node. + validatorPublicKey *bls.PublicKey + + // ### The following fields are handshake control fields. + handshakeStage HandshakeStage +} + +func NewHandshakeMetadata() *HandshakeMetadata { + return &HandshakeMetadata{ + handshakeStage: HandshakeStage_NotStarted, + } +} + +func NewRemoteNode(id RemoteNodeId, srv *Server, cmgr *ConnectionManager, keystore *BLSKeystore, + params *DeSoParams, minTxFeeRateNanosPerKB uint64, latestBlockHeight uint64, nodeServices ServiceFlag) *RemoteNode { + return &RemoteNode{ + id: id, + connectionStatus: RemoteNodeStatus_NotConnected, + handshakeMetadata: NewHandshakeMetadata(), + srv: srv, + cmgr: cmgr, + keystore: keystore, + params: params, + minTxFeeRateNanosPerKB: minTxFeeRateNanosPerKB, + latestBlockHeight: latestBlockHeight, + nodeServices: nodeServices, + } +} + +// setStatusHandshakeCompleted sets the connection status of the remote node to HandshakeCompleted. 
+func (rn *RemoteNode) setStatusHandshakeCompleted() { + rn.connectionStatus = RemoteNodeStatus_HandshakeCompleted +} + +// setStatusConnected sets the connection status of the remote node to connected. +func (rn *RemoteNode) setStatusConnected() { + rn.connectionStatus = RemoteNodeStatus_Connected +} + +// setStatusTerminated sets the connection status of the remote node to terminated. +func (rn *RemoteNode) setStatusTerminated() { + rn.connectionStatus = RemoteNodeStatus_Terminated +} + +// setStatusAttempted sets the connection status of the remote node to attempted. +func (rn *RemoteNode) setStatusAttempted() { + rn.connectionStatus = RemoteNodeStatus_Attempted +} + +func (rn *RemoteNode) GetId() RemoteNodeId { + return rn.id +} + +func (rn *RemoteNode) GetPeer() *Peer { + return rn.peer +} + +func (rn *RemoteNode) GetNegotiatedProtocolVersion() ProtocolVersionType { + return rn.handshakeMetadata.negotiatedProtocolVersion +} + +func (rn *RemoteNode) GetValidatorPublicKey() *bls.PublicKey { + return rn.handshakeMetadata.validatorPublicKey +} + +func (rn *RemoteNode) GetUserAgent() string { + return rn.handshakeMetadata.userAgent +} + +func (rn *RemoteNode) getHandshakeStage() HandshakeStage { + return rn.handshakeMetadata.handshakeStage +} + +func (rn *RemoteNode) setHandshakeStage(stage HandshakeStage) { + rn.handshakeMetadata.handshakeStage = stage +} + +func (rn *RemoteNode) IsInbound() bool { + return rn.peer != nil && !rn.peer.IsOutbound() +} + +func (rn *RemoteNode) IsOutbound() bool { + return rn.peer != nil && rn.peer.IsOutbound() +} + +func (rn *RemoteNode) IsPersistent() bool { + return rn.peer != nil && rn.peer.IsPersistent() +} + +func (rn *RemoteNode) IsNotConnected() bool { + return rn.connectionStatus == RemoteNodeStatus_NotConnected +} + +func (rn *RemoteNode) IsConnected() bool { + return rn.connectionStatus == RemoteNodeStatus_Connected +} + +func (rn *RemoteNode) IsHandshakeCompleted() bool { + return rn.connectionStatus == RemoteNodeStatus_HandshakeCompleted +} + +func (rn *RemoteNode) IsValidator() bool { + if !rn.IsHandshakeCompleted() { + return false + } + return rn.GetValidatorPublicKey() != nil +} + +// DialOutboundConnection dials an outbound connection to the provided netAddr. +func (rn *RemoteNode) DialOutboundConnection(netAddr *wire.NetAddress) error { + if !rn.IsNotConnected() { + return fmt.Errorf("RemoteNode.DialOutboundConnection: RemoteNode is not in the NotConnected state") + } + + rn.mtx.Lock() + defer rn.mtx.Unlock() + + rn.cmgr.DialOutboundConnection(netAddr, rn.GetId().ToUint64()) + rn.setStatusAttempted() + return nil +} + +// DialPersistentOutboundConnection dials a persistent outbound connection to the provided netAddr. +func (rn *RemoteNode) DialPersistentOutboundConnection(netAddr *wire.NetAddress) error { + if !rn.IsNotConnected() { + return fmt.Errorf("RemoteNode.DialPersistentOutboundConnection: RemoteNode is not in the NotConnected state") + } + + rn.mtx.Lock() + defer rn.mtx.Unlock() + + rn.cmgr.DialPersistentOutboundConnection(netAddr, rn.GetId().ToUint64()) + rn.setStatusAttempted() + return nil +} + +// AttachInboundConnection creates an inbound peer once a successful inbound connection has been established. 
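+// The RemoteNode's id is reused as the new Peer's id (see the ConnectPeer call
+// below), so the two identifiers stay in sync for inbound connections.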
+func (rn *RemoteNode) AttachInboundConnection(conn net.Conn, na *wire.NetAddress) error { + if !rn.IsNotConnected() { + return fmt.Errorf("RemoteNode.AttachInboundConnection: RemoteNode is not in the NotConnected state") + } + + rn.mtx.Lock() + defer rn.mtx.Unlock() + + id := rn.GetId().ToUint64() + rn.peer = rn.cmgr.ConnectPeer(id, conn, na, false, false) + rn.setStatusConnected() + return nil +} + +// AttachOutboundConnection creates an outbound peer once a successful outbound connection has been established. +func (rn *RemoteNode) AttachOutboundConnection(conn net.Conn, na *wire.NetAddress, isPersistent bool) error { + if rn.connectionStatus != RemoteNodeStatus_Attempted { + return fmt.Errorf("RemoteNode.AttachOutboundConnection: RemoteNode is not in the Attempted state") + } + + rn.mtx.Lock() + defer rn.mtx.Unlock() + + id := rn.GetId().ToUint64() + rn.peer = rn.cmgr.ConnectPeer(id, conn, na, true, isPersistent) + rn.setStatusConnected() + return nil +} + +// Disconnect disconnects the remote node, closing the attempted connection or the established connection. +func (rn *RemoteNode) Disconnect() { + rn.mtx.Lock() + defer rn.mtx.Unlock() + + id := rn.GetId().ToUint64() + switch rn.connectionStatus { + case RemoteNodeStatus_Attempted: + rn.cmgr.CloseAttemptedConnection(id) + case RemoteNodeStatus_Connected, RemoteNodeStatus_HandshakeCompleted: + rn.cmgr.CloseConnection(id) + } + rn.setStatusTerminated() +} + +func (rn *RemoteNode) SendMessage(desoMsg DeSoMessage) error { + if rn.connectionStatus != RemoteNodeStatus_HandshakeCompleted { + return fmt.Errorf("SendMessage: Remote node is not connected") + } + + return rn.sendMessage(desoMsg) +} + +func (rn *RemoteNode) sendMessage(desoMsg DeSoMessage) error { + if err := rn.cmgr.SendMessage(desoMsg, rn.GetId().ToUint64()); err != nil { + return fmt.Errorf("SendMessage: Problem sending message to peer (id= %d): %v", rn.id, err) + } + return nil +} + +// InitiateHandshake is a starting point for a peer handshake. If the peer is outbound, a version message is sent +// to the peer. If the peer is inbound, the peer is expected to send a version message to us first. +func (rn *RemoteNode) InitiateHandshake(nonce uint64) error { + rn.mtx.Lock() + defer rn.mtx.Unlock() + + if rn.connectionStatus != RemoteNodeStatus_Connected { + return fmt.Errorf("InitiateHandshake: Remote node is not connected") + } + if rn.getHandshakeStage() != HandshakeStage_NotStarted { + return fmt.Errorf("InitiateHandshake: Handshake has already been initiated") + } + + if rn.GetPeer().IsOutbound() { + versionTimeExpected := time.Now().Add(rn.params.VersionNegotiationTimeout) + rn.versionTimeExpected = &versionTimeExpected + if err := rn.sendVersionMessage(nonce); err != nil { + return fmt.Errorf("InitiateHandshake: Problem sending version message to peer (id= %d): %v", rn.id, err) + } + rn.setHandshakeStage(HandshakeStage_VersionSent) + } + return nil +} + +// sendVersionMessage generates and sends a version message to a RemoteNode peer. The message will contain the nonce +// that is passed in as an argument. +func (rn *RemoteNode) sendVersionMessage(nonce uint64) error { + verMsg := rn.newVersionMessage(nonce) + + // Record the nonce of this version message before we send it so we can + // detect self connections and so we can validate that the peer actually + // controls the IP she's supposedly communicating to us from. 
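+	// For example, if we accidentally dialed our own listening address, the
+	// Version message we would eventually receive back would carry this same
+	// nonce, letting the handshake logic detect and reject the self-connection.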
+ rn.handshakeMetadata.versionNonceSent = nonce + + if err := rn.sendMessage(verMsg); err != nil { + return fmt.Errorf("sendVersionMessage: Problem sending version message to peer (id= %d): %v", rn.id, err) + } + return nil +} + +// newVersionMessage returns a new version message that can be sent to a RemoteNode peer. The message will contain the +// nonce that is passed in as an argument. +func (rn *RemoteNode) newVersionMessage(nonce uint64) *MsgDeSoVersion { + ver := NewMessage(MsgTypeVersion).(*MsgDeSoVersion) + + ver.Version = rn.params.ProtocolVersion.ToUint64() + // Set the services bitfield to indicate what services this node supports. + ver.Services = rn.nodeServices + + // We use an int64 instead of a uint64 for convenience. + ver.TstampSecs = time.Now().Unix() + + ver.Nonce = nonce + ver.UserAgent = rn.params.UserAgent + + // When a node asks you for what height you have, you should reply with the height of the latest actual block you + // have. This makes it so that peers who have up-to-date headers but missing blocks won't be considered for initial + // block download. + ver.LatestBlockHeight = rn.latestBlockHeight + + // Set the minimum fee rate the peer will accept. + ver.MinFeeRateNanosPerKB = rn.minTxFeeRateNanosPerKB + + return ver +} + +// HandleVersionMessage is called upon receiving a version message from the RemoteNode's peer. The peer may be the one +// initiating the handshake, in which case, we should respond with our own version message. To do this, we pass the +// responseNonce to this function, which we will use in our response version message. +func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce uint64) error { + rn.mtx.Lock() + defer rn.mtx.Unlock() + + if rn.connectionStatus != RemoteNodeStatus_Connected { + return fmt.Errorf("HandleVersionMessage: RemoteNode is not connected") + } + if rn.getHandshakeStage() != HandshakeStage_NotStarted && rn.getHandshakeStage() != HandshakeStage_VersionSent { + return fmt.Errorf("HandleVersionMessage: Handshake has already been initiated, stage: %v", rn.getHandshakeStage()) + } + + // Verify that the peer's version matches our minimal supported version. + if verMsg.Version < rn.params.MinProtocolVersion { + return fmt.Errorf("RemoteNode.HandleVersionMessage: Requesting disconnect for id: (%v) "+ + "protocol version too low. Peer version: %v, min version: %v", rn.id, verMsg.Version, rn.params.MinProtocolVersion) + } + + // Verify that the peer's version message is sent within the version negotiation timeout. + if rn.versionTimeExpected != nil && rn.versionTimeExpected.Before(time.Now()) { + return fmt.Errorf("RemoteNode.HandleVersionMessage: Requesting disconnect for id: (%v) "+ + "version timeout. Time expected: %v, now: %v", rn.id, rn.versionTimeExpected.UnixMicro(), time.Now().UnixMicro()) + } + + vMeta := rn.handshakeMetadata + // Record the version the peer is using. + vMeta.advertisedProtocolVersion = NewProtocolVersionType(verMsg.Version) + // Decide on the protocol version to use for this connection. + negotiatedVersion := rn.params.ProtocolVersion + if verMsg.Version < rn.params.ProtocolVersion.ToUint64() { + negotiatedVersion = NewProtocolVersionType(verMsg.Version) + } + vMeta.negotiatedProtocolVersion = negotiatedVersion + + // Record the services the peer is advertising. + vMeta.serviceFlag = verMsg.Services + + // Record the tstamp sent by the peer and calculate the time offset. 
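+	// The offset is stored as an absolute value: if the peer's clock reads
+	// 1,000,003 while ours reads 1,000,000 (or vice versa), timeOffsetSecs is 3.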
+ timeConnected := time.Unix(verMsg.TstampSecs, 0)
+ vMeta.timeConnected = &timeConnected
+ currentTime := time.Now().Unix()
+ if currentTime > verMsg.TstampSecs {
+ vMeta.timeOffsetSecs = uint64(currentTime - verMsg.TstampSecs)
+ } else {
+ vMeta.timeOffsetSecs = uint64(verMsg.TstampSecs - currentTime)
+ }
+
+ // Save the received version nonce so we can include it in our verack message.
+ vMeta.versionNonceReceived = verMsg.Nonce
+
+ // Set the peer info-related fields.
+ vMeta.userAgent = verMsg.UserAgent
+ vMeta.latestBlockHeight = verMsg.LatestBlockHeight
+ vMeta.minTxFeeRateNanosPerKB = verMsg.MinFeeRateNanosPerKB
+
+ // Respond to the version message if this is an inbound peer.
+ if !rn.peer.IsOutbound() {
+ if err := rn.sendVersionMessage(responseNonce); err != nil {
+ return errors.Wrapf(err, "RemoteNode.HandleVersionMessage: Problem sending version message to peer (id= %d)", rn.id)
+ }
+ }
+
+ // After sending and receiving a compatible version, send the verack message. Notice that we don't wait for the
+ // peer's verack message even if it is an inbound peer. Instead, we just send the verack message right away.
+
+ // Set the latest time by which we should receive a verack message from the peer.
+ verackTimeExpected := time.Now().Add(rn.params.VersionNegotiationTimeout)
+ rn.verackTimeExpected = &verackTimeExpected
+ if err := rn.sendVerack(); err != nil {
+ return errors.Wrapf(err, "RemoteNode.HandleVersionMessage: Problem sending verack message to peer (id= %d)", rn.id)
+ }
+
+ // Update the timeSource now that we've gotten a version message from the peer.
+ rn.cmgr.AddTimeSample(rn.peer.Address(), timeConnected)
+ rn.setHandshakeStage(HandshakeStage_VerackSent)
+ return nil
+}
+
+// sendVerack constructs and sends a verack message to the peer.
+func (rn *RemoteNode) sendVerack() error {
+ verackMsg, err := rn.newVerackMessage()
+ if err != nil {
+ return err
+ }
+
+ if err := rn.sendMessage(verackMsg); err != nil {
+ return errors.Wrapf(err, "RemoteNode.sendVerack: Problem sending verack message to peer (id= %d)", rn.id)
+ }
+ return nil
+}
+
+// newVerackMessage constructs a verack message to be sent to the peer.
+func (rn *RemoteNode) newVerackMessage() (*MsgDeSoVerack, error) {
+ verack := NewMessage(MsgTypeVerack).(*MsgDeSoVerack)
+ vMeta := rn.handshakeMetadata
+
+ switch vMeta.negotiatedProtocolVersion {
+ case ProtocolVersion0, ProtocolVersion1:
+ // For protocol versions 0 and 1, we just send back the nonce we received from the peer in the version message.
+ verack.Version = VerackVersion0
+ verack.NonceReceived = vMeta.versionNonceReceived
+ case ProtocolVersion2:
+ // FIXME: resolve the non-validator - validator handshake issues on protocol version 2.
+ // For protocol version 2, we need to send the nonce we received from the peer in their version message.
+ // We also need to send our own nonce, which we generate for our version message. In addition, we need to
+ // send a current timestamp (in microseconds). We then sign the tuple of (nonceReceived, nonceSent, tstampMicro)
+ // using our validator BLS key, and send the signature along with our public key.
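
Concretely, the signing side reduces to hashing the handshake tuple and signing the digest. A rough sketch, assuming bls.PrivateKey exposes the Sign([]byte) method used elsewhere in this series, and using the GetVerackHandshakePayload helper defined later in this patch (the wrapper function itself is illustrative):

    // signPoSHandshake sketches how the verack tuple becomes a BLS signature.
    func signPoSHandshake(priv *bls.PrivateKey, nonceReceived uint64, nonceSent uint64,
        tstampMicro uint64) (*bls.Signature, error) {
        payload := GetVerackHandshakePayload(nonceReceived, nonceSent, tstampMicro)
        return priv.Sign(payload[:])
    }

The case body that follows performs this via the node's BLSKeystore.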
+ var err error + verack.Version = VerackVersion1 + verack.NonceReceived = vMeta.versionNonceReceived + verack.NonceSent = vMeta.versionNonceSent + tstampMicro := uint64(time.Now().UnixMicro()) + verack.TstampMicro = tstampMicro + verack.PublicKey = rn.keystore.GetSigner().GetPublicKey() + verack.Signature, err = rn.keystore.GetSigner().SignPoSValidatorHandshake(verack.NonceSent, verack.NonceReceived, tstampMicro) + if err != nil { + return nil, fmt.Errorf("RemoteNode.newVerackMessage: Problem signing verack message: %v", err) + } + } + return verack, nil +} + +// HandleVerackMessage handles a verack message received from the peer. +func (rn *RemoteNode) HandleVerackMessage(vrkMsg *MsgDeSoVerack) error { + rn.mtx.Lock() + defer rn.mtx.Unlock() + + if rn.connectionStatus != RemoteNodeStatus_Connected { + return fmt.Errorf("RemoteNode.HandleVerackMessage: Requesting disconnect for id: (%v) "+ + "verack received while in state: %v", rn.id, rn.connectionStatus) + } + + if rn.getHandshakeStage() != HandshakeStage_VerackSent { + return fmt.Errorf("RemoteNode.HandleVerackMessage: Requesting disconnect for id: (%v) "+ + "verack received while in handshake stage: %v", rn.id, rn.getHandshakeStage()) + } + + if rn.verackTimeExpected != nil && rn.verackTimeExpected.Before(time.Now()) { + return fmt.Errorf("RemoteNode.HandleVerackMessage: Requesting disconnect for id: (%v) "+ + "verack timeout. Time expected: %v, now: %v", rn.id, rn.verackTimeExpected.UnixMicro(), time.Now().UnixMicro()) + } + + var err error + vMeta := rn.handshakeMetadata + switch vMeta.negotiatedProtocolVersion { + case ProtocolVersion0, ProtocolVersion1: + err = rn.validateVerackPoW(vrkMsg) + case ProtocolVersion2: + err = rn.validateVerackPoS(vrkMsg) + } + + if err != nil { + return errors.Wrapf(err, "RemoteNode.HandleVerackMessage: Problem validating verack message from peer (id= %d)", rn.id) + } + + // If we get here then the peer has successfully completed the handshake. + vMeta.versionNegotiated = true + rn._logVersionSuccess(rn.peer) + rn.setStatusHandshakeCompleted() + rn.setHandshakeStage(HandshakeStage_Completed) + rn.srv.NotifyHandshakePeerMessage(rn.peer) + + return nil +} + +func (rn *RemoteNode) validateVerackPoW(vrkMsg *MsgDeSoVerack) error { + vMeta := rn.handshakeMetadata + + // Verify that the verack message is formatted correctly according to the PoW standard. + if vrkMsg.Version != VerackVersion0 { + return fmt.Errorf("RemoteNode.validateVerackPoW: Requesting disconnect for id: (%v) "+ + "verack version mismatch; message: %v; expected: %v", rn.id, vrkMsg.Version, VerackVersion0) + } + + // If the verack message has a nonce that wasn't previously sent to us in the version message, return an error. + if vrkMsg.NonceReceived != vMeta.versionNonceSent { + return fmt.Errorf("RemoteNode.validateVerackPoW: Requesting disconnect for id: (%v) nonce mismatch; "+ + "message: %v; nonceSent: %v", rn.id, vrkMsg.NonceReceived, vMeta.versionNonceSent) + } + + return nil +} + +func (rn *RemoteNode) validateVerackPoS(vrkMsg *MsgDeSoVerack) error { + vMeta := rn.handshakeMetadata + + // Verify that the verack message is formatted correctly according to the PoS standard. + if vrkMsg.Version != VerackVersion1 { + return fmt.Errorf("RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) "+ + "verack version mismatch; message: %v; expected: %v", rn.id, vrkMsg.Version, VerackVersion1) + } + + // Verify that the counterparty's verack message's NonceReceived matches the NonceSent we sent. 
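
The two nonce checks that follow bind the verack to both halves of the version exchange: the peer must echo back the nonce we sent it, and must restate the nonce it originally sent us. Condensed into one predicate for illustration (not part of the patch):

    // noncesMatch is true only when both directions of the exchange line up.
    func noncesMatch(vrk *MsgDeSoVerack, ourNonceSent uint64, ourNonceReceived uint64) bool {
        return vrk.NonceReceived == ourNonceSent && vrk.NonceSent == ourNonceReceived
    }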
+ if vrkMsg.NonceReceived != vMeta.versionNonceSent {
+ return fmt.Errorf("RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) nonce mismatch; "+
+ "message: %v; nonceSent: %v", rn.id, vrkMsg.NonceReceived, vMeta.versionNonceSent)
+ }
+
+ // Verify that the counterparty's verack message's NonceSent matches the NonceReceived we sent.
+ if vrkMsg.NonceSent != vMeta.versionNonceReceived {
+ return fmt.Errorf("RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) "+
+ "verack nonce mismatch; message: %v; expected: %v", rn.id, vrkMsg.NonceSent, vMeta.versionNonceReceived)
+ }
+
+ // Get the current time in microseconds and make sure the verack message's timestamp is no older than the configured handshake timeout.
+ timeNowMicro := uint64(time.Now().UnixMicro())
+ if vrkMsg.TstampMicro < timeNowMicro-rn.params.HandshakeTimeoutMicroSeconds {
+ return fmt.Errorf("RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) "+
+ "verack timestamp too far in the past. Time now: %v, verack timestamp: %v", rn.id, timeNowMicro, vrkMsg.TstampMicro)
+ }
+
+ // Make sure the verack message's public key and signature are not nil.
+ if vrkMsg.PublicKey == nil || vrkMsg.Signature == nil {
+ return fmt.Errorf("RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) "+
+ "verack public key or signature is nil", rn.id)
+ }
+
+ // Verify the verack message's signature.
+ ok, err := BLSVerifyPoSValidatorHandshake(vrkMsg.NonceSent, vrkMsg.NonceReceived, vrkMsg.TstampMicro,
+ vrkMsg.Signature, vrkMsg.PublicKey)
+ if err != nil {
+ return errors.Wrapf(err, "RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) "+
+ "verack signature verification failed with error", rn.id)
+ }
+ if !ok {
+ return fmt.Errorf("RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) "+
+ "verack signature verification failed", rn.id)
+ }
+
+ // If we get here then the verack message is valid. Set the validator public key on the peer.
+ vMeta.validatorPublicKey = vrkMsg.PublicKey
+ return nil
+}
+
+func (rn *RemoteNode) _logVersionSuccess(peer *Peer) {
+ inboundStr := "INBOUND"
+ if rn.IsOutbound() {
+ inboundStr = "OUTBOUND"
+ }
+ persistentStr := "PERSISTENT"
+ if !rn.IsPersistent() {
+ persistentStr = "NON-PERSISTENT"
+ }
+ logStr := fmt.Sprintf("SUCCESS version negotiation for (%s) (%s) peer (%v).", inboundStr, persistentStr, peer)
+ glog.V(1).Info(logStr)
+}
+
+func GetVerackHandshakePayload(nonceReceived uint64, nonceSent uint64, tstampMicro uint64) [32]byte {
+ // The payload for the verack message is the two nonces and the timestamp concatenated together.
+ // We do this so that we can sign the full tuple and verify the signature on the other side.
+ nonceReceivedBytes := make([]byte, 8)
+ binary.BigEndian.PutUint64(nonceReceivedBytes, nonceReceived)
+
+ nonceSentBytes := make([]byte, 8)
+ binary.BigEndian.PutUint64(nonceSentBytes, nonceSent)
+
+ tstampBytes := make([]byte, 8)
+ binary.BigEndian.PutUint64(tstampBytes, tstampMicro)
+
+ payload := append(nonceReceivedBytes, nonceSentBytes...)
+ payload = append(payload, tstampBytes...)
+ + return sha3.Sum256(payload) +} diff --git a/lib/server.go b/lib/server.go index 4260f65fb..59213657e 100644 --- a/lib/server.go +++ b/lib/server.go @@ -4,6 +4,8 @@ import ( "bytes" "encoding/hex" "fmt" + "github.com/btcsuite/btcd/wire" + "github.com/deso-protocol/core/consensus" "net" "reflect" "runtime" @@ -17,9 +19,7 @@ import ( "github.com/btcsuite/btcd/addrmgr" chainlib "github.com/btcsuite/btcd/blockchain" - "github.com/btcsuite/btcd/wire" "github.com/davecgh/go-spew/spew" - "github.com/deso-protocol/core/consensus" "github.com/deso-protocol/go-deadlock" "github.com/dgraph-io/badger/v3" "github.com/golang/glog" @@ -232,7 +232,6 @@ func (srv *Server) GetBlockProducer() *DeSoBlockProducer { return srv.blockProducer } -// TODO: The hallmark of a messy non-law-of-demeter-following interface... func (srv *Server) GetConnectionManager() *ConnectionManager { return srv.cmgr } @@ -390,7 +389,8 @@ func NewServer( _nodeMessageChan chan NodeMessage, _forceChecksum bool, _stateChangeDir string, - _hypersyncMaxQueueSize uint32) ( + _hypersyncMaxQueueSize uint32, + _blsKeystore *BLSKeystore) ( _srv *Server, _err error, _shouldRestart bool) { var err error @@ -715,6 +715,13 @@ func (srv *Server) GetSnapshot(pp *Peer) { "with Prefix (%v) and SnapshotStartEntry (%v)", pp, prefix, lastReceivedKey) } +func (srv *Server) NotifyHandshakePeerMessage(peer *Peer) { + srv.incomingMessages <- &ServerMessage{ + Peer: peer, + Msg: &MsgDeSoPeerHandshakeComplete{}, + } +} + // GetBlocksToStore is part of the archival mode, which makes the node download all historical blocks after completing // hypersync. We will go through all blocks corresponding to the snapshot and download the blocks. func (srv *Server) GetBlocksToStore(pp *Peer) { @@ -1569,11 +1576,12 @@ func (srv *Server) _startSync() { } -func (srv *Server) _handleNewPeer(pp *Peer) { +func (srv *Server) _handlePeerHandshakeComplete(pp *Peer) { isSyncCandidate := pp.IsSyncCandidate() isSyncing := srv.blockchain.isSyncing() chainState := srv.blockchain.chainState() - glog.V(1).Infof("Server._handleNewPeer: Processing NewPeer: (%v); IsSyncCandidate(%v), syncPeerIsNil=(%v), IsSyncing=(%v), ChainState=(%v)", + glog.V(1).Infof("Server._handlePeerHandshakeComplete: Processing NewPeer: (%v); IsSyncCandidate(%v), "+ + "syncPeerIsNil=(%v), IsSyncing=(%v), ChainState=(%v)", pp, isSyncCandidate, (srv.SyncPeer == nil), isSyncing, chainState) // Request a sync if we're ready @@ -2221,6 +2229,8 @@ func (srv *Server) _handleGetAddrMessage(pp *Peer, msg *MsgDeSoGetAddr) { func (srv *Server) _handleControlMessages(serverMessage *ServerMessage) (_shouldQuit bool) { switch serverMessage.Msg.(type) { // Control messages used internally to signal to the server. + case *MsgDeSoPeerHandshakeComplete: + srv._handlePeerHandshakeComplete(serverMessage.Peer) case *MsgDeSoDisconnectedPeer: srv._handleDonePeer(serverMessage.Peer) case *MsgDeSoQuit: From 2eb2632b15c9598f597e9b2c65bd09f3201bc1e0 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Tue, 9 Jan 2024 16:54:13 -0800 Subject: [PATCH 05/37] PoS Remote Node Indexer and Manager (#879) * Add RemoteNodeIndexer * Add HandshakeController PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. 
* Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment Integration testing updates PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment RemoteNode and RemoteNodeId Initial remote node manager tests remote node tests Better connection testing framework Add validator integration test Fix validator-validator connection test; Add nonValidator-validator test Simplify indices Simplify remote node indexer; fix compilation Simplify RemoteNodeManager More RemoteNodeManager updates Nits --- collections/concurrent_map.go | 69 +++++++ collections/concurrent_map_test.go | 61 ++++++ lib/remote_node.go | 52 +++-- lib/remote_node_indexer.go | 46 +++++ lib/remote_node_manager.go | 297 +++++++++++++++++++++++++++++ lib/server.go | 6 +- 6 files changed, 512 insertions(+), 19 deletions(-) create mode 100644 collections/concurrent_map.go create mode 100644 collections/concurrent_map_test.go create mode 100644 lib/remote_node_indexer.go create mode 100644 lib/remote_node_manager.go diff --git a/collections/concurrent_map.go b/collections/concurrent_map.go new file mode 100644 index 000000000..7e3b1b09f --- /dev/null +++ b/collections/concurrent_map.go @@ -0,0 +1,69 @@ +package collections + +import "sync" + +type ConcurrentMap[Key comparable, Value any] struct { + mtx sync.RWMutex + m map[Key]Value +} + +func NewConcurrentMap[Key comparable, Value any]() *ConcurrentMap[Key, Value] { + return &ConcurrentMap[Key, Value]{ + m: make(map[Key]Value), + } +} + +func (cm *ConcurrentMap[Key, Value]) Set(key Key, val Value) { + cm.mtx.Lock() + defer cm.mtx.Unlock() + + cm.m[key] = val +} + +func (cm *ConcurrentMap[Key, Value]) Remove(key Key) { + cm.mtx.Lock() + defer cm.mtx.Unlock() + + _, ok := cm.m[key] + if !ok { + return + } + delete(cm.m, key) +} + +func (cm *ConcurrentMap[Key, Value]) Get(key Key) (Value, bool) { + cm.mtx.RLock() + 
defer cm.mtx.RUnlock() + + val, ok := cm.m[key] + return val, ok +} + +func (cm *ConcurrentMap[Key, Value]) Copy() map[Key]Value { + cm.mtx.RLock() + defer cm.mtx.RUnlock() + + index := make(map[Key]Value) + for key, node := range cm.m { + index[key] = node + } + return index +} + +func (cm *ConcurrentMap[Key, Value]) GetAll() []Value { + cm.mtx.RLock() + defer cm.mtx.RUnlock() + + var vals []Value + for _, val := range cm.m { + vals = append(vals, val) + } + return vals +} + +func (cm *ConcurrentMap[Key, Value]) Count() int { + cm.mtx.RLock() + defer cm.mtx.RUnlock() + + return len(cm.m) +} diff --git a/collections/concurrent_map_test.go b/collections/concurrent_map_test.go new file mode 100644 index 000000000..46fc0c9ba --- /dev/null +++ b/collections/concurrent_map_test.go @@ -0,0 +1,61 @@ +package collections + +import ( + "fmt" + "testing" +) + +func TestConcurrentMap(t *testing.T) { + m := NewConcurrentMap[string, int]() + control := make(map[string]int) + + // test add + for ii := 0; ii < 100; ii++ { + key := fmt.Sprintf("%v", ii) + m.Set(key, ii) + control[key] = ii + } + + for key, val := range control { + if mVal, ok := m.Get(key); !ok || mVal != val { + t.Errorf("Expected %d, got %d", val, m.m[key]) + } + } + + // test remove + for ii := 0; ii < 50; ii++ { + key := fmt.Sprintf("%v", ii) + m.Remove(key) + delete(control, key) + } + + for key, val := range control { + if mVal, ok := m.Get(key); !ok || mVal != val { + t.Errorf("Expected %d, got %d", val, m.m[key]) + } + } + + // test copy + copy := m.Copy() + for key, val := range control { + if mVal, ok := copy[key]; !ok || mVal != val { + t.Errorf("Expected %d, got %d", val, m.m[key]) + } + } + if len(copy) != len(control) { + t.Errorf("Expected %d, got %d", len(control), len(copy)) + } + + // test get all + vals := m.GetAll() + for _, val := range vals { + if _, ok := control[fmt.Sprintf("%v", val)]; !ok { + t.Errorf("Expected %d, got %d", val, m.m[fmt.Sprintf("%v", val)]) + } + } + + // test size + if m.Count() != len(control) { + t.Errorf("Expected %d, got %d", len(control), m.Count()) + } +} diff --git a/lib/remote_node.go b/lib/remote_node.go index 6da6bf408..64ed0be8f 100644 --- a/lib/remote_node.go +++ b/lib/remote_node.go @@ -58,13 +58,19 @@ func (id RemoteNodeId) ToUint64() uint64 { // lifecycle. Once the RemoteNode is terminated, it will be disposed of, and a new RemoteNode must be created if we // wish to reconnect to the peer in the future. type RemoteNode struct { - mtx sync.Mutex + mtx sync.RWMutex peer *Peer // The id is the unique identifier of this RemoteNode. For outbound connections, the id will be the same as the // attemptId of the OutboundConnectionAttempt, and the subsequent id of the outbound peer. For inbound connections, // the id will be the same as the inbound peer's id. - id RemoteNodeId + id RemoteNodeId + // validatorPublicKey is the BLS public key of the validator node. This is only set for validator nodes. For + // non-validator nodes, this will be nil. For outbound validators nodes, the validatorPublicKey will be set when + // the RemoteNode is instantiated. And for inbound validator nodes, the validatorPublicKey will be set when the + // handshake is completed. 
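
Before the new validatorPublicKey field itself appears below, it is worth pausing on the ConcurrentMap added above, since it is the building block for every peer index in this series. Typical usage (illustrative values, not part of the patch):

    func exampleConcurrentMapUsage() {
        peers := collections.NewConcurrentMap[uint64, string]()
        peers.Set(1, "validator-a")
        if name, ok := peers.Get(1); ok {
            fmt.Println(name) // prints "validator-a"
        }
        peers.Remove(1)
        fmt.Println(peers.Count()) // prints 0
    }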
+ validatorPublicKey *bls.PublicKey + connectionStatus RemoteNodeStatus params *DeSoParams @@ -134,10 +140,11 @@ func NewHandshakeMetadata() *HandshakeMetadata { } } -func NewRemoteNode(id RemoteNodeId, srv *Server, cmgr *ConnectionManager, keystore *BLSKeystore, +func NewRemoteNode(id RemoteNodeId, validatorPublicKey *bls.PublicKey, srv *Server, cmgr *ConnectionManager, keystore *BLSKeystore, params *DeSoParams, minTxFeeRateNanosPerKB uint64, latestBlockHeight uint64, nodeServices ServiceFlag) *RemoteNode { return &RemoteNode{ id: id, + validatorPublicKey: validatorPublicKey, connectionStatus: RemoteNodeStatus_NotConnected, handshakeMetadata: NewHandshakeMetadata(), srv: srv, @@ -183,7 +190,7 @@ func (rn *RemoteNode) GetNegotiatedProtocolVersion() ProtocolVersionType { } func (rn *RemoteNode) GetValidatorPublicKey() *bls.PublicKey { - return rn.handshakeMetadata.validatorPublicKey + return rn.validatorPublicKey } func (rn *RemoteNode) GetUserAgent() string { @@ -231,13 +238,13 @@ func (rn *RemoteNode) IsValidator() bool { // DialOutboundConnection dials an outbound connection to the provided netAddr. func (rn *RemoteNode) DialOutboundConnection(netAddr *wire.NetAddress) error { + rn.mtx.Lock() + defer rn.mtx.Unlock() + if !rn.IsNotConnected() { return fmt.Errorf("RemoteNode.DialOutboundConnection: RemoteNode is not in the NotConnected state") } - rn.mtx.Lock() - defer rn.mtx.Unlock() - rn.cmgr.DialOutboundConnection(netAddr, rn.GetId().ToUint64()) rn.setStatusAttempted() return nil @@ -245,13 +252,13 @@ func (rn *RemoteNode) DialOutboundConnection(netAddr *wire.NetAddress) error { // DialPersistentOutboundConnection dials a persistent outbound connection to the provided netAddr. func (rn *RemoteNode) DialPersistentOutboundConnection(netAddr *wire.NetAddress) error { + rn.mtx.Lock() + defer rn.mtx.Unlock() + if !rn.IsNotConnected() { return fmt.Errorf("RemoteNode.DialPersistentOutboundConnection: RemoteNode is not in the NotConnected state") } - rn.mtx.Lock() - defer rn.mtx.Unlock() - rn.cmgr.DialPersistentOutboundConnection(netAddr, rn.GetId().ToUint64()) rn.setStatusAttempted() return nil @@ -259,13 +266,13 @@ func (rn *RemoteNode) DialPersistentOutboundConnection(netAddr *wire.NetAddress) // AttachInboundConnection creates an inbound peer once a successful inbound connection has been established. func (rn *RemoteNode) AttachInboundConnection(conn net.Conn, na *wire.NetAddress) error { + rn.mtx.Lock() + defer rn.mtx.Unlock() + if !rn.IsNotConnected() { return fmt.Errorf("RemoteNode.AttachInboundConnection: RemoteNode is not in the NotConnected state") } - rn.mtx.Lock() - defer rn.mtx.Unlock() - id := rn.GetId().ToUint64() rn.peer = rn.cmgr.ConnectPeer(id, conn, na, false, false) rn.setStatusConnected() @@ -274,13 +281,13 @@ func (rn *RemoteNode) AttachInboundConnection(conn net.Conn, na *wire.NetAddress // AttachOutboundConnection creates an outbound peer once a successful outbound connection has been established. 
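
The lock reordering in the hunks above is deliberate: the status guard is only meaningful while the mutex is held, otherwise two goroutines could both observe IsNotConnected() and both attach a peer. Distilled into a pattern (hypothetical helper, not part of the patch):

    // checkThenAct runs the guard and the action inside one critical section,
    // closing the time-of-check/time-of-use window the old ordering left open.
    func (rn *RemoteNode) checkThenAct(guard func() bool, act func()) bool {
        rn.mtx.Lock()
        defer rn.mtx.Unlock()
        if !guard() {
            return false
        }
        act()
        return true
    }

AttachOutboundConnection below receives the same treatment.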
 func (rn *RemoteNode) AttachOutboundConnection(conn net.Conn, na *wire.NetAddress, isPersistent bool) error {
+ rn.mtx.Lock()
+ defer rn.mtx.Unlock()
+
 if rn.connectionStatus != RemoteNodeStatus_Attempted {
 return fmt.Errorf("RemoteNode.AttachOutboundConnection: RemoteNode is not in the Attempted state")
 }
-
- rn.mtx.Lock()
- defer rn.mtx.Unlock()
-
 id := rn.GetId().ToUint64()
 rn.peer = rn.cmgr.ConnectPeer(id, conn, na, true, isPersistent)
 rn.setStatusConnected()
@@ -292,6 +299,10 @@ func (rn *RemoteNode) Disconnect() {
 rn.mtx.Lock()
 defer rn.mtx.Unlock()

+ if rn.connectionStatus == RemoteNodeStatus_Terminated {
+ return
+ }
+
 id := rn.GetId().ToUint64()
 switch rn.connectionStatus {
 case RemoteNodeStatus_Attempted:
@@ -303,6 +314,9 @@ func (rn *RemoteNode) Disconnect() {
 }

 func (rn *RemoteNode) SendMessage(desoMsg DeSoMessage) error {
+ rn.mtx.RLock()
+ defer rn.mtx.RUnlock()
+
 if rn.connectionStatus != RemoteNodeStatus_HandshakeCompleted {
 return fmt.Errorf("SendMessage: Remote node is not connected")
 }
@@ -614,8 +628,14 @@ func (rn *RemoteNode) validateVerackPoS(vrkMsg *MsgDeSoVerack) error {
 "verack signature verification failed", rn.id)
 }

+ if rn.validatorPublicKey != nil && rn.validatorPublicKey.Serialize() != vrkMsg.PublicKey.Serialize() {
+ return fmt.Errorf("RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) "+
+ "verack public key mismatch; message: %v; expected: %v", rn.id, vrkMsg.PublicKey, rn.validatorPublicKey)
+ }
+
 // If we get here then the verack message is valid. Set the validator public key on the peer.
 vMeta.validatorPublicKey = vrkMsg.PublicKey
+ rn.validatorPublicKey = vrkMsg.PublicKey
 return nil
 }
diff --git a/lib/remote_node_indexer.go b/lib/remote_node_indexer.go
new file mode 100644
index 000000000..834dbda51
--- /dev/null
+++ b/lib/remote_node_indexer.go
@@ -0,0 +1,46 @@
+package lib
+
+import (
+ "github.com/deso-protocol/core/bls"
+ "github.com/deso-protocol/core/collections"
+)
+
+// RemoteNodeIndexer is a structure that holds information about all remote nodes and their indices.
+type RemoteNodeIndexer struct {
+ // AllRemoteNodes is a map storing all remote nodes by their IDs.
+ AllRemoteNodes *collections.ConcurrentMap[RemoteNodeId, *RemoteNode]
+
+ // Indices for various types of remote nodes.
+ ValidatorIndex *collections.ConcurrentMap[bls.SerializedPublicKey, *RemoteNode]
+ NonValidatorOutboundIndex *collections.ConcurrentMap[RemoteNodeId, *RemoteNode]
+ NonValidatorInboundIndex *collections.ConcurrentMap[RemoteNodeId, *RemoteNode]
+}
+
+// NewRemoteNodeIndexer initializes and returns a new instance of RemoteNodeIndexer.
+func NewRemoteNodeIndexer() *RemoteNodeIndexer {
+ rni := &RemoteNodeIndexer{
+ AllRemoteNodes: collections.NewConcurrentMap[RemoteNodeId, *RemoteNode](),
+ ValidatorIndex: collections.NewConcurrentMap[bls.SerializedPublicKey, *RemoteNode](),
+ NonValidatorOutboundIndex: collections.NewConcurrentMap[RemoteNodeId, *RemoteNode](),
+ NonValidatorInboundIndex: collections.NewConcurrentMap[RemoteNodeId, *RemoteNode](),
+ }
+
+ return rni
+}
+
+// Getter methods for accessing the different indices.
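
Keying the validator index by bls.SerializedPublicKey rather than *bls.PublicKey is what makes these lookups work: the serialized form is a comparable value, so it can act as a map key. For instance (illustrative):

    // Skip dialing a validator we are already connected to.
    if existing, ok := indexer.GetValidatorIndex().Get(pubKey.Serialize()); ok {
        _ = existing // an entry under this BLS key already exists
    }

The getters that follow expose each index directly.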
+func (rni *RemoteNodeIndexer) GetAllRemoteNodes() *collections.ConcurrentMap[RemoteNodeId, *RemoteNode] { + return rni.AllRemoteNodes +} + +func (rni *RemoteNodeIndexer) GetValidatorIndex() *collections.ConcurrentMap[bls.SerializedPublicKey, *RemoteNode] { + return rni.ValidatorIndex +} + +func (rni *RemoteNodeIndexer) GetNonValidatorOutboundIndex() *collections.ConcurrentMap[RemoteNodeId, *RemoteNode] { + return rni.NonValidatorOutboundIndex +} + +func (rni *RemoteNodeIndexer) GetNonValidatorInboundIndex() *collections.ConcurrentMap[RemoteNodeId, *RemoteNode] { + return rni.NonValidatorInboundIndex +} diff --git a/lib/remote_node_manager.go b/lib/remote_node_manager.go new file mode 100644 index 000000000..a41fe4606 --- /dev/null +++ b/lib/remote_node_manager.go @@ -0,0 +1,297 @@ +package lib + +import ( + "fmt" + "github.com/btcsuite/btcd/wire" + "github.com/deso-protocol/core/bls" + "github.com/deso-protocol/core/collections" + "github.com/pkg/errors" + "net" + "sync/atomic" +) + +// RemoteNodeManager manages all the RemoteNode that the node is connected to. It is responsible for starting, maintaining, +// and stopping remote node connections. It is also responsible for organizing the remote nodes into indices for easy +// access, through the RemoteNodeIndexer. +type RemoteNodeManager struct { + // remoteNodeIndexer is a structure that stores and indexes all created remote nodes. + remoteNodeIndexer *RemoteNodeIndexer + + params *DeSoParams + srv *Server + bc *Blockchain + cmgr *ConnectionManager + + // keystore is a reference to the node's BLS private key storage. + keystore *BLSKeystore + + // configs + minTxFeeRateNanosPerKB uint64 + nodeServices ServiceFlag + + // Used to set remote node ids. Must be incremented atomically. + remoteNodeIndex uint64 +} + +func NewRemoteNodeManager(srv *Server, bc *Blockchain, cmgr *ConnectionManager, keystore *BLSKeystore, params *DeSoParams, + minTxFeeRateNanosPerKB uint64, nodeServices ServiceFlag) *RemoteNodeManager { + return &RemoteNodeManager{ + remoteNodeIndexer: NewRemoteNodeIndexer(), + params: params, + srv: srv, + bc: bc, + cmgr: cmgr, + keystore: keystore, + minTxFeeRateNanosPerKB: minTxFeeRateNanosPerKB, + nodeServices: nodeServices, + } +} + +func (manager *RemoteNodeManager) newRemoteNode(validatorPublicKey *bls.PublicKey) *RemoteNode { + id := atomic.AddUint64(&manager.remoteNodeIndex, 1) + remoteNodeId := NewRemoteNodeId(id) + latestBlockHeight := uint64(manager.bc.BlockTip().Height) + return NewRemoteNode(remoteNodeId, validatorPublicKey, manager.srv, manager.cmgr, manager.keystore, manager.params, + manager.minTxFeeRateNanosPerKB, latestBlockHeight, manager.nodeServices) +} + +func (manager *RemoteNodeManager) ProcessCompletedHandshake(remoteNode *RemoteNode) { + if remoteNode == nil { + return + } + + if remoteNode.IsValidator() { + manager.SetValidator(remoteNode) + } else { + manager.SetNonValidator(remoteNode) + } + manager.srv.HandleAcceptedPeer(remoteNode.GetPeer()) +} + +func (manager *RemoteNodeManager) Disconnect(rn *RemoteNode) { + rn.Disconnect() + manager.removeRemoteNodeFromIndexer(rn) +} + +func (manager *RemoteNodeManager) DisconnectById(id RemoteNodeId) { + rn := manager.GetRemoteNodeById(id) + if rn == nil { + return + } + + manager.Disconnect(rn) +} + +func (manager *RemoteNodeManager) removeRemoteNodeFromIndexer(rn *RemoteNode) { + if rn == nil { + return + } + + indexer := manager.remoteNodeIndexer + indexer.GetAllRemoteNodes().Remove(rn.GetId()) + if rn.validatorPublicKey != nil { + 
indexer.GetValidatorIndex().Remove(rn.validatorPublicKey.Serialize()) + } + indexer.GetNonValidatorOutboundIndex().Remove(rn.GetId()) + indexer.GetNonValidatorInboundIndex().Remove(rn.GetId()) +} + +func (manager *RemoteNodeManager) SendMessage(rn *RemoteNode, desoMessage DeSoMessage) error { + if rn == nil { + return fmt.Errorf("RemoteNodeManager.SendMessage: RemoteNode is nil") + } + + return rn.SendMessage(desoMessage) +} + +// ########################### +// ## Create RemoteNode +// ########################### + +func (manager *RemoteNodeManager) CreateValidatorConnection(netAddr *wire.NetAddress, publicKey *bls.PublicKey) error { + if netAddr == nil || publicKey == nil { + return fmt.Errorf("RemoteNodeManager.CreateValidatorConnection: netAddr or public key is nil") + } + + remoteNode := manager.newRemoteNode(publicKey) + if err := remoteNode.DialPersistentOutboundConnection(netAddr); err != nil { + return errors.Wrapf(err, "RemoteNodeManager.CreateValidatorConnection: Problem calling DialPersistentOutboundConnection "+ + "for addr: (%s:%v)", netAddr.IP.String(), netAddr.Port) + } + manager.setRemoteNode(remoteNode) + manager.GetValidatorIndex().Set(publicKey.Serialize(), remoteNode) + return nil +} + +func (manager *RemoteNodeManager) CreateNonValidatorPersistentOutboundConnection(netAddr *wire.NetAddress) error { + if netAddr == nil { + return fmt.Errorf("RemoteNodeManager.CreateNonValidatorPersistentOutboundConnection: netAddr is nil") + } + + remoteNode := manager.newRemoteNode(nil) + if err := remoteNode.DialPersistentOutboundConnection(netAddr); err != nil { + return errors.Wrapf(err, "RemoteNodeManager.CreateNonValidatorPersistentOutboundConnection: Problem calling DialPersistentOutboundConnection "+ + "for addr: (%s:%v)", netAddr.IP.String(), netAddr.Port) + } + manager.setRemoteNode(remoteNode) + manager.GetNonValidatorOutboundIndex().Set(remoteNode.GetId(), remoteNode) + return nil +} + +func (manager *RemoteNodeManager) CreateNonValidatorOutboundConnection(netAddr *wire.NetAddress) error { + if netAddr == nil { + return fmt.Errorf("RemoteNodeManager.CreateNonValidatorOutboundConnection: netAddr is nil") + } + + remoteNode := manager.newRemoteNode(nil) + if err := remoteNode.DialOutboundConnection(netAddr); err != nil { + return errors.Wrapf(err, "RemoteNodeManager.CreateNonValidatorOutboundConnection: Problem calling DialOutboundConnection "+ + "for addr: (%s:%v)", netAddr.IP.String(), netAddr.Port) + } + manager.setRemoteNode(remoteNode) + manager.GetNonValidatorOutboundIndex().Set(remoteNode.GetId(), remoteNode) + return nil +} + +func (manager *RemoteNodeManager) AttachInboundConnection(conn net.Conn, + na *wire.NetAddress) (*RemoteNode, error) { + + remoteNode := manager.newRemoteNode(nil) + if err := remoteNode.AttachInboundConnection(conn, na); err != nil { + return nil, errors.Wrapf(err, "RemoteNodeManager.AttachInboundConnection: Problem calling AttachInboundConnection "+ + "for addr: (%s)", conn.RemoteAddr().String()) + } + + manager.setRemoteNode(remoteNode) + return remoteNode, nil +} + +func (manager *RemoteNodeManager) AttachOutboundConnection(conn net.Conn, na *wire.NetAddress, + remoteNodeId uint64, isPersistent bool) (*RemoteNode, error) { + + id := NewRemoteNodeId(remoteNodeId) + remoteNode := manager.GetRemoteNodeById(id) + if remoteNode == nil { + return nil, fmt.Errorf("RemoteNodeManager.AttachOutboundConnection: Problem getting remote node by id (%d)", + id.ToUint64()) + } + + if err := remoteNode.AttachOutboundConnection(conn, na, isPersistent); err != 
nil { + manager.Disconnect(remoteNode) + return nil, errors.Wrapf(err, "RemoteNodeManager.AttachOutboundConnection: Problem calling AttachOutboundConnection "+ + "for addr: (%s)", conn.RemoteAddr().String()) + } + + return remoteNode, nil +} + +// ########################### +// ## Setters +// ########################### + +func (manager *RemoteNodeManager) setRemoteNode(rn *RemoteNode) { + if rn == nil { + return + } + + manager.GetAllRemoteNodes().Set(rn.GetId(), rn) +} + +func (manager *RemoteNodeManager) SetNonValidator(rn *RemoteNode) { + if rn == nil { + return + } + + if rn.IsOutbound() { + manager.GetNonValidatorOutboundIndex().Set(rn.GetId(), rn) + } else if rn.IsInbound() { + manager.GetNonValidatorInboundIndex().Set(rn.GetId(), rn) + } else { + manager.Disconnect(rn) + return + } + + manager.UnsetValidator(rn) +} + +func (manager *RemoteNodeManager) SetValidator(remoteNode *RemoteNode) { + if remoteNode == nil { + return + } + + pk := remoteNode.GetValidatorPublicKey() + if pk == nil { + manager.Disconnect(remoteNode) + return + } + manager.GetValidatorIndex().Set(pk.Serialize(), remoteNode) +} + +func (manager *RemoteNodeManager) UnsetValidator(remoteNode *RemoteNode) { + if remoteNode == nil { + return + } + + pk := remoteNode.GetValidatorPublicKey() + if pk == nil { + return + } + manager.GetValidatorIndex().Remove(pk.Serialize()) +} + +func (manager *RemoteNodeManager) UnsetNonValidator(rn *RemoteNode) { + if rn == nil { + return + } + + if rn.IsOutbound() { + manager.GetNonValidatorOutboundIndex().Remove(rn.GetId()) + } else if rn.IsInbound() { + manager.GetNonValidatorInboundIndex().Remove(rn.GetId()) + } else { + manager.Disconnect(rn) + } +} + +// ########################### +// ## Getters +// ########################### + +func (manager *RemoteNodeManager) GetAllRemoteNodes() *collections.ConcurrentMap[RemoteNodeId, *RemoteNode] { + return manager.remoteNodeIndexer.GetAllRemoteNodes() +} + +func (manager *RemoteNodeManager) GetValidatorIndex() *collections.ConcurrentMap[bls.SerializedPublicKey, *RemoteNode] { + return manager.remoteNodeIndexer.GetValidatorIndex() +} + +func (manager *RemoteNodeManager) GetNonValidatorOutboundIndex() *collections.ConcurrentMap[RemoteNodeId, *RemoteNode] { + return manager.remoteNodeIndexer.GetNonValidatorOutboundIndex() +} + +func (manager *RemoteNodeManager) GetNonValidatorInboundIndex() *collections.ConcurrentMap[RemoteNodeId, *RemoteNode] { + return manager.remoteNodeIndexer.GetNonValidatorInboundIndex() +} + +func (manager *RemoteNodeManager) GetRemoteNodeFromPeer(peer *Peer) *RemoteNode { + if peer == nil { + return nil + } + id := NewRemoteNodeId(peer.GetId()) + rn, _ := manager.GetAllRemoteNodes().Get(id) + return rn +} + +func (manager *RemoteNodeManager) GetRemoteNodeById(id RemoteNodeId) *RemoteNode { + rn, ok := manager.GetAllRemoteNodes().Get(id) + if !ok { + return nil + } + return rn +} + +func (manager *RemoteNodeManager) GetAllNonValidators() []*RemoteNode { + outboundRemoteNodes := manager.GetNonValidatorOutboundIndex().GetAll() + inboundRemoteNodes := manager.GetNonValidatorInboundIndex().GetAll() + return append(outboundRemoteNodes, inboundRemoteNodes...) 
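
With the manager's API now fully in view, the end-to-end lifecycle of an outbound validator connection is: dial and pre-register, attach the established connection, then re-index once the handshake completes. A sketch assuming the caller owns a RemoteNodeManager plus the target's address and BLS key (error handling abbreviated; illustrative, not part of the patch):

    func dialValidator(m *RemoteNodeManager, addr *wire.NetAddress, pk *bls.PublicKey) error {
        // 1) Dial and record the node under the validator index up front.
        if err := m.CreateValidatorConnection(addr, pk); err != nil {
            return err
        }
        // 2) The connection manager later hands the net.Conn back through
        //    AttachOutboundConnection, and once the verack exchange succeeds,
        //    ProcessCompletedHandshake files the node under its final index.
        return nil
    }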
+} diff --git a/lib/server.go b/lib/server.go index 59213657e..d1c82e5b3 100644 --- a/lib/server.go +++ b/lib/server.go @@ -1576,11 +1576,11 @@ func (srv *Server) _startSync() { } -func (srv *Server) _handlePeerHandshakeComplete(pp *Peer) { +func (srv *Server) HandleAcceptedPeer(pp *Peer) { isSyncCandidate := pp.IsSyncCandidate() isSyncing := srv.blockchain.isSyncing() chainState := srv.blockchain.chainState() - glog.V(1).Infof("Server._handlePeerHandshakeComplete: Processing NewPeer: (%v); IsSyncCandidate(%v), "+ + glog.V(1).Infof("Server.HandleAcceptedPeer: Processing NewPeer: (%v); IsSyncCandidate(%v), "+ "syncPeerIsNil=(%v), IsSyncing=(%v), ChainState=(%v)", pp, isSyncCandidate, (srv.SyncPeer == nil), isSyncing, chainState) @@ -2230,7 +2230,7 @@ func (srv *Server) _handleControlMessages(serverMessage *ServerMessage) (_should switch serverMessage.Msg.(type) { // Control messages used internally to signal to the server. case *MsgDeSoPeerHandshakeComplete: - srv._handlePeerHandshakeComplete(serverMessage.Peer) + break case *MsgDeSoDisconnectedPeer: srv._handleDonePeer(serverMessage.Peer) case *MsgDeSoQuit: From a86d1c8db2b1db440b5941aff5e591ee2003985c Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Wed, 10 Jan 2024 19:03:22 -0800 Subject: [PATCH 06/37] PoS HandshakeController (#860) * Add HandshakeController PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment Integration testing updates PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. 
* TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment RemoteNode and RemoteNodeId Initial remote node manager tests remote node tests * Add HandshakeController PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment Integration testing updates PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment RemoteNode and RemoteNodeId Initial remote node manager tests remote node tests Better connection testing framework Add validator integration test Fix validator-validator connection test; Add nonValidator-validator test Simplify indices Simplify remote node indexer; fix compilation * Add HandshakeController PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. 
* Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment Integration testing updates PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment RemoteNode and RemoteNodeId Initial remote node manager tests remote node tests Better connection testing framework Add validator integration test Fix validator-validator connection test; Add nonValidator-validator test Simplify indices Simplify remote node indexer; fix compilation Simplify RemoteNodeManager * Merge HandshakeStage with RemoteNodeStatus; small HandshakeController nits * Nit * HandshakeController updates * Nits * Quick nit * Nits * Comment nit --- lib/constants.go | 4 + lib/pos_handshake_controller.go | 163 ++++++++++++++++++++++++++++++++ lib/remote_node.go | 85 ++++++++--------- 3 files changed, 204 insertions(+), 48 deletions(-) create mode 100644 lib/pos_handshake_controller.go diff --git a/lib/constants.go b/lib/constants.go index eccf1c582..460b8dadb 100644 --- a/lib/constants.go +++ b/lib/constants.go @@ -494,6 +494,10 @@ func (pvt ProtocolVersionType) ToUint64() uint64 { return uint64(pvt) } +func (pvt ProtocolVersionType) Before(version ProtocolVersionType) bool { + return pvt.ToUint64() < version.ToUint64() +} + // DeSoParams defines the full list of possible parameters for the // DeSo network. type DeSoParams struct { diff --git a/lib/pos_handshake_controller.go b/lib/pos_handshake_controller.go new file mode 100644 index 000000000..6f4804f2e --- /dev/null +++ b/lib/pos_handshake_controller.go @@ -0,0 +1,163 @@ +package lib + +import ( + "fmt" + "github.com/decred/dcrd/lru" + "github.com/golang/glog" + "math" +) + +// HandshakeController is a structure that handles the handshake process with remote nodes. It is the entry point for +// initiating a handshake with a remote node. It is also responsible for handling version/verack messages from remote +// nodes. And for handling the handshake complete control message. 
+type HandshakeController struct {
+ rnManager *RemoteNodeManager
+ usedNonces lru.Cache
+}
+
+func NewHandshakeController(rnManager *RemoteNodeManager) *HandshakeController {
+
+ hc := &HandshakeController{
+ rnManager: rnManager,
+ usedNonces: lru.NewCache(1000),
+ }
+
+ return hc
+}
+
+// InitiateHandshake kicks off handshake with a remote node.
+func (hc *HandshakeController) InitiateHandshake(rn *RemoteNode) {
+ nonce := uint64(RandInt64(math.MaxInt64))
+ hc.usedNonces.Add(nonce) // Record the nonce before sending, so a looped-back version message is always caught.
+ if err := rn.InitiateHandshake(nonce); err != nil {
+ glog.Errorf("HandshakeController.InitiateHandshake: Error initiating handshake: %v", err)
+ hc.rnManager.Disconnect(rn)
+ }
+}
+
+// _handleHandshakeCompleteMessage handles HandshakeComplete control messages, sent by RemoteNodes.
+func (hc *HandshakeController) _handleHandshakeCompleteMessage(origin *Peer, desoMsg DeSoMessage) {
+ if desoMsg.GetMsgType() != MsgTypePeerHandshakeComplete {
+ return
+ }
+
+ // Get the handshake information of this peer.
+ remoteNode := hc.rnManager.GetRemoteNodeFromPeer(origin)
+ if remoteNode == nil {
+ return
+ }
+
+ if remoteNode.GetNegotiatedProtocolVersion().Before(ProtocolVersion2) {
+ hc.rnManager.ProcessCompletedHandshake(remoteNode)
+ return
+ }
+
+ if err := hc.handleHandshakeCompletePoSMessage(remoteNode); err != nil {
+ glog.Errorf("HandshakeController._handleHandshakeCompleteMessage: Error handling PoS handshake peer message: %v", err)
+ hc.rnManager.Disconnect(remoteNode)
+ return
+ }
+ hc.rnManager.ProcessCompletedHandshake(remoteNode)
+}
+
+func (hc *HandshakeController) handleHandshakeCompletePoSMessage(remoteNode *RemoteNode) error {
+
+ validatorPk := remoteNode.GetValidatorPublicKey()
+ // If the remote node is not a potential validator, we don't need to do anything.
+ if validatorPk == nil {
+ return nil
+ }
+
+ // Lookup the validator in the ValidatorIndex with the same public key.
+ existingValidator, ok := hc.rnManager.GetValidatorIndex().Get(validatorPk.Serialize())
+ // For inbound RemoteNodes, we should ensure that there isn't an existing validator connected with the same public key.
+ // Inbound nodes are not initiated by us, so we shouldn't have added the RemoteNode to the ValidatorIndex yet.
+ if remoteNode.IsInbound() && ok {
+ return fmt.Errorf("HandshakeController.handleHandshakeCompletePoSMessage: Inbound RemoteNode with duplicate validator public key")
+ }
+ // For outbound RemoteNodes, we have two possible scenarios. Either the RemoteNode has been initiated as a validator,
+ // in which case it should already be in the ValidatorIndex. Or the RemoteNode has been initiated as a regular node,
+ // in which case it should not be in the ValidatorIndex, but in the NonValidatorOutboundIndex. So to ensure there is
+ // no duplicate connection with the same public key, we only check whether there is a validator in the ValidatorIndex
+ // with the RemoteNode's public key. If there is one, we want to ensure that these two RemoteNodes have identical ids.
+ if remoteNode.IsOutbound() && ok {
+ if remoteNode.GetId() != existingValidator.GetId() {
+ return fmt.Errorf("HandshakeController.handleHandshakeCompletePoSMessage: Outbound RemoteNode with duplicate validator public key. "+
+ "Existing validator id: %v, new validator id: %v", existingValidator.GetId().ToUint64(), remoteNode.GetId().ToUint64())
+ }
+ }
+ return nil
+}
+
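The inbound/outbound asymmetry above condenses to a single predicate. A restatement for illustration only (not part of the patch):

    // isDuplicateValidator reports whether a freshly handshaked RemoteNode collides
    // with an entry already stored in the ValidatorIndex under the same BLS key.
    func isDuplicateValidator(rn *RemoteNode, existing *RemoteNode, found bool) bool {
        if !found {
            return false // no entry under this key; nothing to collide with
        }
        if rn.IsInbound() {
            return true // inbound nodes are never pre-registered, so any hit is a duplicate
        }
        return rn.GetId() != existing.GetId() // outbound: a hit is fine only if it is this node
    }

+// _handleVersionMessage handles version messages, sent by RemoteNodes.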
+func (hc *HandshakeController) _handleVersionMessage(origin *Peer, desoMsg DeSoMessage) { + if desoMsg.GetMsgType() != MsgTypeVersion { + return + } + + rn := hc.rnManager.GetRemoteNodeFromPeer(origin) + if rn == nil { + // This should never happen. + return + } + + var verMsg *MsgDeSoVersion + var ok bool + if verMsg, ok = desoMsg.(*MsgDeSoVersion); !ok { + glog.Errorf("HandshakeController._handleVersionMessage: Disconnecting RemoteNode with id: (%v) "+ + "error casting version message", origin.ID) + hc.rnManager.Disconnect(rn) + return + } + + // If we've seen this nonce before then return an error since this is a connection from ourselves. + msgNonce := verMsg.Nonce + if hc.usedNonces.Contains(msgNonce) { + hc.usedNonces.Delete(msgNonce) + glog.Errorf("HandshakeController._handleVersionMessage: Disconnecting RemoteNode with id: (%v) "+ + "nonce collision", origin.ID) + hc.rnManager.Disconnect(rn) + return + } + + // Call HandleVersionMessage on the RemoteNode. + responseNonce := uint64(RandInt64(math.MaxInt64)) + if err := rn.HandleVersionMessage(verMsg, responseNonce); err != nil { + glog.Errorf("HandshakeController._handleVersionMessage: Requesting PeerDisconnect for id: (%v) "+ + "error handling version message: %v", origin.ID, err) + hc.rnManager.Disconnect(rn) + return + + } + hc.usedNonces.Add(responseNonce) +} + +// _handleVerackMessage handles verack messages, sent by RemoteNodes. +func (hc *HandshakeController) _handleVerackMessage(origin *Peer, desoMsg DeSoMessage) { + if desoMsg.GetMsgType() != MsgTypeVerack { + return + } + + rn := hc.rnManager.GetRemoteNodeFromPeer(origin) + if rn == nil { + // This should never happen. + return + } + + var vrkMsg *MsgDeSoVerack + var ok bool + if vrkMsg, ok = desoMsg.(*MsgDeSoVerack); !ok { + glog.Errorf("HandshakeController._handleVerackMessage: Disconnecting RemoteNode with id: (%v) "+ + "error casting verack message", origin.ID) + hc.rnManager.Disconnect(rn) + return + } + + // Call HandleVerackMessage on the RemoteNode. + if err := rn.HandleVerackMessage(vrkMsg); err != nil { + glog.Errorf("HandshakeController._handleVerackMessage: Requesting PeerDisconnect for id: (%v) "+ + "error handling verack message: %v", origin.ID, err) + hc.rnManager.Disconnect(rn) + } + return +} diff --git a/lib/remote_node.go b/lib/remote_node.go index 64ed0be8f..a357118a2 100644 --- a/lib/remote_node.go +++ b/lib/remote_node.go @@ -18,18 +18,11 @@ type RemoteNodeStatus int const ( RemoteNodeStatus_NotConnected RemoteNodeStatus = 0 RemoteNodeStatus_Connected RemoteNodeStatus = 1 - RemoteNodeStatus_HandshakeCompleted RemoteNodeStatus = 2 - RemoteNodeStatus_Attempted RemoteNodeStatus = 3 - RemoteNodeStatus_Terminated RemoteNodeStatus = 4 -) - -type HandshakeStage uint8 - -const ( - HandshakeStage_NotStarted HandshakeStage = 0 - HandshakeStage_VersionSent HandshakeStage = 1 - HandshakeStage_VerackSent HandshakeStage = 2 - HandshakeStage_Completed HandshakeStage = 3 + RemoteNodeStatus_VersionSent RemoteNodeStatus = 2 + RemoteNodeStatus_VerackSent RemoteNodeStatus = 3 + RemoteNodeStatus_HandshakeCompleted RemoteNodeStatus = 4 + RemoteNodeStatus_Attempted RemoteNodeStatus = 5 + RemoteNodeStatus_Terminated RemoteNodeStatus = 6 ) type RemoteNodeId uint64 @@ -129,15 +122,10 @@ type HandshakeMetadata struct { // ### The following fields are populated during the MsgDeSoVerack exchange. // validatorPublicKey is the BLS public key of the peer, if the peer is a validator node. validatorPublicKey *bls.PublicKey - - // ### The following fields are handshake control fields. 
- handshakeStage HandshakeStage } func NewHandshakeMetadata() *HandshakeMetadata { - return &HandshakeMetadata{ - handshakeStage: HandshakeStage_NotStarted, - } + return &HandshakeMetadata{} } func NewRemoteNode(id RemoteNodeId, validatorPublicKey *bls.PublicKey, srv *Server, cmgr *ConnectionManager, keystore *BLSKeystore, @@ -167,6 +155,16 @@ func (rn *RemoteNode) setStatusConnected() { rn.connectionStatus = RemoteNodeStatus_Connected } +// setStatusVersionSent sets the connection status of the remote node to version sent. +func (rn *RemoteNode) setStatusVersionSent() { + rn.connectionStatus = RemoteNodeStatus_VersionSent +} + +// setStatusVerackSent sets the connection status of the remote node to verack sent. +func (rn *RemoteNode) setStatusVerackSent() { + rn.connectionStatus = RemoteNodeStatus_VerackSent +} + // setStatusTerminated sets the connection status of the remote node to terminated. func (rn *RemoteNode) setStatusTerminated() { rn.connectionStatus = RemoteNodeStatus_Terminated @@ -197,14 +195,6 @@ func (rn *RemoteNode) GetUserAgent() string { return rn.handshakeMetadata.userAgent } -func (rn *RemoteNode) getHandshakeStage() HandshakeStage { - return rn.handshakeMetadata.handshakeStage -} - -func (rn *RemoteNode) setHandshakeStage(stage HandshakeStage) { - rn.handshakeMetadata.handshakeStage = stage -} - func (rn *RemoteNode) IsInbound() bool { return rn.peer != nil && !rn.peer.IsOutbound() } @@ -307,7 +297,8 @@ func (rn *RemoteNode) Disconnect() { switch rn.connectionStatus { case RemoteNodeStatus_Attempted: rn.cmgr.CloseAttemptedConnection(id) - case RemoteNodeStatus_Connected, RemoteNodeStatus_HandshakeCompleted: + case RemoteNodeStatus_Connected, RemoteNodeStatus_VersionSent, RemoteNodeStatus_VerackSent, + RemoteNodeStatus_HandshakeCompleted: rn.cmgr.CloseConnection(id) } rn.setStatusTerminated() @@ -340,9 +331,6 @@ func (rn *RemoteNode) InitiateHandshake(nonce uint64) error { if rn.connectionStatus != RemoteNodeStatus_Connected { return fmt.Errorf("InitiateHandshake: Remote node is not connected") } - if rn.getHandshakeStage() != HandshakeStage_NotStarted { - return fmt.Errorf("InitiateHandshake: Handshake has already been initiated") - } if rn.GetPeer().IsOutbound() { versionTimeExpected := time.Now().Add(rn.params.VersionNegotiationTimeout) @@ -350,7 +338,7 @@ func (rn *RemoteNode) InitiateHandshake(nonce uint64) error { if err := rn.sendVersionMessage(nonce); err != nil { return fmt.Errorf("InitiateHandshake: Problem sending version message to peer (id= %d): %v", rn.id, err) } - rn.setHandshakeStage(HandshakeStage_VersionSent) + rn.setStatusVersionSent() } return nil } @@ -404,11 +392,9 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce rn.mtx.Lock() defer rn.mtx.Unlock() - if rn.connectionStatus != RemoteNodeStatus_Connected { - return fmt.Errorf("HandleVersionMessage: RemoteNode is not connected") - } - if rn.getHandshakeStage() != HandshakeStage_NotStarted && rn.getHandshakeStage() != HandshakeStage_VersionSent { - return fmt.Errorf("HandleVersionMessage: Handshake has already been initiated, stage: %v", rn.getHandshakeStage()) + if rn.connectionStatus != RemoteNodeStatus_Connected && rn.connectionStatus != RemoteNodeStatus_VersionSent { + return fmt.Errorf("HandleVersionMessage: RemoteNode is not connected or version exchange has already "+ + "been completed, connectionStatus: %v", rn.connectionStatus) } // Verify that the peer's version matches our minimal supported version. 
@@ -429,8 +415,17 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce // Decide on the protocol version to use for this connection. negotiatedVersion := rn.params.ProtocolVersion if verMsg.Version < rn.params.ProtocolVersion.ToUint64() { + // In order to smoothly transition to the PoS fork, we prevent establishing new outbound connections with + // outdated nodes that run on ProtocolVersion1. This is because ProtocolVersion1 nodes will not be able to + // validate the PoS blocks and will be stuck on the PoW chain, unless they upgrade to ProtocolVersion2. + if rn.params.ProtocolVersion == ProtocolVersion2 && rn.IsOutbound() { + return fmt.Errorf("RemoteNode.HandleVersionMessage: Requesting disconnect for id: (%v). Version too low. "+ + "Outbound RemoteNodes must use at least ProtocolVersion2, instead received version: %v", rn.id, verMsg.Version) + } + negotiatedVersion = NewProtocolVersionType(verMsg.Version) } + vMeta.negotiatedProtocolVersion = negotiatedVersion // Record the services the peer is advertising. @@ -455,7 +450,7 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce vMeta.minTxFeeRateNanosPerKB = verMsg.MinFeeRateNanosPerKB // Respond to the version message if this is an inbound peer. - if !rn.peer.IsOutbound() { + if !rn.IsOutbound() { if err := rn.sendVersionMessage(responseNonce); err != nil { return errors.Wrapf(err, "RemoteNode.HandleVersionMessage: Problem sending version message to peer (id= %d)", rn.id) } @@ -473,7 +468,7 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce // Update the timeSource now that we've gotten a version message from the peer. rn.cmgr.AddTimeSample(rn.peer.Address(), timeConnected) - rn.setHandshakeStage(HandshakeStage_VerackSent) + rn.setStatusVerackSent() return nil } @@ -526,16 +521,11 @@ func (rn *RemoteNode) HandleVerackMessage(vrkMsg *MsgDeSoVerack) error { rn.mtx.Lock() defer rn.mtx.Unlock() - if rn.connectionStatus != RemoteNodeStatus_Connected { + if rn.connectionStatus != RemoteNodeStatus_VerackSent { return fmt.Errorf("RemoteNode.HandleVerackMessage: Requesting disconnect for id: (%v) "+ "verack received while in state: %v", rn.id, rn.connectionStatus) } - if rn.getHandshakeStage() != HandshakeStage_VerackSent { - return fmt.Errorf("RemoteNode.HandleVerackMessage: Requesting disconnect for id: (%v) "+ - "verack received while in handshake stage: %v", rn.id, rn.getHandshakeStage()) - } - if rn.verackTimeExpected != nil && rn.verackTimeExpected.Before(time.Now()) { return fmt.Errorf("RemoteNode.HandleVerackMessage: Requesting disconnect for id: (%v) "+ "verack timeout. Time expected: %v, now: %v", rn.id, rn.verackTimeExpected.UnixMicro(), time.Now().UnixMicro()) @@ -556,9 +546,8 @@ func (rn *RemoteNode) HandleVerackMessage(vrkMsg *MsgDeSoVerack) error { // If we get here then the peer has successfully completed the handshake. 
vMeta.versionNegotiated = true - rn._logVersionSuccess(rn.peer) + rn._logVersionSuccess() rn.setStatusHandshakeCompleted() - rn.setHandshakeStage(HandshakeStage_Completed) rn.srv.NotifyHandshakePeerMessage(rn.peer) return nil @@ -639,7 +628,7 @@ func (rn *RemoteNode) validateVerackPoS(vrkMsg *MsgDeSoVerack) error { return nil } -func (rn *RemoteNode) _logVersionSuccess(peer *Peer) { +func (rn *RemoteNode) _logVersionSuccess() { inboundStr := "INBOUND" if rn.IsOutbound() { inboundStr = "OUTBOUND" @@ -648,7 +637,7 @@ func (rn *RemoteNode) _logVersionSuccess(peer *Peer) { if !rn.IsPersistent() { persistentStr = "NON-PERSISTENT" } - logStr := fmt.Sprintf("SUCCESS version negotiation for (%s) (%s) peer (%v).", inboundStr, persistentStr, peer) + logStr := fmt.Sprintf("SUCCESS version negotiation for (%s) (%s) id=(%v).", inboundStr, persistentStr, rn.id.ToUint64()) glog.V(1).Info(logStr) } From 48c0677ae8ab04c0a3882f436a318da58494c2b0 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Wed, 24 Jan 2024 11:48:38 -0500 Subject: [PATCH 07/37] Add HandshakeController (#861) PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. * TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment Integration testing updates PoS Block Producer: TxnConnectStatusByIndex (#672) * TransactionConnectStatus and ConnectFailingTransaction * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to 960001ce00bbc1c99afb6ca6f697748bd6d944f6. * Revert "Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions"" This reverts commit 10a147654c5147c28ec674d0650bb54c8d9cebce. * Revert "Merge branch 'p/bmf-status-connected' into p/failing-transactions" This reverts commit d3e543c4c3e6f03cc74087b05c268d4449ba1689, reversing changes made to a9f782751b11e8aa3a0977fd2b2535b1ef4d7242. 
* TransactionConnectStatus and ConnectFailingTransaction * Initial _connectFailingTransaction * ConnectFailingTransaction and GlobalParamsEntry updates * Fix merge conflicts * gofmt * Fix merge conflicts * Fix blockheight * Fix merge conflicts * gofmt * Revert connect failing transaction * Add TxnStatusConnectedIndex to block and header * Fix naming * Fix tests; remove asserts * Update comment RemoteNode and RemoteNodeId Initial remote node manager tests remote node tests Better connection testing framework Add validator integration test Fix validator-validator connection test; Add nonValidator-validator test Simplify indices Simplify remote node indexer; fix compilation Simplify RemoteNodeManager More RemoteNodeManager updates Nits --- integration_testing/blocksync_test.go | 10 +- integration_testing/connection_bridge.go | 64 ++- .../connection_controller_test.go | 451 ++++++++++++++++++ .../connection_controller_utils_test.go | 206 ++++++++ integration_testing/hypersync_test.go | 10 + integration_testing/migrations_test.go | 1 + integration_testing/mining_test.go | 4 +- integration_testing/tools.go | 40 +- integration_testing/txindex_test.go | 1 + lib/connection_controller.go | 329 +++++++++++++ lib/connection_manager.go | 123 +---- lib/constants.go | 8 + ..._controller.go => handshake_controller.go} | 7 + lib/network.go | 11 +- lib/network_test.go | 12 +- lib/peer.go | 5 +- lib/remote_node.go | 45 +- lib/remote_node_manager.go | 59 ++- lib/server.go | 50 +- 19 files changed, 1252 insertions(+), 184 deletions(-) create mode 100644 integration_testing/connection_controller_test.go create mode 100644 integration_testing/connection_controller_utils_test.go create mode 100644 lib/connection_controller.go rename lib/{pos_handshake_controller.go => handshake_controller.go} (96%) diff --git a/integration_testing/blocksync_test.go b/integration_testing/blocksync_test.go index 8be96d735..be87aae3a 100644 --- a/integration_testing/blocksync_test.go +++ b/integration_testing/blocksync_test.go @@ -40,9 +40,8 @@ func TestSimpleBlockSync(t *testing.T) { // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // bridge the nodes together. - bridge := NewConnectionBridge(node1, node2) - require.NoError(bridge.Start()) + // TODO: Dial an outbound connection from node2 to node1 + // Fix other integration tests. // wait for node2 to sync blocks. waitForNodeToFullySync(node2) @@ -99,6 +98,7 @@ func TestSimpleSyncRestart(t *testing.T) { compareNodesByDB(t, node1, node2, 0) fmt.Println("Random restart successful! Random height was", randomHeight) fmt.Println("Databases match!") + bridge.Disconnect() node1.Stop() node2.Stop() } @@ -153,7 +153,7 @@ func TestSimpleSyncDisconnectWithSwitchingToNewPeer(t *testing.T) { randomHeight := randomUint32Between(t, 10, config2.MaxSyncBlockHeight) fmt.Println("Random height for a restart (re-use if test failed):", randomHeight) - disconnectAtBlockHeight(t, node2, bridge12, randomHeight) + disconnectAtBlockHeight(node2, bridge12, randomHeight) // bridge the nodes together. bridge23 := NewConnectionBridge(node2, node3) @@ -167,6 +167,8 @@ func TestSimpleSyncDisconnectWithSwitchingToNewPeer(t *testing.T) { compareNodesByDB(t, node3, node2, 0) fmt.Println("Random restart successful! 
Random height was", randomHeight) fmt.Println("Databases match!") + bridge12.Disconnect() + bridge23.Disconnect() node1.Stop() node2.Stop() node3.Stop() diff --git a/integration_testing/connection_bridge.go b/integration_testing/connection_bridge.go index 4c3b28dde..f6a9897ed 100644 --- a/integration_testing/connection_bridge.go +++ b/integration_testing/connection_bridge.go @@ -13,6 +13,7 @@ import ( "time" ) +// TODO: DEPRECATE // ConnectionBridge is a bidirectional communication channel between two nodes. A bridge creates a pair of inbound and // outbound peers for each of the nodes to handle communication. In total, it creates four peers. // @@ -111,13 +112,14 @@ func (bridge *ConnectionBridge) createInboundConnection(node *cmd.Node) *lib.Pee } // This channel is redundant in our setting. - messagesFromPeer := make(chan *lib.ServerMessage) + messagesFromPeer := make(chan *lib.ServerMessage, 100) + newPeerChan := make(chan *lib.Peer, 100) + donePeerChan := make(chan *lib.Peer, 100) // Because it is an inbound Peer of the node, it is simultaneously a "fake" outbound Peer of the bridge. // Hence, we will mark the _isOutbound parameter as "true" in NewPeer. - peer := lib.NewPeer(conn, true, netAddress, true, - 10000, 0, &lib.DeSoMainnetParams, - messagesFromPeer, nil, nil, lib.NodeSyncTypeAny) - peer.ID = uint64(lib.RandInt64(math.MaxInt64)) + peer := lib.NewPeer(uint64(lib.RandInt64(math.MaxInt64)), conn, true, + netAddress, true, 10000, 0, &lib.DeSoMainnetParams, + messagesFromPeer, nil, nil, lib.NodeSyncTypeAny, newPeerChan, donePeerChan) return peer } @@ -139,27 +141,28 @@ func (bridge *ConnectionBridge) createOutboundConnection(node *cmd.Node, otherNo fmt.Println("createOutboundConnection: Got a connection from remote:", conn.RemoteAddr().String(), "on listener:", ll.Addr().String()) - na, err := lib.IPToNetAddr(conn.RemoteAddr().String(), otherNode.Server.GetConnectionManager().AddrMgr, - otherNode.Params) - messagesFromPeer := make(chan *lib.ServerMessage) - peer := lib.NewPeer(conn, false, na, false, - 10000, 0, bridge.nodeB.Params, - messagesFromPeer, nil, nil, lib.NodeSyncTypeAny) - peer.ID = uint64(lib.RandInt64(math.MaxInt64)) + addrMgr := addrmgr.New("", net.LookupIP) + na, err := lib.IPToNetAddr(conn.RemoteAddr().String(), addrMgr, otherNode.Params) + messagesFromPeer := make(chan *lib.ServerMessage, 100) + newPeerChan := make(chan *lib.Peer, 100) + donePeerChan := make(chan *lib.Peer, 100) + peer := lib.NewPeer(uint64(lib.RandInt64(math.MaxInt64)), conn, + false, na, false, 10000, 0, bridge.nodeB.Params, + messagesFromPeer, nil, nil, lib.NodeSyncTypeAny, newPeerChan, donePeerChan) bridge.newPeerChan <- peer //} }(ll) // Make the provided node to make an outbound connection to our listener. - netAddress, _ := lib.IPToNetAddr(ll.Addr().String(), addrmgr.New("", net.LookupIP), &lib.DeSoMainnetParams) - fmt.Println("createOutboundConnection: IP:", netAddress.IP, "Port:", netAddress.Port) - go node.Server.GetConnectionManager().ConnectPeer(nil, netAddress) + addrMgr := addrmgr.New("", net.LookupIP) + addr, _ := lib.IPToNetAddr(ll.Addr().String(), addrMgr, node.Params) + go node.Server.GetConnectionManager().DialOutboundConnection(addr, uint64(lib.RandInt64(math.MaxInt64))) } // getVersionMessage simulates a version message that the provided node would have sent. 
 func (bridge *ConnectionBridge) getVersionMessage(node *cmd.Node) *lib.MsgDeSoVersion {
 	ver := lib.NewMessage(lib.MsgTypeVersion).(*lib.MsgDeSoVersion)
-	ver.Version = node.Params.ProtocolVersion
+	ver.Version = node.Params.ProtocolVersion.ToUint64()
 	ver.TstampSecs = time.Now().Unix()
 	ver.Nonce = uint64(lib.RandInt64(math.MaxInt64))
 	ver.UserAgent = node.Params.UserAgent
@@ -172,12 +175,29 @@ func (bridge *ConnectionBridge) getVersionMessage(node *cmd.Node) *lib.MsgDeSoVe
 	}
 
 	if node.Server != nil {
-		ver.LatestBlockHeight = uint32(node.Server.GetBlockchain().BlockTip().Header.Height)
+		ver.LatestBlockHeight = node.Server.GetBlockchain().BlockTip().Header.Height
 	}
 
 	ver.MinFeeRateNanosPerKB = node.Config.MinFeerate
 	return ver
 }
 
+func ReadWithTimeout(readFunc func() error, readTimeout time.Duration) error {
+	// Buffer the channel so the reader goroutine can always deliver its result
+	// and exit, even if the timeout case below fires first.
+	errChan := make(chan error, 1)
+	go func() {
+		errChan <- readFunc()
+	}()
+	select {
+	case err := <-errChan:
+		{
+			return err
+		}
+	case <-time.After(readTimeout):
+		{
+			return fmt.Errorf("ReadWithTimeout: Timed out reading message")
+		}
+	}
+}
+
 // startConnection starts the connection by performing version and verack exchange with
 // the provided connection, pretending to be the otherNode.
 func (bridge *ConnectionBridge) startConnection(connection *lib.Peer, otherNode *cmd.Node) error {
@@ -192,7 +212,7 @@ func (bridge *ConnectionBridge) startConnection(connection *lib.Peer, otherNode
 	}
 
 	// Wait for a response to the version message.
-	if err := connection.ReadWithTimeout(
+	if err := ReadWithTimeout(
 		func() error {
 			msg, err := connection.ReadDeSoMessage()
 			if err != nil {
@@ -215,7 +235,7 @@ func (bridge *ConnectionBridge) startConnection(connection *lib.Peer, otherNode
 
 	// Now prepare the verack message.
 	verackMsg := lib.NewMessage(lib.MsgTypeVerack)
-	verackMsg.(*lib.MsgDeSoVerack).Nonce = connection.VersionNonceReceived
+	verackMsg.(*lib.MsgDeSoVerack).NonceReceived = connection.VersionNonceReceived
 
 	// And send it to the connection.
 	if err := connection.WriteDeSoMessage(verackMsg); err != nil {
@@ -223,7 +243,7 @@ func (bridge *ConnectionBridge) startConnection(connection *lib.Peer, otherNode
 	}
 
 	// And finally wait for connection's response to the verack message.
-	if err := connection.ReadWithTimeout(
+	if err := ReadWithTimeout(
 		func() error {
 			msg, err := connection.ReadDeSoMessage()
 			if err != nil {
 				return err
 			}
 			if msg.GetMsgType() != lib.MsgTypeVerack {
 				return fmt.Errorf("message is not verack! Type: %v", msg.GetMsgType())
Type: %v", msg.GetMsgType()) } verackMsg := msg.(*lib.MsgDeSoVerack) - if verackMsg.Nonce != connection.VersionNonceSent { + if verackMsg.NonceReceived != connection.VersionNonceSent { return fmt.Errorf("verack message nonce doesn't match (received: %v, sent: %v)", - verackMsg.Nonce, connection.VersionNonceSent) + verackMsg.NonceReceived, connection.VersionNonceSent) } return nil }, lib.DeSoMainnetParams.VersionNegotiationTimeout); err != nil { diff --git a/integration_testing/connection_controller_test.go b/integration_testing/connection_controller_test.go new file mode 100644 index 000000000..01fb01046 --- /dev/null +++ b/integration_testing/connection_controller_test.go @@ -0,0 +1,451 @@ +package integration_testing + +import ( + "github.com/deso-protocol/core/bls" + "github.com/deso-protocol/core/lib" + "github.com/stretchr/testify/require" + "testing" +) + +func TestConnectionControllerNonValidator(t *testing.T) { + require := require.New(t) + + node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") + node1 = startNode(t, node1) + defer node1.Stop() + + // Make sure NonValidator Node1 can create an outbound connection to NonValidator Node2 + node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") + node2 = startNode(t, node2) + + cc := node1.Server.GetConnectionController() + require.NoError(cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) + waitForNonValidatorOutboundConnection(t, node1, node2) + waitForNonValidatorInboundConnection(t, node2, node1) + + node2.Stop() + waitForEmptyRemoteNodeIndexer(t, node1) + t.Logf("Test #1 passed | Successfully created outbound connection from NonValidator Node1 to NonValidator Node2") + + // Make sure NonValidator Node1 can create an outbound connection to validator Node3 + blsPriv3, err := bls.NewPrivateKey() + require.NoError(err) + node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv3) + node3 = startNode(t, node3) + + cc = node1.Server.GetConnectionController() + require.NoError(cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) + waitForValidatorConnection(t, node1, node3) + waitForNonValidatorInboundConnection(t, node3, node1) + + node3.Stop() + waitForEmptyRemoteNodeIndexer(t, node1) + t.Logf("Test #2 passed | Successfully created outbound connection from NonValidator Node1 to Validator Node3") + + // Make sure NonValidator Node1 can create a non-validator connection to validator Node4 + blsPriv4, err := bls.NewPrivateKey() + require.NoError(err) + node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4) + node4 = startNode(t, node4) + defer node4.Stop() + + cc = node1.Server.GetConnectionController() + require.NoError(cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) + waitForValidatorConnection(t, node1, node4) + waitForNonValidatorInboundConnection(t, node4, node1) + t.Logf("Test #3 passed | Successfully created outbound connection from NonValidator Node1 to Validator Node4") +} + +func TestConnectionControllerValidator(t *testing.T) { + require := require.New(t) + + blsPriv1, err := bls.NewPrivateKey() + require.NoError(err) + node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsPriv1) + node1 = startNode(t, node1) + defer node1.Stop() + + // Make sure Validator Node1 can create an outbound connection to Validator Node2 + blsPriv2, err := bls.NewPrivateKey() + blsPub2 := blsPriv2.PublicKey() + require.NoError(err) + node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2) + node2 = startNode(t, node2) + + cc := 
+func TestConnectionControllerValidator(t *testing.T) {
+	require := require.New(t)
+
+	blsPriv1, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsPriv1)
+	node1 = startNode(t, node1)
+	defer node1.Stop()
+
+	// Make sure Validator Node1 can create an outbound connection to Validator Node2
+	blsPriv2, err := bls.NewPrivateKey()
+	require.NoError(err)
+	blsPub2 := blsPriv2.PublicKey()
+	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2)
+	node2 = startNode(t, node2)
+
+	cc := node1.Server.GetConnectionController()
+	require.NoError(cc.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsPub2))
+	waitForValidatorConnection(t, node1, node2)
+	waitForValidatorConnection(t, node2, node1)
+
+	node2.Stop()
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	t.Logf("Test #1 passed | Successfully created outbound connection from Validator Node1 to Validator Node2")
+
+	// Make sure Validator Node1 can create an outbound connection to NonValidator Node3
+	node3 := spawnNonValidatorNodeProtocol2(t, 18002, "node3")
+	node3 = startNode(t, node3)
+
+	cc = node1.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String()))
+	waitForNonValidatorOutboundConnection(t, node1, node3)
+	waitForValidatorConnection(t, node3, node1)
+
+	node3.Stop()
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	t.Logf("Test #2 passed | Successfully created outbound connection from Validator Node1 to NonValidator Node3")
+
+	// Make sure Validator Node1 can create an outbound non-validator connection to Validator Node4
+	blsPriv4, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4)
+	node4 = startNode(t, node4)
+	defer node4.Stop()
+
+	cc = node1.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String()))
+	waitForValidatorConnection(t, node1, node4)
+	waitForValidatorConnection(t, node4, node1)
+	t.Logf("Test #3 passed | Successfully created non-validator outbound connection from Validator Node1 to Validator Node4")
+}
+
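The next test drives six handshake failure modes. The version-related ones reduce to the gate added to HandleVersionMessage earlier in this patch: during the PoS transition, a ProtocolVersion2 node refuses outbound peers that advertise anything lower, and the tests also assert that peers advertising an unknown higher version end up disconnected. A behavioral sketch of that rule, assuming versions are plain integers; versionAcceptable is our name for the predicate, not the patch's, and it summarizes the asserted behavior rather than the exact enforcement site:

    package main

    import "fmt"

    const (
    	ProtocolVersion1 = 1
    	ProtocolVersion2 = 2
    )

    // versionAcceptable mirrors the checks the tests below exercise.
    func versionAcceptable(localVersion, peerVersion uint64, isOutbound bool) error {
    	if peerVersion > localVersion {
    		// We can't negotiate a version newer than the one we speak.
    		return fmt.Errorf("peer version %d is above our latest version %d", peerVersion, localVersion)
    	}
    	if localVersion == ProtocolVersion2 && isOutbound && peerVersion < ProtocolVersion2 {
    		// During the PoS transition, outbound peers must speak ProtocolVersion2.
    		return fmt.Errorf("outbound peer version %d is below ProtocolVersion2", peerVersion)
    	}
    	return nil
    }

    func main() {
    	fmt.Println(versionAcceptable(ProtocolVersion2, ProtocolVersion1, true))  // rejected
    	fmt.Println(versionAcceptable(ProtocolVersion2, ProtocolVersion1, false)) // accepted inbound
    }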
+func TestConnectionControllerHandshakeDataErrors(t *testing.T) {
+	require := require.New(t)
+
+	blsPriv1, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsPriv1)
+
+	// This node should run ProtocolVersion2, but we give it ProtocolVersion1 because we want the handshake to fail.
+	blsPriv2, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2)
+	node2.Params.ProtocolVersion = lib.ProtocolVersion1
+
+	node1 = startNode(t, node1)
+	node2 = startNode(t, node2)
+	defer node1.Stop()
+	defer node2.Stop()
+
+	cc := node2.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	waitForEmptyRemoteNodeIndexer(t, node2)
+	t.Logf("Test #1 passed | Successfully disconnected node with SFValidator flag and ProtocolVersion1 mismatch")
+
+	// This node runs ProtocolVersion3, which is beyond the latest ProtocolVersion2, so the nodes should disconnect.
+	blsPriv3, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv3)
+	node3.Params.ProtocolVersion = lib.ProtocolVersionType(3)
+	node3 = startNode(t, node3)
+	defer node3.Stop()
+
+	cc = node1.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	waitForEmptyRemoteNodeIndexer(t, node3)
+	t.Logf("Test #2 passed | Successfully disconnected node with ProtocolVersion3")
+
+	// This node runs ProtocolVersion0, which is outdated, so the nodes should disconnect.
+	node4 := spawnNonValidatorNodeProtocol2(t, 18003, "node4")
+	node4.Params.ProtocolVersion = lib.ProtocolVersion0
+	node4 = startNode(t, node4)
+	defer node4.Stop()
+
+	cc = node1.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	waitForEmptyRemoteNodeIndexer(t, node4)
+	t.Logf("Test #3 passed | Successfully disconnected node with ProtocolVersion0")
+
+	// This node will present a different public key than the one it's supposed to have.
+	blsPriv5, err := bls.NewPrivateKey()
+	require.NoError(err)
+	blsPriv5Wrong, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv5)
+	node5 = startNode(t, node5)
+	defer node5.Stop()
+
+	cc = node1.Server.GetConnectionController()
+	require.NoError(cc.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsPriv5Wrong.PublicKey()))
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	waitForEmptyRemoteNodeIndexer(t, node5)
+	t.Logf("Test #4 passed | Successfully disconnected node with public key mismatch")
+
+	// This node will be missing the SFPosValidator flag while being connected to as a validator.
+	blsPriv6, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node6 := spawnNonValidatorNodeProtocol2(t, 18005, "node6")
+	node6 = startNode(t, node6)
+	defer node6.Stop()
+
+	cc = node1.Server.GetConnectionController()
+	require.NoError(cc.CreateValidatorConnection(node6.Listeners[0].Addr().String(), blsPriv6.PublicKey()))
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	waitForEmptyRemoteNodeIndexer(t, node6)
+	t.Logf("Test #5 passed | Successfully disconnected supposed validator node with missing SFPosValidator flag")
+
+	// This node will have ProtocolVersion1 and will be connected as an outbound non-validator node.
+	node7 := spawnNonValidatorNodeProtocol2(t, 18006, "node7")
+	node7.Params.ProtocolVersion = lib.ProtocolVersion1
+	node7 = startNode(t, node7)
+	defer node7.Stop()
+
+	cc = node1.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node7.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	waitForEmptyRemoteNodeIndexer(t, node7)
+	t.Logf("Test #6 passed | Successfully disconnected outbound non-validator node with ProtocolVersion1")
+}
+
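Tests #4 and #5 above hinge on the verack-time identity check (validateVerackPoS in remote_node.go): a peer dialed as a validator must advertise the SFPosValidator service flag and present the BLS public key we dialed it with. A compressed sketch of that check; the helper name and byte-slice key representation are ours, while the real code compares bls.PublicKey values:

    package main

    import (
    	"bytes"
    	"fmt"
    )

    // ServiceFlag stands in for the real service-flag bitfield.
    type ServiceFlag uint64

    const SFPosValidator ServiceFlag = 1 << 8 // illustrative bit position only

    // checkValidatorIdentity sketches the verack-time checks: the dialed
    // validator must advertise SFPosValidator and present the expected BLS key.
    func checkValidatorIdentity(peerFlags ServiceFlag, expectedKey, receivedKey []byte) error {
    	if peerFlags&SFPosValidator == 0 {
    		return fmt.Errorf("peer is missing the SFPosValidator service flag")
    	}
    	if !bytes.Equal(expectedKey, receivedKey) {
    		return fmt.Errorf("peer's BLS public key does not match the expected key")
    	}
    	return nil
    }

    func main() {
    	expected := []byte{0x01, 0x02}
    	fmt.Println(checkValidatorIdentity(SFPosValidator, expected, expected))      // ok
    	fmt.Println(checkValidatorIdentity(SFPosValidator, expected, []byte{0x03})) // Test #4
    	fmt.Println(checkValidatorIdentity(0, expected, expected))                   // Test #5
    }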
+func TestConnectionControllerHandshakeTimeouts(t *testing.T) {
+	require := require.New(t)
+
+	// Set the version negotiation timeout to 0 to make sure that the node will be disconnected
+	node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+	node1.Params.VersionNegotiationTimeout = 0
+	node1 = startNode(t, node1)
+	defer node1.Stop()
+
+	node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2")
+	node2 = startNode(t, node2)
+	defer node2.Stop()
+
+	cc := node1.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	waitForEmptyRemoteNodeIndexer(t, node2)
+	t.Logf("Test #1 passed | Successfully disconnected node after version negotiation timeout")
+
+	// Now let's try timing out the verack exchange
+	node1.Params.VersionNegotiationTimeout = lib.DeSoTestnetParams.VersionNegotiationTimeout
+	node3 := spawnNonValidatorNodeProtocol2(t, 18002, "node3")
+	node3.Params.VerackNegotiationTimeout = 0
+	node3 = startNode(t, node3)
+	defer node3.Stop()
+
+	cc = node3.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	waitForEmptyRemoteNodeIndexer(t, node3)
+	t.Logf("Test #2 passed | Successfully disconnected node after verack exchange timeout")
+
+	// Now let's try timing out the handshake between two validators, node4 and node5
+	blsPriv4, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4)
+	node4.Params.HandshakeTimeoutMicroSeconds = 0
+	node4 = startNode(t, node4)
+	defer node4.Stop()
+
+	blsPriv5, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv5)
+	node5 = startNode(t, node5)
+	defer node5.Stop()
+
+	cc = node4.Server.GetConnectionController()
+	require.NoError(cc.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsPriv5.PublicKey()))
+	waitForEmptyRemoteNodeIndexer(t, node4)
+	waitForEmptyRemoteNodeIndexer(t, node5)
+	t.Logf("Test #3 passed | Successfully disconnected validator node after handshake timeout")
+}
+
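Each timeout above works the same way: when a handshake step starts, the node records a deadline (for example, verackTimeExpected in HandleVerackMessage earlier in this series), and a reply is rejected if it arrives after that deadline, so a zero timeout fails immediately. A minimal sketch of that bookkeeping, with illustrative names:

    package main

    import (
    	"fmt"
    	"time"
    )

    // handshakeDeadline records when a handshake reply must arrive by.
    type handshakeDeadline struct {
    	expected time.Time
    }

    // start stamps the deadline when we send our half of the exchange.
    func (d *handshakeDeadline) start(timeout time.Duration) {
    	d.expected = time.Now().Add(timeout)
    }

    // check rejects replies that arrive after the recorded deadline. With a
    // zero timeout the deadline is already in the past, which is how the tests
    // above force an immediate disconnect.
    func (d *handshakeDeadline) check() error {
    	if d.expected.Before(time.Now()) {
    		return fmt.Errorf("handshake reply arrived after deadline %v", d.expected.UnixMicro())
    	}
    	return nil
    }

    func main() {
    	var d handshakeDeadline
    	d.start(0) // zero timeout, as in Tests #1-#3
    	fmt.Println(d.check())
    }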
+func TestConnectionControllerValidatorDuplication(t *testing.T) {
+	require := require.New(t)
+
+	node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+	node1 = startNode(t, node1)
+	defer node1.Stop()
+
+	// Create a validator Node2
+	blsPriv2, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2)
+	node2 = startNode(t, node2)
+
+	// Create a duplicate validator Node3
+	node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv2)
+	node3 = startNode(t, node3)
+
+	// Create validator connection from Node1 to Node2 and from Node1 to Node3
+	cc := node1.Server.GetConnectionController()
+	require.NoError(cc.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsPriv2.PublicKey()))
+	// This should fail outright because Node3 has a duplicate public key.
+	require.Error(cc.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsPriv2.PublicKey()))
+	waitForValidatorConnection(t, node1, node2)
+	waitForNonValidatorInboundConnection(t, node2, node1)
+
+	// Now create an outbound connection from Node3 to Node1, which should pass the handshake, but then fail because
+	// Node1 already has a validator connection to Node2 with the same public key.
+	cc3 := node3.Server.GetConnectionController()
+	require.NoError(cc3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node3)
+	waitForCountRemoteNodeIndexer(t, node1, 1, 1, 0, 0)
+	t.Logf("Test #1 passed | Successfully rejected duplicate validator connection with inbound/outbound validators")
+
+	node3.Stop()
+	node2.Stop()
+	waitForEmptyRemoteNodeIndexer(t, node1)
+
+	// Create two more validators, Node4 and Node5, with duplicate public keys
+	blsPriv4, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4)
+	node4 = startNode(t, node4)
+	defer node4.Stop()
+
+	node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv4)
+	node5 = startNode(t, node5)
+	defer node5.Stop()
+
+	// Create validator connections from Node4 to Node1 and from Node5 to Node1
+	cc4 := node4.Server.GetConnectionController()
+	require.NoError(cc4.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	waitForValidatorConnection(t, node1, node4)
+	waitForNonValidatorOutboundConnection(t, node4, node1)
+	cc5 := node5.Server.GetConnectionController()
+	require.NoError(cc5.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node5)
+	waitForCountRemoteNodeIndexer(t, node1, 1, 1, 0, 0)
+	t.Logf("Test #2 passed | Successfully rejected duplicate validator connection with multiple outbound validators")
+}
+
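Both rejections above come down to the validator index in RemoteNodeManager being keyed by BLS public key: the first connection claims the key, and any later RemoteNode presenting the same key is refused. A toy version of that index; the type and method names are ours, not the patch's:

    package main

    import "fmt"

    // validatorIndex is a toy stand-in for RemoteNodeManager's validator index,
    // which maps a serialized BLS public key to at most one remote node.
    type validatorIndex struct {
    	byKey map[string]uint64 // serialized public key -> remote node id
    }

    // add claims the key for the given node, or fails if another node holds it.
    func (idx *validatorIndex) add(pubKey string, nodeId uint64) error {
    	if existing, ok := idx.byKey[pubKey]; ok && existing != nodeId {
    		return fmt.Errorf("validator key %q already in use by node %d", pubKey, existing)
    	}
    	idx.byKey[pubKey] = nodeId
    	return nil
    }

    func main() {
    	idx := &validatorIndex{byKey: make(map[string]uint64)}
    	fmt.Println(idx.add("blsPubKeyA", 2)) // Node2 claims the key
    	fmt.Println(idx.add("blsPubKeyA", 3)) // Node3 presents the same key: error
    }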
+func TestConnectionControllerProtocolDifference(t *testing.T) {
+	require := require.New(t)
+
+	// Create a ProtocolVersion1 Node1
+	node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+	node1.Params.ProtocolVersion = lib.ProtocolVersion1
+	node1 = startNode(t, node1)
+	defer node1.Stop()
+
+	// Create a ProtocolVersion2 NonValidator Node2
+	node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2")
+	node2 = startNode(t, node2)
+
+	// Create non-validator connection from Node1 to Node2
+	cc := node1.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String()))
+	waitForNonValidatorOutboundConnection(t, node1, node2)
+	waitForNonValidatorInboundConnection(t, node2, node1)
+	t.Logf("Test #1 passed | Successfully connected a ProtocolVersion1 node to a ProtocolVersion2 non-validator")
+
+	// Create a ProtocolVersion2 Validator Node3
+	blsPriv3, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv3)
+	node3 = startNode(t, node3)
+
+	// Create validator connection from Node1 to Node3
+	require.NoError(cc.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsPriv3.PublicKey()))
+	waitForValidatorConnection(t, node1, node3)
+	waitForNonValidatorInboundConnection(t, node3, node1)
+	t.Logf("Test #2 passed | Successfully connected a ProtocolVersion1 node to a ProtocolVersion2 validator")
+
+	node2.Stop()
+	node3.Stop()
+	waitForEmptyRemoteNodeIndexer(t, node1)
+
+	// Create a ProtocolVersion2 validator Node4
+	blsPriv4, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4)
+	node4 = startNode(t, node4)
+	defer node4.Stop()
+
+	// Attempt to create non-validator connection from Node4 to Node1
+	cc = node4.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node4)
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	t.Logf("Test #3 passed | Successfully rejected outbound connection from ProtocolVersion2 node to ProtocolVersion1 node")
+
+	// Attempt to create validator connection from Node4 to Node1
+	require.NoError(cc.CreateValidatorConnection(node1.Listeners[0].Addr().String(), blsPriv4.PublicKey()))
+	waitForEmptyRemoteNodeIndexer(t, node4)
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	t.Logf("Test #4 passed | Successfully rejected validator connection from ProtocolVersion2 node to ProtocolVersion1 node")
+
+	// Create a ProtocolVersion2 non-validator Node5
+	node5 := spawnNonValidatorNodeProtocol2(t, 18004, "node5")
+	node5 = startNode(t, node5)
+	defer node5.Stop()
+
+	// Attempt to create non-validator connection from Node5 to Node1
+	cc = node5.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	waitForEmptyRemoteNodeIndexer(t, node5)
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	t.Logf("Test #5 passed | Successfully rejected outbound connection from ProtocolVersion2 node to ProtocolVersion1 node")
+}
+
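The test that follows relies on an asymmetry visible in processOutboundConnection later in this patch: both the outbound-peer cap and the redundant-IP-group check are guarded by !oc.isPersistent, so persistent connections always pass those two filters. A sketch of the same gating logic, in a hypothetical standalone form:

    package main

    import "fmt"

    // admitOutbound condenses the non-validator gating in processOutboundConnection:
    // persistent connections skip both the outbound cap and the IP-group check.
    func admitOutbound(isPersistent bool, outboundCount, targetOutbound int, redundantIPGroup bool) error {
    	if !isPersistent && outboundCount >= targetOutbound {
    		return fmt.Errorf("connected to maximum number of outbound peers (%d)", targetOutbound)
    	}
    	if !isPersistent && redundantIPGroup {
    		return fmt.Errorf("rejecting non-persistent connection with redundant IP group key")
    	}
    	return nil
    }

    func main() {
    	// A non-persistent dial past the cap is refused; a persistent one is not.
    	fmt.Println(admitOutbound(false, 8, 8, false))
    	fmt.Println(admitOutbound(true, 8, 8, false))
    }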
+func TestConnectionControllerPersistentConnection(t *testing.T) {
+	require := require.New(t)
+
+	// Create a NonValidator Node1
+	node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+	node1 = startNode(t, node1)
+
+	// Create a Validator Node2
+	blsPriv2, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2)
+	node2 = startNode(t, node2)
+
+	// Create a persistent connection from Node1 to Node2
+	cc := node1.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorPersistentOutboundConnection(node2.Listeners[0].Addr().String()))
+	waitForValidatorConnection(t, node1, node2)
+	waitForNonValidatorInboundConnection(t, node2, node1)
+	node2.Stop()
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	t.Logf("Test #1 passed | Successfully created persistent connection from non-validator Node1 to validator Node2")
+
+	// Create a Non-validator Node3
+	node3 := spawnNonValidatorNodeProtocol2(t, 18002, "node3")
+	node3 = startNode(t, node3)
+
+	// Create a persistent connection from Node1 to Node3
+	require.NoError(cc.CreateNonValidatorPersistentOutboundConnection(node3.Listeners[0].Addr().String()))
+	waitForNonValidatorOutboundConnection(t, node1, node3)
+	waitForNonValidatorInboundConnection(t, node3, node1)
+	node3.Stop()
+	waitForEmptyRemoteNodeIndexer(t, node1)
+	node1.Stop()
+	t.Logf("Test #2 passed | Successfully created persistent connection from non-validator Node1 to non-validator Node3")
+
+	// Create a Validator Node4
+	blsPriv4, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4)
+	node4 = startNode(t, node4)
+	defer node4.Stop()
+
+	// Create a non-validator Node5
+	node5 := spawnNonValidatorNodeProtocol2(t, 18004, "node5")
+	node5 = startNode(t, node5)
+
+	// Create a persistent connection from Node4 to Node5
+	cc = node4.Server.GetConnectionController()
+	require.NoError(cc.CreateNonValidatorPersistentOutboundConnection(node5.Listeners[0].Addr().String()))
+	waitForNonValidatorOutboundConnection(t, node4, node5)
+	waitForValidatorConnection(t, node5, node4)
+	node5.Stop()
+	waitForEmptyRemoteNodeIndexer(t, node4)
+	t.Logf("Test #3 passed | Successfully created persistent connection from validator Node4 to non-validator Node5")
+
+	// Create a Validator Node6
+	blsPriv6, err := bls.NewPrivateKey()
+	require.NoError(err)
+	node6 := spawnValidatorNodeProtocol2(t, 18005, "node6", blsPriv6)
+	node6 = startNode(t, node6)
+	defer node6.Stop()
+
+	// Create a persistent connection from Node4 to Node6
+	require.NoError(cc.CreateNonValidatorPersistentOutboundConnection(node6.Listeners[0].Addr().String()))
+	waitForValidatorConnection(t, node4, node6)
+	waitForValidatorConnection(t, node6, node4)
+	t.Logf("Test #4 passed | Successfully created persistent connection from validator Node4 to validator Node6")
+}
diff --git a/integration_testing/connection_controller_utils_test.go b/integration_testing/connection_controller_utils_test.go
new file mode 100644
index 000000000..4d5594634
--- /dev/null
+++ b/integration_testing/connection_controller_utils_test.go
@@ -0,0 +1,206 @@
+package integration_testing
+
+import (
+	"fmt"
+	"github.com/deso-protocol/core/bls"
+	"github.com/deso-protocol/core/cmd"
+	"github.com/deso-protocol/core/lib"
+	"os"
+	"testing"
+)
+
+func waitForValidatorConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) {
+	userAgentN1 := node1.Params.UserAgent
+	userAgentN2 := node2.Params.UserAgent
+	rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager()
+	n1ValidatedN2 := func() bool {
+		if true != checkRemoteNodeIndexerUserAgent(rnManagerN1, userAgentN2, true, false, false) {
+			return false
+		}
+		rnFromN2 := getRemoteNodeWithUserAgent(node1, userAgentN2)
+		if rnFromN2 == nil {
+			return false
+		}
+		if !rnFromN2.IsHandshakeCompleted() {
+			return false
+		}
+		return true
+	}
+	waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to validator Node (%s)", userAgentN1, userAgentN2), n1ValidatedN2)
+}
+
+func waitForNonValidatorOutboundConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) {
+	userAgentN1 := node1.Params.UserAgent
+	userAgentN2 := node2.Params.UserAgent
+	rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager()
+	n1ValidatedN2 := func() bool {
+		if true != checkRemoteNodeIndexerUserAgent(rnManagerN1, userAgentN2, false, true, false) {
+			return false
+		}
+		rnFromN2 := getRemoteNodeWithUserAgent(node1, userAgentN2)
+		if rnFromN2 == nil {
+			return false
+		}
+		if !rnFromN2.IsHandshakeCompleted() {
+			return false
+		}
+		if rnFromN2.GetValidatorPublicKey() != nil {
+			return false
+		}
+		return true
+	}
+	waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to outbound non-validator Node (%s)", userAgentN1, userAgentN2), n1ValidatedN2)
+}
+
+func waitForNonValidatorInboundConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) {
+	userAgentN1 := node1.Params.UserAgent
+	userAgentN2 := node2.Params.UserAgent
+	rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager()
+	n1ValidatedN2 := func() bool {
+		if true != checkRemoteNodeIndexerUserAgent(rnManagerN1, userAgentN2, false, false, true) {
+			return false
+		}
+		rnFromN2 := getRemoteNodeWithUserAgent(node1, userAgentN2)
+		if rnFromN2 == nil {
+			return false
+		}
+		if !rnFromN2.IsHandshakeCompleted() {
+			return false
+		}
+		if rnFromN2.GetValidatorPublicKey() != nil {
+			return false
+		}
+		return true
+	}
+	waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to inbound
non-validator Node (%s)", userAgentN1, userAgentN2), n1ValidatedN2) +} + +func waitForEmptyRemoteNodeIndexer(t *testing.T, node1 *cmd.Node) { + userAgentN1 := node1.Params.UserAgent + rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager() + n1ValidatedN2 := func() bool { + if true != checkRemoteNodeIndexerEmpty(rnManagerN1) { + return false + } + return true + } + waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to disconnect from all RemoteNodes", userAgentN1), n1ValidatedN2) +} + +func waitForCountRemoteNodeIndexer(t *testing.T, node1 *cmd.Node, allCount int, validatorCount int, + nonValidatorOutboundCount int, nonValidatorInboundCount int) { + + userAgentN1 := node1.Params.UserAgent + rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager() + n1ValidatedN2 := func() bool { + if true != checkRemoteNodeIndexerCount(rnManagerN1, allCount, validatorCount, nonValidatorOutboundCount, nonValidatorInboundCount) { + return false + } + return true + } + waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to have appropriate RemoteNodes counts", userAgentN1), n1ValidatedN2) +} + +func checkRemoteNodeIndexerUserAgent(manager *lib.RemoteNodeManager, userAgent string, validator bool, + nonValidatorOutbound bool, nonValidatorInbound bool) bool { + + if true != checkUserAgentInRemoteNodeList(userAgent, manager.GetAllRemoteNodes().GetAll()) { + return false + } + if validator != checkUserAgentInRemoteNodeList(userAgent, manager.GetValidatorIndex().GetAll()) { + return false + } + if nonValidatorOutbound != checkUserAgentInRemoteNodeList(userAgent, manager.GetNonValidatorOutboundIndex().GetAll()) { + return false + } + if nonValidatorInbound != checkUserAgentInRemoteNodeList(userAgent, manager.GetNonValidatorInboundIndex().GetAll()) { + return false + } + + return true +} + +func checkRemoteNodeIndexerCount(manager *lib.RemoteNodeManager, allCount int, validatorCount int, + nonValidatorOutboundCount int, nonValidatorInboundCount int) bool { + + if allCount != manager.GetAllRemoteNodes().Count() { + return false + } + if validatorCount != manager.GetValidatorIndex().Count() { + return false + } + if nonValidatorOutboundCount != manager.GetNonValidatorOutboundIndex().Count() { + return false + } + if nonValidatorInboundCount != manager.GetNonValidatorInboundIndex().Count() { + return false + } + + return true +} + +func checkRemoteNodeIndexerEmpty(manager *lib.RemoteNodeManager) bool { + if manager.GetAllRemoteNodes().Count() != 0 { + return false + } + if manager.GetValidatorIndex().Count() != 0 { + return false + } + if manager.GetNonValidatorOutboundIndex().Count() != 0 { + return false + } + if manager.GetNonValidatorInboundIndex().Count() != 0 { + return false + } + return true +} + +func checkUserAgentInRemoteNodeList(userAgent string, rnList []*lib.RemoteNode) bool { + for _, rn := range rnList { + if rn == nil { + continue + } + if rn.GetUserAgent() == userAgent { + return true + } + } + return false +} + +func getRemoteNodeWithUserAgent(node *cmd.Node, userAgent string) *lib.RemoteNode { + rnManager := node.Server.GetConnectionController().GetRemoteNodeManager() + rnList := rnManager.GetAllRemoteNodes().GetAll() + for _, rn := range rnList { + if rn.GetUserAgent() == userAgent { + return rn + } + } + return nil +} + +func spawnNonValidatorNodeProtocol2(t *testing.T, port uint32, id string) *cmd.Node { + dbDir := getDirectory(t) + t.Cleanup(func() { + os.RemoveAll(dbDir) + }) + config := generateConfig(t, port, dbDir, 10) + config.SyncType = 
lib.NodeSyncTypeBlockSync + node := cmd.NewNode(config) + node.Params.UserAgent = id + node.Params.ProtocolVersion = lib.ProtocolVersion2 + return node +} + +func spawnValidatorNodeProtocol2(t *testing.T, port uint32, id string, blsPriv *bls.PrivateKey) *cmd.Node { + dbDir := getDirectory(t) + t.Cleanup(func() { + os.RemoveAll(dbDir) + }) + config := generateConfig(t, port, dbDir, 10) + config.SyncType = lib.NodeSyncTypeBlockSync + config.PosValidatorSeed = blsPriv.ToString() + node := cmd.NewNode(config) + node.Params.UserAgent = id + node.Params.ProtocolVersion = lib.ProtocolVersion2 + return node +} diff --git a/integration_testing/hypersync_test.go b/integration_testing/hypersync_test.go index aad90ee0e..bc4c8a7c0 100644 --- a/integration_testing/hypersync_test.go +++ b/integration_testing/hypersync_test.go @@ -53,6 +53,7 @@ func TestSimpleHyperSync(t *testing.T) { //compareNodesByDB(t, node1, node2, 0) compareNodesByChecksum(t, node1, node2) fmt.Println("Databases match!") + bridge.Disconnect() node1.Stop() node2.Stop() } @@ -122,6 +123,8 @@ func TestHyperSyncFromHyperSyncedNode(t *testing.T) { compareNodesByChecksum(t, node2, node3) fmt.Println("Databases match!") + bridge12.Disconnect() + bridge23.Disconnect() node1.Stop() node2.Stop() node3.Stop() @@ -178,6 +181,7 @@ func TestSimpleHyperSyncRestart(t *testing.T) { compareNodesByChecksum(t, node1, node2) fmt.Println("Random restart successful! Random sync prefix was", syncPrefix) fmt.Println("Databases match!") + bridge.Disconnect() node1.Stop() node2.Stop() } @@ -255,6 +259,8 @@ func TestSimpleHyperSyncDisconnectWithSwitchingToNewPeer(t *testing.T) { compareNodesByChecksum(t, node1, node2) fmt.Println("Random restart successful! Random sync prefix was", syncPrefix) fmt.Println("Databases match!") + bridge12.Disconnect() + bridge23.Disconnect() node1.Stop() node2.Stop() node3.Stop() @@ -349,6 +355,7 @@ func TestArchivalMode(t *testing.T) { //compareNodesByDB(t, node1, node2, 0) compareNodesByChecksum(t, node1, node2) fmt.Println("Databases match!") + bridge.Disconnect() node1.Stop() node2.Stop() } @@ -406,6 +413,9 @@ func TestBlockSyncFromArchivalModeHyperSync(t *testing.T) { //compareNodesByDB(t, node1, node2, 0) compareNodesByChecksum(t, node1, node2) fmt.Println("Databases match!") + bridge12.Disconnect() + bridge23.Disconnect() node1.Stop() node2.Stop() + node3.Stop() } diff --git a/integration_testing/migrations_test.go b/integration_testing/migrations_test.go index b0a692b52..1419d483e 100644 --- a/integration_testing/migrations_test.go +++ b/integration_testing/migrations_test.go @@ -59,6 +59,7 @@ func TestEncoderMigrations(t *testing.T) { compareNodesByChecksum(t, node1, node2) fmt.Println("Databases match!") + bridge.Disconnect() node1.Stop() node2.Stop() } diff --git a/integration_testing/mining_test.go b/integration_testing/mining_test.go index 49a23333c..88de5e097 100644 --- a/integration_testing/mining_test.go +++ b/integration_testing/mining_test.go @@ -29,9 +29,7 @@ func TestRegtestMiner(t *testing.T) { // wait for node1 to sync blocks mineHeight := uint32(40) - listener := make(chan bool) - listenForBlockHeight(t, node1, mineHeight, listener) - <-listener + <-listenForBlockHeight(node1, mineHeight) node1.Stop() } diff --git a/integration_testing/tools.go b/integration_testing/tools.go index c73b82873..2f97e942d 100644 --- a/integration_testing/tools.go +++ b/integration_testing/tools.go @@ -150,7 +150,8 @@ func compareNodesByChecksum(t *testing.T, nodeA *cmd.Node, nodeB *cmd.Node) { // compareNodesByState will look 
through all state records in nodeA and nodeB databases and will compare them. // The nodes pass this comparison iff they have identical states. func compareNodesByState(t *testing.T, nodeA *cmd.Node, nodeB *cmd.Node, verbose int) { - compareNodesByStateWithPrefixList(t, nodeA.ChainDB, nodeB.ChainDB, lib.StatePrefixes.StatePrefixesList, verbose) + compareNodesByStateWithPrefixList(t, nodeA.Server.GetBlockchain().DB(), nodeB.Server.GetBlockchain().DB(), + lib.StatePrefixes.StatePrefixesList, verbose) } // compareNodesByDB will look through all records in nodeA and nodeB databases and will compare them. @@ -164,7 +165,8 @@ func compareNodesByDB(t *testing.T, nodeA *cmd.Node, nodeB *cmd.Node, verbose in } prefixList = append(prefixList, []byte{prefix}) } - compareNodesByStateWithPrefixList(t, nodeA.ChainDB, nodeB.ChainDB, prefixList, verbose) + compareNodesByStateWithPrefixList(t, nodeA.Server.GetBlockchain().DB(), nodeB.Server.GetBlockchain().DB(), + prefixList, verbose) } // compareNodesByDB will look through all records in nodeA and nodeB txindex databases and will compare them. @@ -386,25 +388,25 @@ func restartNode(t *testing.T, node *cmd.Node) *cmd.Node { } // listenForBlockHeight busy-waits until the node's block tip reaches provided height. -func listenForBlockHeight(t *testing.T, node *cmd.Node, height uint32, signal chan<- bool) { +func listenForBlockHeight(node *cmd.Node, height uint32) (_listener chan bool) { + listener := make(chan bool) ticker := time.NewTicker(1 * time.Millisecond) go func() { for { <-ticker.C if node.Server.GetBlockchain().BlockTip().Height >= height { - signal <- true + listener <- true break } } }() + return listener } // disconnectAtBlockHeight busy-waits until the node's block tip reaches provided height, and then disconnects // from the provided bridge. -func disconnectAtBlockHeight(t *testing.T, syncingNode *cmd.Node, bridge *ConnectionBridge, height uint32) { - listener := make(chan bool) - listenForBlockHeight(t, syncingNode, height, listener) - <-listener +func disconnectAtBlockHeight(syncingNode *cmd.Node, bridge *ConnectionBridge, height uint32) { + <-listenForBlockHeight(syncingNode, height) bridge.Disconnect() } @@ -414,7 +416,7 @@ func restartAtHeightAndReconnectNode(t *testing.T, node *cmd.Node, source *cmd.N height uint32) (_node *cmd.Node, _bridge *ConnectionBridge) { require := require.New(t) - disconnectAtBlockHeight(t, node, currentBridge, height) + disconnectAtBlockHeight(node, currentBridge, height) newNode := restartNode(t, node) // Wait after the restart. 
time.Sleep(1 * time.Second) @@ -475,3 +477,23 @@ func randomUint32Between(t *testing.T, min, max uint32) uint32 { randomHeight := uint32(randomNumber) % (max - min) return randomHeight + min } + +func waitForCondition(t *testing.T, id string, condition func() bool) { + signalChan := make(chan struct{}) + go func() { + for { + if condition() { + signalChan <- struct{}{} + return + } + time.Sleep(1 * time.Millisecond) + } + }() + + select { + case <-signalChan: + return + case <-time.After(5 * time.Second): + t.Fatalf("Condition timed out | %s", id) + } +} diff --git a/integration_testing/txindex_test.go b/integration_testing/txindex_test.go index aa13fd265..dfd398557 100644 --- a/integration_testing/txindex_test.go +++ b/integration_testing/txindex_test.go @@ -57,6 +57,7 @@ func TestSimpleTxIndex(t *testing.T) { compareNodesByDB(t, node1, node2, 0) compareNodesByTxIndex(t, node1, node2, 0) fmt.Println("Databases match!") + bridge.Disconnect() node1.Stop() node2.Stop() } diff --git a/lib/connection_controller.go b/lib/connection_controller.go new file mode 100644 index 000000000..fef9fa887 --- /dev/null +++ b/lib/connection_controller.go @@ -0,0 +1,329 @@ +package lib + +import ( + "fmt" + "github.com/btcsuite/btcd/addrmgr" + "github.com/btcsuite/btcd/wire" + "github.com/deso-protocol/core/bls" + "github.com/golang/glog" + "github.com/pkg/errors" + "net" + "strconv" +) + +// ConnectionController is a structure that oversees all connections to remote nodes. It is responsible for kicking off +// the initial connections a node makes to the network. It is also responsible for creating RemoteNodes from all +// successful outbound and inbound connections. The ConnectionController also ensures that the node is connected to +// the active validators, once the node reaches Proof of Stake. +// TODO: Document more in later PRs +type ConnectionController struct { + // The parameters we are initialized with. + params *DeSoParams + + cmgr *ConnectionManager + blsKeystore *BLSKeystore + + handshake *HandshakeController + + rnManager *RemoteNodeManager + + // The address manager keeps track of peer addresses we're aware of. When + // we need to connect to a new outbound peer, it chooses one of the addresses + // it's aware of at random and provides it to us. + AddrMgr *addrmgr.AddrManager + + // When --connectips is set, we don't connect to anything from the addrmgr. + connectIps []string + + // The target number of non-validator outbound remote nodes we want to have. We will disconnect remote nodes once + // we've exceeded this number of outbound connections. + targetNonValidatorOutboundRemoteNodes uint32 + // The target number of non-validator inbound remote nodes we want to have. We will disconnect remote nodes once + // we've exceeded this number of inbound connections. + targetNonValidatorInboundRemoteNodes uint32 + // When true, only one connection per IP is allowed. Prevents eclipse attacks + // among other things. 
+ limitOneInboundRemoteNodePerIP bool +} + +func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, handshakeController *HandshakeController, + rnManager *RemoteNodeManager, blsKeystore *BLSKeystore, addrMgr *addrmgr.AddrManager, targetNonValidatorOutboundRemoteNodes uint32, + targetNonValidatorInboundRemoteNodes uint32, limitOneInboundConnectionPerIP bool) *ConnectionController { + + return &ConnectionController{ + params: params, + cmgr: cmgr, + blsKeystore: blsKeystore, + handshake: handshakeController, + rnManager: rnManager, + AddrMgr: addrMgr, + targetNonValidatorOutboundRemoteNodes: targetNonValidatorOutboundRemoteNodes, + targetNonValidatorInboundRemoteNodes: targetNonValidatorInboundRemoteNodes, + limitOneInboundRemoteNodePerIP: limitOneInboundConnectionPerIP, + } +} + +func (cc *ConnectionController) GetRemoteNodeManager() *RemoteNodeManager { + return cc.rnManager +} + +// ########################### +// ## Handlers (Peer, DeSoMessage) +// ########################### + +func (cc *ConnectionController) _handleDonePeerMessage(origin *Peer, desoMsg DeSoMessage) { + if desoMsg.GetMsgType() != MsgTypeDisconnectedPeer { + return + } + + cc.rnManager.DisconnectById(NewRemoteNodeId(origin.ID)) +} + +func (cc *ConnectionController) _handleAddrMessage(origin *Peer, desoMsg DeSoMessage) { + if desoMsg.GetMsgType() != MsgTypeAddr { + return + } + + // TODO +} + +func (cc *ConnectionController) _handleGetAddrMessage(origin *Peer, desoMsg DeSoMessage) { + if desoMsg.GetMsgType() != MsgTypeGetAddr { + return + } + + // TODO +} + +// _handleNewConnectionMessage is called when a new outbound or inbound connection is established. It is responsible +// for creating a RemoteNode from the connection and initiating the handshake. The incoming DeSoMessage is a control message. +func (cc *ConnectionController) _handleNewConnectionMessage(origin *Peer, desoMsg DeSoMessage) { + if desoMsg.GetMsgType() != MsgTypeNewConnection { + return + } + + msg, ok := desoMsg.(*MsgDeSoNewConnection) + if !ok { + return + } + + var remoteNode *RemoteNode + var err error + switch msg.Connection.GetConnectionType() { + case ConnectionTypeInbound: + remoteNode, err = cc.processInboundConnection(msg.Connection) + if err != nil { + glog.Errorf("ConnectionController.handleNewConnectionMessage: Problem handling inbound connection: %v", err) + msg.Connection.Close() + return + } + case ConnectionTypeOutbound: + remoteNode, err = cc.processOutboundConnection(msg.Connection) + if err != nil { + glog.Errorf("ConnectionController.handleNewConnectionMessage: Problem handling outbound connection: %v", err) + cc.cleanupFailedOutboundConnection(msg.Connection) + return + } + } + + // If we made it here, we have a valid remote node. We will now initiate the handshake. 
+ cc.handshake.InitiateHandshake(remoteNode) +} + +func (cc *ConnectionController) cleanupFailedOutboundConnection(connection Connection) { + oc, ok := connection.(*outboundConnection) + if !ok { + return + } + + id := NewRemoteNodeId(oc.attemptId) + rn := cc.rnManager.GetRemoteNodeById(id) + if rn != nil { + cc.rnManager.Disconnect(rn) + } + cc.cmgr.RemoveAttemptedOutboundAddrs(oc.address) +} + +// ########################### +// ## Connections +// ########################### + +func (cc *ConnectionController) CreateValidatorConnection(ipStr string, publicKey *bls.PublicKey) error { + netAddr, err := cc.ConvertIPStringToNetAddress(ipStr) + if err != nil { + return err + } + return cc.rnManager.CreateValidatorConnection(netAddr, publicKey) +} + +func (cc *ConnectionController) CreateNonValidatorPersistentOutboundConnection(ipStr string) error { + netAddr, err := cc.ConvertIPStringToNetAddress(ipStr) + if err != nil { + return err + } + return cc.rnManager.CreateNonValidatorPersistentOutboundConnection(netAddr) +} + +func (cc *ConnectionController) CreateNonValidatorOutboundConnection(ipStr string) error { + netAddr, err := cc.ConvertIPStringToNetAddress(ipStr) + if err != nil { + return err + } + return cc.rnManager.CreateNonValidatorOutboundConnection(netAddr) +} + +func (cc *ConnectionController) SetTargetOutboundPeers(numPeers uint32) { + cc.targetNonValidatorOutboundRemoteNodes = numPeers +} + +func (cc *ConnectionController) enoughNonValidatorInboundConnections() bool { + return uint32(cc.rnManager.GetNonValidatorInboundIndex().Count()) >= cc.targetNonValidatorInboundRemoteNodes +} + +func (cc *ConnectionController) enoughNonValidatorOutboundConnections() bool { + return uint32(cc.rnManager.GetNonValidatorOutboundIndex().Count()) >= cc.targetNonValidatorOutboundRemoteNodes +} + +// processInboundConnection is called when a new inbound connection is established. At this point, the connection is not validated, +// nor is it assigned to a RemoteNode. This function is responsible for validating the connection and creating a RemoteNode from it. +// Once the RemoteNode is created, we will initiate handshake. +func (cc *ConnectionController) processInboundConnection(conn Connection) (*RemoteNode, error) { + var ic *inboundConnection + var ok bool + if ic, ok = conn.(*inboundConnection); !ok { + return nil, fmt.Errorf("ConnectionController.handleInboundConnection: Connection is not an inboundConnection") + } + + // Reject the peer if we have too many inbound connections already. + if cc.enoughNonValidatorInboundConnections() { + return nil, fmt.Errorf("ConnectionController.handleInboundConnection: Rejecting INBOUND peer (%s) due to max "+ + "inbound peers (%d) hit", ic.connection.RemoteAddr().String(), cc.targetNonValidatorInboundRemoteNodes) + } + + // If we want to limit inbound connections to one per IP address, check to make sure this address isn't already connected. 
+ if cc.limitOneInboundRemoteNodePerIP && + cc.isDuplicateInboundIPAddress(ic.connection.RemoteAddr()) { + + return nil, fmt.Errorf("ConnectionController.handleInboundConnection: Rejecting INBOUND peer (%s) due to "+ + "already having an inbound connection from the same IP with limit_one_inbound_connection_per_ip set", + ic.connection.RemoteAddr().String()) + } + + na, err := cc.ConvertIPStringToNetAddress(ic.connection.RemoteAddr().String()) + if err != nil { + return nil, errors.Wrapf(err, "ConnectionController.handleInboundConnection: Problem calling "+ + "ConvertIPStringToNetAddress for addr: (%s)", ic.connection.RemoteAddr().String()) + } + + remoteNode, err := cc.rnManager.AttachInboundConnection(ic.connection, na) + if remoteNode == nil || err != nil { + return nil, errors.Wrapf(err, "ConnectionController.handleInboundConnection: Problem calling "+ + "AttachInboundConnection for addr: (%s)", ic.connection.RemoteAddr().String()) + } + + return remoteNode, nil +} + +// processOutboundConnection is called when a new outbound connection is established. At this point, the connection is not validated, +// nor is it assigned to a RemoteNode. This function is responsible for validating the connection and creating a RemoteNode from it. +// Once the RemoteNode is created, we will initiate handshake. +func (cc *ConnectionController) processOutboundConnection(conn Connection) (*RemoteNode, error) { + var oc *outboundConnection + var ok bool + if oc, ok = conn.(*outboundConnection); !ok { + return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Connection is not an outboundConnection") + } + + if oc.failed { + return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Failed to connect to peer (%s)", + oc.address.IP.String()) + } + + if !oc.isPersistent { + cc.AddrMgr.Connected(oc.address) + cc.AddrMgr.Good(oc.address) + } + + // if this is a non-persistent outbound peer, and we already have enough outbound peers, then don't bother adding this one. + if !oc.isPersistent && cc.enoughNonValidatorOutboundConnections() { + return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Connected to maximum number of outbound "+ + "peers (%d)", cc.targetNonValidatorOutboundRemoteNodes) + } + + // If this is a non-persistent outbound peer and the group key overlaps with another peer we're already connected to then + // abort mission. We only connect to one peer per IP group in order to prevent Sybil attacks. 
+	if !oc.isPersistent && cc.cmgr.IsFromRedundantOutboundIPAddress(oc.address) {
+		return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+
+			"connection with redundant group key (%s).", addrmgr.GroupKey(oc.address))
+	}
+
+	na, err := cc.ConvertIPStringToNetAddress(oc.connection.RemoteAddr().String())
+	if err != nil {
+		return nil, errors.Wrapf(err, "ConnectionController.handleOutboundConnection: Problem calling ConvertIPStringToNetAddress "+
+			"for addr: (%s)", oc.connection.RemoteAddr().String())
+	}
+
+	remoteNode, err := cc.rnManager.AttachOutboundConnection(oc.connection, na, oc.attemptId, oc.isPersistent)
+	if remoteNode == nil || err != nil {
+		return nil, errors.Wrapf(err, "ConnectionController.handleOutboundConnection: Problem calling rnManager.AttachOutboundConnection "+
+			"for addr: (%s)", oc.connection.RemoteAddr().String())
+	}
+	return remoteNode, nil
+}
+
+func (cc *ConnectionController) ConvertIPStringToNetAddress(ipStr string) (*wire.NetAddress, error) {
+	netAddr, err := IPToNetAddr(ipStr, cc.AddrMgr, cc.params)
+	if err != nil {
+		return nil, errors.Wrapf(err,
+			"ConnectionController.ConvertIPStringToNetAddress: Problem parsing "+
+				"ipString to wire.NetAddress")
+	}
+	if netAddr == nil {
+		return nil, fmt.Errorf("ConnectionController.ConvertIPStringToNetAddress: " +
+			"address was nil after parsing")
+	}
+	return netAddr, nil
+}
+
+func IPToNetAddr(ipStr string, addrMgr *addrmgr.AddrManager, params *DeSoParams) (*wire.NetAddress, error) {
+	port := params.DefaultSocketPort
+	host, portstr, err := net.SplitHostPort(ipStr)
+	if err != nil {
+		// No port specified so leave port=default and set
+		// host to the ipStr.
+		host = ipStr
+	} else {
+		pp, err := strconv.ParseUint(portstr, 10, 16)
+		if err != nil {
+			return nil, errors.Wrapf(err, "IPToNetAddr: Cannot parse port from %s", ipStr)
+		}
+		port = uint16(pp)
+	}
+	netAddr, err := addrMgr.HostToNetAddress(host, port, 0)
+	if err != nil {
+		return nil, errors.Wrapf(err, "IPToNetAddr: Cannot convert host from %s to a net address", ipStr)
+	}
+	return netAddr, nil
+}
+
+func (cc *ConnectionController) isDuplicateInboundIPAddress(addr net.Addr) bool {
+	netAddr, err := IPToNetAddr(addr.String(), cc.AddrMgr, cc.params)
+	if err != nil {
+		// Return true in case we have an error. We do this because it
+		// will result in the peer connection not being accepted, which
+		// is desired in this case.
+		glog.Warningf(errors.Wrapf(err,
+			"ConnectionController.isDuplicateInboundIPAddress: Problem parsing "+
+				"net.Addr to wire.NetAddress so marking as redundant and not "+
+				"making connection").Error())
+		return true
+	}
+	if netAddr == nil {
+		glog.Warningf("ConnectionController.isDuplicateInboundIPAddress: " +
+			"address was nil after parsing so marking as redundant and not " +
+			"making connection")
+		return true
+	}
+
+	return cc.cmgr.IsDuplicateInboundIPAddress(netAddr)
+}
diff --git a/lib/connection_manager.go b/lib/connection_manager.go
index 38924bdf9..7c6f510ac 100644
--- a/lib/connection_manager.go
+++ b/lib/connection_manager.go
@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"math"
 	"net"
-	"strconv"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -14,7 +13,6 @@ import (
 	"github.com/btcsuite/btcd/wire"
 	"github.com/decred/dcrd/lru"
 	"github.com/golang/glog"
-	"github.com/pkg/errors"
 )
 
 // connection_manager.go contains most of the logic for creating and managing
@@ -36,24 +34,10 @@ type ConnectionManager struct {
 	// doesn't need a reference to the Server object. But for now we keep things lazy.
srv *Server - // When --connectips is set, we don't connect to anything from the addrmgr. - connectIps []string - - // The address manager keeps track of peer addresses we're aware of. When - // we need to connect to a new outbound peer, it chooses one of the addresses - // it's aware of at random and provides it to us. - AddrMgr *addrmgr.AddrManager // The interfaces we listen on for new incoming connections. listeners []net.Listener // The parameters we are initialized with. params *DeSoParams - // The target number of outbound peers we want to have. - targetOutboundPeers uint32 - // The maximum number of inbound peers we allow. - maxInboundPeers uint32 - // When true, only one connection per IP is allowed. Prevents eclipse attacks - // among other things. - limitOneInboundConnectionPerIP bool // When --hypersync is set to true we will attempt fast block synchronization HyperSync bool @@ -136,10 +120,8 @@ type ConnectionManager struct { } func NewConnectionManager( - _params *DeSoParams, _addrMgr *addrmgr.AddrManager, _listeners []net.Listener, + _params *DeSoParams, _listeners []net.Listener, _connectIps []string, _timeSource chainlib.MedianTimeSource, - _targetOutboundPeers uint32, _maxInboundPeers uint32, - _limitOneInboundConnectionPerIP bool, _hyperSync bool, _syncType NodeSyncType, _stallTimeoutSeconds uint64, @@ -150,16 +132,13 @@ func NewConnectionManager( ValidateHyperSyncFlags(_hyperSync, _syncType) return &ConnectionManager{ - srv: _srv, - params: _params, - AddrMgr: _addrMgr, - listeners: _listeners, - connectIps: _connectIps, + srv: _srv, + params: _params, + listeners: _listeners, // We keep track of the last N nonces we've sent in order to detect // self connections. sentNonces: lru.NewCache(1000), timeSource: _timeSource, - //newestBlock: _newestBlock, // Initialize the peer data structures. @@ -176,15 +155,13 @@ func NewConnectionManager( newPeerChan: make(chan *Peer, 100), donePeerChan: make(chan *Peer, 100), outboundConnectionChan: make(chan *outboundConnection, 100), + inboundConnectionChan: make(chan *inboundConnection, 100), - targetOutboundPeers: _targetOutboundPeers, - maxInboundPeers: _maxInboundPeers, - limitOneInboundConnectionPerIP: _limitOneInboundConnectionPerIP, - HyperSync: _hyperSync, - SyncType: _syncType, - serverMessageQueue: _serverMessageQueue, - stallTimeoutSeconds: _stallTimeoutSeconds, - minFeeRateNanosPerKB: _minFeeRateNanosPerKB, + HyperSync: _hyperSync, + SyncType: _syncType, + serverMessageQueue: _serverMessageQueue, + stallTimeoutSeconds: _stallTimeoutSeconds, + minFeeRateNanosPerKB: _minFeeRateNanosPerKB, } } @@ -224,40 +201,6 @@ func (cmgr *ConnectionManager) subFromGroupKey(na *wire.NetAddress) { cmgr.mtxOutboundConnIPGroups.Unlock() } -func (cmgr *ConnectionManager) getRandomAddr() *wire.NetAddress { - for tries := 0; tries < 100; tries++ { - addr := cmgr.AddrMgr.GetAddress() - if addr == nil { - glog.V(2).Infof("ConnectionManager.getRandomAddr: addr from GetAddressWithExclusions was nil") - break - } - - // Lock the address map since multiple threads will be trying to read - // and modify it at the same time. - cmgr.mtxAddrsMaps.RLock() - ok := cmgr.connectedOutboundAddrs[addrmgr.NetAddressKey(addr.NetAddress())] - cmgr.mtxAddrsMaps.RUnlock() - if ok { - glog.V(2).Infof("ConnectionManager.getRandomAddr: Not choosing already connected address %v:%v", addr.NetAddress().IP, addr.NetAddress().Port) - continue - } - - // We can only have one outbound address per /16. This is similar to - // Bitcoin and we do it to prevent Sybil attacks. 
- if cmgr.IsFromRedundantOutboundIPAddress(addr.NetAddress()) { - glog.V(2).Infof("ConnectionManager.getRandomAddr: Not choosing address due to redundant group key %v:%v", addr.NetAddress().IP, addr.NetAddress().Port) - continue - } - - glog.V(2).Infof("ConnectionManager.getRandomAddr: Returning %v:%v at %d iterations", - addr.NetAddress().IP, addr.NetAddress().Port, tries) - return addr.NetAddress() - } - - glog.V(2).Infof("ConnectionManager.getRandomAddr: Returning nil") - return nil -} - func _delayRetry(retryCount uint64, persistentAddrForLogging *wire.NetAddress, unit time.Duration) (_retryDuration time.Duration) { // No delay if we haven't tried yet or if the number of retries isn't positive. if retryCount <= 0 { @@ -276,42 +219,6 @@ func _delayRetry(retryCount uint64, persistentAddrForLogging *wire.NetAddress, u return retryDelay } -func (cmgr *ConnectionManager) enoughOutboundPeers() bool { - val := atomic.LoadUint32(&cmgr.numOutboundPeers) - if val > cmgr.targetOutboundPeers { - glog.Errorf("enoughOutboundPeers: Connected to too many outbound "+ - "peers: (%d). Should be "+ - "no more than (%d).", val, cmgr.targetOutboundPeers) - return true - } - - if val == cmgr.targetOutboundPeers { - return true - } - return false -} - -func IPToNetAddr(ipStr string, addrMgr *addrmgr.AddrManager, params *DeSoParams) (*wire.NetAddress, error) { - port := params.DefaultSocketPort - host, portstr, err := net.SplitHostPort(ipStr) - if err != nil { - // No port specified so leave port=default and set - // host to the ipStr. - host = ipStr - } else { - pp, err := strconv.ParseUint(portstr, 10, 16) - if err != nil { - return nil, errors.Wrapf(err, "IPToNetAddr: Can not parse port from %s for ip", ipStr) - } - port = uint16(pp) - } - netAddr, err := addrMgr.HostToNetAddress(host, port, 0) - if err != nil { - return nil, errors.Wrapf(err, "IPToNetAddr: Can not parse port from %s for ip", ipStr) - } - return netAddr, nil -} - func (cmgr *ConnectionManager) IsConnectedOutboundIpAddress(netAddr *wire.NetAddress) bool { cmgr.mtxAddrsMaps.RLock() defer cmgr.mtxAddrsMaps.RUnlock() @@ -338,13 +245,15 @@ func (cmgr *ConnectionManager) RemoveAttemptedOutboundAddrs(netAddr *wire.NetAdd // DialPersistentOutboundConnection attempts to connect to a persistent peer. func (cmgr *ConnectionManager) DialPersistentOutboundConnection(persistentAddr *wire.NetAddress, attemptId uint64) (_attemptId uint64) { - glog.V(2).Infof("ConnectionManager.DialPersistentOutboundConnection: Connecting to peer %v", persistentAddr.IP.String()) + glog.V(2).Infof("ConnectionManager.DialPersistentOutboundConnection: Connecting to peer (IP=%v, Port=%v)", + persistentAddr.IP.String(), persistentAddr.Port) return cmgr._dialOutboundConnection(persistentAddr, attemptId, true) } // DialOutboundConnection attempts to connect to a non-persistent peer. 
func (cmgr *ConnectionManager) DialOutboundConnection(addr *wire.NetAddress, attemptId uint64) { - glog.V(2).Infof("ConnectionManager.ConnectOutboundConnection: Connecting to peer %v", addr.IP.String()) + glog.V(2).Infof("ConnectionManager.ConnectOutboundConnection: Connecting to peer (IP=%v, Port=%v)", + addr.IP.String(), addr.Port) cmgr._dialOutboundConnection(addr, attemptId, false) } @@ -400,7 +309,7 @@ func (cmgr *ConnectionManager) ConnectPeer(id uint64, conn net.Conn, na *wire.Ne return peer } -func (cmgr *ConnectionManager) IsFromRedundantInboundIPAddress(netAddr *wire.NetAddress) bool { +func (cmgr *ConnectionManager) IsDuplicateInboundIPAddress(netAddr *wire.NetAddress) bool { cmgr.mtxPeerMaps.RLock() defer cmgr.mtxPeerMaps.RUnlock() @@ -412,7 +321,7 @@ func (cmgr *ConnectionManager) IsFromRedundantInboundIPAddress(netAddr *wire.Net // nodes on a local machine. // TODO: Should this be a flag? if net.IP([]byte{127, 0, 0, 1}).Equal(netAddr.IP) { - glog.V(1).Infof("ConnectionManager._isFromRedundantInboundIPAddress: Allowing " + + glog.V(1).Infof("ConnectionManager.IsDuplicateInboundIPAddress: Allowing " + "localhost IP address to connect") return false } diff --git a/lib/constants.go b/lib/constants.go index 460b8dadb..0a525332e 100644 --- a/lib/constants.go +++ b/lib/constants.go @@ -498,6 +498,10 @@ func (pvt ProtocolVersionType) Before(version ProtocolVersionType) bool { return pvt.ToUint64() < version.ToUint64() } +func (pvt ProtocolVersionType) After(version ProtocolVersionType) bool { + return pvt.ToUint64() > version.ToUint64() +} + // DeSoParams defines the full list of possible parameters for the // DeSo network. type DeSoParams struct { @@ -564,6 +568,8 @@ type DeSoParams struct { DialTimeout time.Duration // The amount of time we wait to receive a version message from a peer. VersionNegotiationTimeout time.Duration + // The amount of time we wait to receive a verack message from a peer. + VerackNegotiationTimeout time.Duration // The maximum number of addresses to broadcast to peers. MaxAddressesToBroadcast uint32 @@ -1025,6 +1031,7 @@ var DeSoMainnetParams = DeSoParams{ DialTimeout: 30 * time.Second, VersionNegotiationTimeout: 30 * time.Second, + VerackNegotiationTimeout: 30 * time.Second, MaxAddressesToBroadcast: 10, @@ -1296,6 +1303,7 @@ var DeSoTestnetParams = DeSoParams{ DialTimeout: 30 * time.Second, VersionNegotiationTimeout: 30 * time.Second, + VerackNegotiationTimeout: 30 * time.Second, MaxAddressesToBroadcast: 10, diff --git a/lib/pos_handshake_controller.go b/lib/handshake_controller.go similarity index 96% rename from lib/pos_handshake_controller.go rename to lib/handshake_controller.go index 6f4804f2e..bde07745a 100644 --- a/lib/pos_handshake_controller.go +++ b/lib/handshake_controller.go @@ -5,12 +5,15 @@ import ( "github.com/decred/dcrd/lru" "github.com/golang/glog" "math" + "sync" ) // HandshakeController is a structure that handles the handshake process with remote nodes. It is the entry point for // initiating a handshake with a remote node. It is also responsible for handling version/verack messages from remote // nodes. And for handling the handshake complete control message. type HandshakeController struct { + mtxHandshakeComplete sync.Mutex + rnManager *RemoteNodeManager usedNonces lru.Cache } @@ -37,6 +40,10 @@ func (hc *HandshakeController) InitiateHandshake(rn *RemoteNode) { // _handleHandshakeCompleteMessage handles HandshakeComplete control messages, sent by RemoteNodes. 
func (hc *HandshakeController) _handleHandshakeCompleteMessage(origin *Peer, desoMsg DeSoMessage) { + // Prevent race conditions while handling handshake complete messages. + hc.mtxHandshakeComplete.Lock() + defer hc.mtxHandshakeComplete.Unlock() + if desoMsg.GetMsgType() != MsgTypePeerHandshakeComplete { return } diff --git a/lib/network.go b/lib/network.go index 80d412c4f..75474ea7c 100644 --- a/lib/network.go +++ b/lib/network.go @@ -1543,8 +1543,7 @@ func (msg *MsgDeSoPong) FromBytes(data []byte) error { type ServiceFlag uint64 const ( - // SFFullNodeDeprecated is deprecated, and set on all nodes by default - // now. We basically split it into SFHyperSync and SFArchivalMode. + // SFFullNodeDeprecated is deprecated, and set on all nodes by default now. SFFullNodeDeprecated ServiceFlag = 1 << 0 // SFHyperSync is a flag used to indicate that the peer supports hyper sync. SFHyperSync ServiceFlag = 1 << 1 @@ -1555,6 +1554,10 @@ const ( SFPosValidator ServiceFlag = 1 << 3 ) +func (sf ServiceFlag) HasService(serviceFlag ServiceFlag) bool { + return sf&serviceFlag == serviceFlag +} + type MsgDeSoVersion struct { // What is the current version we're on? Version uint64 @@ -1952,10 +1955,6 @@ func (msg *MsgDeSoVerack) EncodeVerackV0() ([]byte, error) { } func (msg *MsgDeSoVerack) EncodeVerackV1() ([]byte, error) { - if msg.PublicKey == nil || msg.Signature == nil { - return nil, fmt.Errorf("MsgDeSoVerack.EncodeVerackV1: PublicKey and Signature must be set for V1 message") - } - retBytes := []byte{} // Version diff --git a/lib/network_test.go b/lib/network_test.go index 8a971f75a..c0f721a99 100644 --- a/lib/network_test.go +++ b/lib/network_test.go @@ -93,12 +93,12 @@ func TestVerackV1(t *testing.T) { require := require.New(t) networkType := NetworkType_MAINNET - var buf bytes.Buffer + var buf1, buf2 bytes.Buffer nonceReceived := uint64(12345678910) nonceSent := nonceReceived + 1 tstamp := uint64(2345678910) - // First, test that nil public key and signature are not allowed. + // First, test that nil public key and signature are allowed. msg := &MsgDeSoVerack{ Version: VerackVersion1, NonceReceived: nonceReceived, @@ -107,8 +107,8 @@ func TestVerackV1(t *testing.T) { PublicKey: nil, Signature: nil, } - _, err := WriteMessage(&buf, msg, networkType) - require.Error(err) + _, err := WriteMessage(&buf1, msg, networkType) + require.NoError(err) payload := append(UintToBuf(nonceReceived), UintToBuf(nonceSent)...) payload = append(payload, UintToBuf(tstamp)...) hash := sha3.Sum256(payload) @@ -118,10 +118,10 @@ func TestVerackV1(t *testing.T) { msg.PublicKey = priv.PublicKey() msg.Signature, err = priv.Sign(hash[:]) require.NoError(err) - _, err = WriteMessage(&buf, msg, networkType) + _, err = WriteMessage(&buf2, msg, networkType) require.NoError(err) - verBytes := buf.Bytes() + verBytes := buf2.Bytes() testMsg, _, err := ReadMessage(bytes.NewReader(verBytes), networkType) require.NoError(err) require.Equal(msg, testMsg) diff --git a/lib/peer.go b/lib/peer.go index 98d2c135e..0af9aa0b7 100644 --- a/lib/peer.go +++ b/lib/peer.go @@ -1192,11 +1192,12 @@ func (pp *Peer) Start() { // If the address manager needs more addresses, then send a GetAddr message // to the peer. This is best-effort. if pp.cmgr != nil { - if pp.cmgr.AddrMgr.NeedMoreAddresses() { + // TODO: Move this to ConnectionController. + /*if pp.cmgr.AddrMgr.NeedMoreAddresses() { go func() { pp.QueueMessage(&MsgDeSoGetAddr{}) }() - } + }*/ } // Send our verack message now that the IO processing machinery has started. 
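
Before moving on to the remote_node.go changes, a short self-contained illustration of the HasService helper added above, since several of the new validator checks below hinge on it. The SFArchivalNode value is an inference from the surrounding constants (1 << 2 sits between SFHyperSync and SFPosValidator):

	package main

	import "fmt"

	type ServiceFlag uint64

	const (
		SFFullNodeDeprecated ServiceFlag = 1 << 0
		SFHyperSync          ServiceFlag = 1 << 1
		SFArchivalNode       ServiceFlag = 1 << 2 // inferred value
		SFPosValidator       ServiceFlag = 1 << 3
	)

	func (sf ServiceFlag) HasService(serviceFlag ServiceFlag) bool {
		return sf&serviceFlag == serviceFlag
	}

	func main() {
		// A hypersyncing validator advertises the union of its service bits.
		services := SFFullNodeDeprecated | SFHyperSync | SFPosValidator
		fmt.Println(services.HasService(SFPosValidator)) // true
		fmt.Println(services.HasService(SFArchivalNode)) // false
		// Multi-bit queries require every requested bit to be set.
		fmt.Println(services.HasService(SFHyperSync | SFPosValidator)) // true
	}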
diff --git a/lib/remote_node.go b/lib/remote_node.go index a357118a2..f2d849a36 100644 --- a/lib/remote_node.go +++ b/lib/remote_node.go @@ -191,6 +191,10 @@ func (rn *RemoteNode) GetValidatorPublicKey() *bls.PublicKey { return rn.validatorPublicKey } +func (rn *RemoteNode) GetServiceFlag() ServiceFlag { + return rn.handshakeMetadata.serviceFlag +} + func (rn *RemoteNode) GetUserAgent() string { return rn.handshakeMetadata.userAgent } @@ -223,7 +227,11 @@ func (rn *RemoteNode) IsValidator() bool { if !rn.IsHandshakeCompleted() { return false } - return rn.GetValidatorPublicKey() != nil + return rn.hasValidatorServiceFlag() +} + +func (rn *RemoteNode) hasValidatorServiceFlag() bool { + return rn.GetServiceFlag().HasService(SFPosValidator) } // DialOutboundConnection dials an outbound connection to the provided netAddr. @@ -359,7 +367,7 @@ func (rn *RemoteNode) sendVersionMessage(nonce uint64) error { return nil } -// newVersionMessage returns a new version message that can be sent to a RemoteNode peer. The message will contain the +// newVersionMessage returns a new version message that can be sent to a RemoteNode. The message will contain the // nonce that is passed in as an argument. func (rn *RemoteNode) newVersionMessage(nonce uint64) *MsgDeSoVersion { ver := NewMessage(MsgTypeVersion).(*MsgDeSoVersion) @@ -412,6 +420,12 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce vMeta := rn.handshakeMetadata // Record the version the peer is using. vMeta.advertisedProtocolVersion = NewProtocolVersionType(verMsg.Version) + // Make sure the latest supported protocol version is ProtocolVersion2. + if vMeta.advertisedProtocolVersion.After(ProtocolVersion2) { + return fmt.Errorf("RemoteNode.HandleVersionMessage: Requesting disconnect for id: (%v) "+ + "protocol version too high. Peer version: %v, max version: %v", rn.id, verMsg.Version, ProtocolVersion2) + } + // Decide on the protocol version to use for this connection. negotiatedVersion := rn.params.ProtocolVersion if verMsg.Version < rn.params.ProtocolVersion.ToUint64() { @@ -430,6 +444,17 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce // Record the services the peer is advertising. vMeta.serviceFlag = verMsg.Services + // If the RemoteNode was connected with an expectation of being a validator, make sure that its advertised ServiceFlag + // indicates that it is a validator. + if !rn.hasValidatorServiceFlag() && rn.validatorPublicKey != nil { + return fmt.Errorf("RemoteNode.HandleVersionMessage: Requesting disconnect for id: (%v). "+ + "Expected validator, but received invalid ServiceFlag: %v", rn.id, verMsg.Services) + } + // If the RemoteNode is on ProtocolVersion1, then it must not have the validator service flag set. + if rn.hasValidatorServiceFlag() && vMeta.advertisedProtocolVersion.Before(ProtocolVersion2) { + return fmt.Errorf("RemoteNode.HandleVersionMessage: Requesting disconnect for id: (%v). "+ + "RemoteNode has SFValidator service flag, but doesn't have ProtocolVersion2 or later", rn.id) + } // Record the tstamp sent by the peer and calculate the time offset. timeConnected := time.Unix(verMsg.TstampSecs, 0) @@ -450,7 +475,7 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce vMeta.minTxFeeRateNanosPerKB = verMsg.MinFeeRateNanosPerKB // Respond to the version message if this is an inbound peer. 
- if !rn.IsOutbound() { + if rn.IsInbound() { if err := rn.sendVersionMessage(responseNonce); err != nil { return errors.Wrapf(err, "RemoteNode.HandleVersionMessage: Problem sending version message to peer (id= %d)", rn.id) } @@ -460,7 +485,7 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce // peer's verack message even if it is an inbound peer. Instead, we just send the verack message right away. // Set the latest time by which we should receive a verack message from the peer. - verackTimeExpected := time.Now().Add(rn.params.VersionNegotiationTimeout) + verackTimeExpected := time.Now().Add(rn.params.VerackNegotiationTimeout) rn.verackTimeExpected = &verackTimeExpected if err := rn.sendVerack(); err != nil { return errors.Wrapf(err, "RemoteNode.HandleVersionMessage: Problem sending verack message to peer (id= %d)", rn.id) @@ -496,7 +521,6 @@ func (rn *RemoteNode) newVerackMessage() (*MsgDeSoVerack, error) { verack.Version = VerackVersion0 verack.NonceReceived = vMeta.versionNonceReceived case ProtocolVersion2: - // FIXME: resolve the non-validator - validator handshake issues on protocol version 2. // For protocol version 2, we need to send the nonce we received from the peer in their version message. // We also need to send our own nonce, which we generate for our version message. In addition, we need to // send a current timestamp (in microseconds). We then sign the tuple of (nonceReceived, nonceSent, tstampMicro) @@ -507,6 +531,10 @@ func (rn *RemoteNode) newVerackMessage() (*MsgDeSoVerack, error) { verack.NonceSent = vMeta.versionNonceSent tstampMicro := uint64(time.Now().UnixMicro()) verack.TstampMicro = tstampMicro + // If the RemoteNode is not a validator, then we don't need to sign the verack message. + if !rn.nodeServices.HasService(SFPosValidator) { + break + } verack.PublicKey = rn.keystore.GetSigner().GetPublicKey() verack.Signature, err = rn.keystore.GetSigner().SignPoSValidatorHandshake(verack.NonceSent, verack.NonceReceived, tstampMicro) if err != nil { @@ -599,6 +627,11 @@ func (rn *RemoteNode) validateVerackPoS(vrkMsg *MsgDeSoVerack) error { "verack timestamp too far in the past. Time now: %v, verack timestamp: %v", rn.id, timeNowMicro, vrkMsg.TstampMicro) } + // If the RemoteNode is not a validator, then we don't need to verify the verack message's signature. + if !rn.hasValidatorServiceFlag() { + return nil + } + // Make sure the verack message's public key and signature are not nil. 
if vrkMsg.PublicKey == nil || vrkMsg.Signature == nil { return fmt.Errorf("RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) "+ @@ -617,7 +650,7 @@ func (rn *RemoteNode) validateVerackPoS(vrkMsg *MsgDeSoVerack) error { "verack signature verification failed", rn.id) } - if rn.validatorPublicKey != nil || rn.validatorPublicKey.Serialize() != vrkMsg.PublicKey.Serialize() { + if rn.validatorPublicKey != nil && rn.validatorPublicKey.Serialize() != vrkMsg.PublicKey.Serialize() { return fmt.Errorf("RemoteNode.validateVerackPoS: Requesting disconnect for id: (%v) "+ "verack public key mismatch; message: %v; expected: %v", rn.id, vrkMsg.PublicKey, rn.validatorPublicKey) } diff --git a/lib/remote_node_manager.go b/lib/remote_node_manager.go index a41fe4606..fb269d072 100644 --- a/lib/remote_node_manager.go +++ b/lib/remote_node_manager.go @@ -5,8 +5,10 @@ import ( "github.com/btcsuite/btcd/wire" "github.com/deso-protocol/core/bls" "github.com/deso-protocol/core/collections" + "github.com/golang/glog" "github.com/pkg/errors" "net" + "sync" "sync/atomic" ) @@ -14,6 +16,8 @@ import ( // and stopping remote node connections. It is also responsible for organizing the remote nodes into indices for easy // access, through the RemoteNodeIndexer. type RemoteNodeManager struct { + mtx sync.Mutex + // remoteNodeIndexer is a structure that stores and indexes all created remote nodes. remoteNodeIndexer *RemoteNodeIndexer @@ -62,13 +66,19 @@ func (manager *RemoteNodeManager) ProcessCompletedHandshake(remoteNode *RemoteNo if remoteNode.IsValidator() { manager.SetValidator(remoteNode) + manager.UnsetNonValidator(remoteNode) } else { + manager.UnsetValidator(remoteNode) manager.SetNonValidator(remoteNode) } manager.srv.HandleAcceptedPeer(remoteNode.GetPeer()) } func (manager *RemoteNodeManager) Disconnect(rn *RemoteNode) { + if rn == nil { + return + } + glog.V(2).Infof("RemoteNodeManager.Disconnect: Disconnecting from remote node %v", rn.GetId()) rn.Disconnect() manager.removeRemoteNodeFromIndexer(rn) } @@ -83,17 +93,29 @@ func (manager *RemoteNodeManager) DisconnectById(id RemoteNodeId) { } func (manager *RemoteNodeManager) removeRemoteNodeFromIndexer(rn *RemoteNode) { + manager.mtx.Lock() + defer manager.mtx.Unlock() + if rn == nil { return } indexer := manager.remoteNodeIndexer indexer.GetAllRemoteNodes().Remove(rn.GetId()) - if rn.validatorPublicKey != nil { - indexer.GetValidatorIndex().Remove(rn.validatorPublicKey.Serialize()) - } indexer.GetNonValidatorOutboundIndex().Remove(rn.GetId()) indexer.GetNonValidatorInboundIndex().Remove(rn.GetId()) + + // Try to evict the remote node from the validator index. If the remote node is not a validator, then there is nothing to do. + if rn.GetValidatorPublicKey() == nil { + return + } + // Only remove from the validator index if the fetched remote node is the same as the one we are trying to remove. + // Otherwise, we could have a fun edge-case where a duplicated validator connection ends up removing an + // existing validator connection from the index. 
+ fetchedRn, ok := indexer.GetValidatorIndex().Get(rn.GetValidatorPublicKey().Serialize()) + if ok && fetchedRn.GetId() == rn.GetId() { + indexer.GetValidatorIndex().Remove(rn.GetValidatorPublicKey().Serialize()) + } } func (manager *RemoteNodeManager) SendMessage(rn *RemoteNode, desoMessage DeSoMessage) error { @@ -113,6 +135,10 @@ func (manager *RemoteNodeManager) CreateValidatorConnection(netAddr *wire.NetAdd return fmt.Errorf("RemoteNodeManager.CreateValidatorConnection: netAddr or public key is nil") } + if _, ok := manager.GetValidatorIndex().Get(publicKey.Serialize()); ok { + return fmt.Errorf("RemoteNodeManager.CreateValidatorConnection: RemoteNode already exists for public key: %v", publicKey) + } + remoteNode := manager.newRemoteNode(publicKey) if err := remoteNode.DialPersistentOutboundConnection(netAddr); err != nil { return errors.Wrapf(err, "RemoteNodeManager.CreateValidatorConnection: Problem calling DialPersistentOutboundConnection "+ @@ -190,6 +216,9 @@ func (manager *RemoteNodeManager) AttachOutboundConnection(conn net.Conn, na *wi // ########################### func (manager *RemoteNodeManager) setRemoteNode(rn *RemoteNode) { + manager.mtx.Lock() + defer manager.mtx.Unlock() + if rn == nil { return } @@ -198,36 +227,39 @@ func (manager *RemoteNodeManager) setRemoteNode(rn *RemoteNode) { } func (manager *RemoteNodeManager) SetNonValidator(rn *RemoteNode) { + manager.mtx.Lock() + defer manager.mtx.Unlock() + if rn == nil { return } if rn.IsOutbound() { manager.GetNonValidatorOutboundIndex().Set(rn.GetId(), rn) - } else if rn.IsInbound() { - manager.GetNonValidatorInboundIndex().Set(rn.GetId(), rn) } else { - manager.Disconnect(rn) - return + manager.GetNonValidatorInboundIndex().Set(rn.GetId(), rn) } - - manager.UnsetValidator(rn) } func (manager *RemoteNodeManager) SetValidator(remoteNode *RemoteNode) { + manager.mtx.Lock() + defer manager.mtx.Unlock() + if remoteNode == nil { return } pk := remoteNode.GetValidatorPublicKey() if pk == nil { - manager.Disconnect(remoteNode) return } manager.GetValidatorIndex().Set(pk.Serialize(), remoteNode) } func (manager *RemoteNodeManager) UnsetValidator(remoteNode *RemoteNode) { + manager.mtx.Lock() + defer manager.mtx.Unlock() + if remoteNode == nil { return } @@ -240,16 +272,17 @@ func (manager *RemoteNodeManager) UnsetValidator(remoteNode *RemoteNode) { } func (manager *RemoteNodeManager) UnsetNonValidator(rn *RemoteNode) { + manager.mtx.Lock() + defer manager.mtx.Unlock() + if rn == nil { return } if rn.IsOutbound() { manager.GetNonValidatorOutboundIndex().Remove(rn.GetId()) - } else if rn.IsInbound() { - manager.GetNonValidatorInboundIndex().Remove(rn.GetId()) } else { - manager.Disconnect(rn) + manager.GetNonValidatorInboundIndex().Remove(rn.GetId()) } } diff --git a/lib/server.go b/lib/server.go index d1c82e5b3..d4c371955 100644 --- a/lib/server.go +++ b/lib/server.go @@ -62,7 +62,9 @@ type Server struct { eventManager *EventManager TxIndex *TXIndex + handshakeController *HandshakeController // fastHotStuffEventLoop consensus.FastHotStuffEventLoop + connectionController *ConnectionController // posMempool *PosMemPool TODO: Add the mempool later // All messages received from peers get sent from the ConnectionManager to the @@ -175,6 +177,10 @@ func (srv *Server) ResetRequestQueues() { srv.requestedTransactionsMap = make(map[BlockHash]*GetDataRequestInfo) } +func (srv *Server) GetConnectionController() *ConnectionController { + return srv.connectionController +} + // dataLock must be acquired for writing before calling this 
function. func (srv *Server) _removeRequest(hash *BlockHash) { // Just be lazy and remove the hash from everything indiscriminately to @@ -445,8 +451,7 @@ func NewServer( // Create a new connection manager but note that it won't be initialized until Start(). _incomingMessages := make(chan *ServerMessage, (_targetOutboundPeers+_maxInboundPeers)*3) _cmgr := NewConnectionManager( - _params, _desoAddrMgr, _listeners, _connectIps, timesource, - _targetOutboundPeers, _maxInboundPeers, _limitOneInboundConnectionPerIP, + _params, _listeners, _connectIps, timesource, _hyperSync, _syncType, _stallTimeoutSeconds, _minFeeRateNanosPerKB, _incomingMessages, srv) @@ -481,6 +486,22 @@ func NewServer( hex.EncodeToString(_chain.blockTip().Hash[:]), hex.EncodeToString(BigintToHash(_chain.blockTip().CumWork)[:])) + nodeServices := SFFullNodeDeprecated + if _hyperSync { + nodeServices |= SFHyperSync + } + if archivalMode { + nodeServices |= SFArchivalNode + } + if _blsKeystore != nil { + nodeServices |= SFPosValidator + } + rnManager := NewRemoteNodeManager(srv, _chain, _cmgr, _blsKeystore, _params, _minFeeRateNanosPerKB, nodeServices) + + srv.handshakeController = NewHandshakeController(rnManager) + srv.connectionController = NewConnectionController(_params, _cmgr, srv.handshakeController, rnManager, + _blsKeystore, _desoAddrMgr, _targetOutboundPeers, _maxInboundPeers, _limitOneInboundConnectionPerIP) + if srv.stateChangeSyncer != nil { srv.stateChangeSyncer.BlockHeight = uint64(_chain.headerTip().Height) } @@ -2176,7 +2197,9 @@ func (srv *Server) _handleAddrMessage(pp *Peer, msg *MsgDeSoAddr) { netAddrsReceived = append( netAddrsReceived, addrAsNetAddr) } - srv.cmgr.AddrMgr.AddAddresses(netAddrsReceived, pp.netAddr) + // TODO: temporary + addressMgr := addrmgr.New("", net.LookupIP) + addressMgr.AddAddresses(netAddrsReceived, pp.netAddr) // If the message had <= 10 addrs in it, then queue all the addresses for relaying // on the next cycle. @@ -2207,7 +2230,9 @@ func (srv *Server) _handleGetAddrMessage(pp *Peer, msg *MsgDeSoGetAddr) { glog.V(1).Infof("Server._handleGetAddrMessage: Received GetAddr from peer %v", pp) // When we get a GetAddr message, choose MaxAddrsPerMsg from the AddrMgr // and send them back to the peer. - netAddrsFound := srv.cmgr.AddrMgr.AddressCache() + // TODO: temporary + addressMgr := addrmgr.New("", net.LookupIP) + netAddrsFound := addressMgr.AddressCache() if len(netAddrsFound) > MaxAddrsPerAddrMsg { netAddrsFound = netAddrsFound[:MaxAddrsPerAddrMsg] } @@ -2230,9 +2255,12 @@ func (srv *Server) _handleControlMessages(serverMessage *ServerMessage) (_should switch serverMessage.Msg.(type) { // Control messages used internally to signal to the server. case *MsgDeSoPeerHandshakeComplete: - break + srv.handshakeController._handleHandshakeCompleteMessage(serverMessage.Peer, serverMessage.Msg) case *MsgDeSoDisconnectedPeer: srv._handleDonePeer(serverMessage.Peer) + srv.connectionController._handleDonePeerMessage(serverMessage.Peer, serverMessage.Msg) + case *MsgDeSoNewConnection: + srv.connectionController._handleNewConnectionMessage(serverMessage.Peer, serverMessage.Msg) case *MsgDeSoQuit: return true } @@ -2244,6 +2272,10 @@ func (srv *Server) _handlePeerMessages(serverMessage *ServerMessage) { // Handle all non-control message types from our Peers. switch msg := serverMessage.Msg.(type) { // Messages sent among peers. 
+ case *MsgDeSoAddr: + srv.connectionController._handleAddrMessage(serverMessage.Peer, serverMessage.Msg) + case *MsgDeSoGetAddr: + srv.connectionController._handleGetAddrMessage(serverMessage.Peer, serverMessage.Msg) case *MsgDeSoGetHeaders: srv._handleGetHeaders(serverMessage.Peer, msg) case *MsgDeSoHeaderBundle: @@ -2266,6 +2298,10 @@ func (srv *Server) _handlePeerMessages(serverMessage *ServerMessage) { srv._handleMempool(serverMessage.Peer, msg) case *MsgDeSoInv: srv._handleInv(serverMessage.Peer, msg) + case *MsgDeSoVersion: + srv.handshakeController._handleVersionMessage(serverMessage.Peer, serverMessage.Msg) + case *MsgDeSoVerack: + srv.handshakeController._handleVerackMessage(serverMessage.Peer, serverMessage.Msg) } } @@ -2443,10 +2479,12 @@ func (srv *Server) _startAddressRelayer() { } // For the first ten minutes after the server starts, relay our address to all // peers. After the first ten minutes, do it once every 24 hours. + // TODO: temporary + addressMgr := addrmgr.New("", net.LookupIP) glog.V(1).Infof("Server.Start._startAddressRelayer: Relaying our own addr to peers") if numMinutesPassed < 10 || numMinutesPassed%(RebroadcastNodeAddrIntervalMinutes) == 0 { for _, pp := range srv.cmgr.GetAllPeers() { - bestAddress := srv.cmgr.AddrMgr.GetBestLocalAddress(pp.netAddr) + bestAddress := addressMgr.GetBestLocalAddress(pp.netAddr) if bestAddress != nil { glog.V(2).Infof("Server.Start._startAddressRelayer: Relaying address %v to "+ "peer %v", bestAddress.IP.String(), pp) From 21ba1dd12f8feb401c216424a4f74d6f9514e9ca Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Sun, 28 Jan 2024 22:40:13 -0500 Subject: [PATCH 08/37] Add validator deduplication test (#942) --- .../connection_controller_routines_test.go | 193 ++++++++++++++++++ .../connection_controller_test.go | 12 +- .../connection_controller_utils_test.go | 52 +++-- integration_testing/tools.go | 2 +- lib/connection_controller.go | 125 +++++++++++- lib/connection_manager.go | 26 ++- lib/handshake_controller.go | 2 +- lib/network_connection.go | 8 +- lib/remote_node.go | 4 + lib/remote_node_manager.go | 22 +- lib/server.go | 9 +- 11 files changed, 395 insertions(+), 60 deletions(-) create mode 100644 integration_testing/connection_controller_routines_test.go diff --git a/integration_testing/connection_controller_routines_test.go b/integration_testing/connection_controller_routines_test.go new file mode 100644 index 000000000..582031bd8 --- /dev/null +++ b/integration_testing/connection_controller_routines_test.go @@ -0,0 +1,193 @@ +package integration_testing + +import ( + "fmt" + "github.com/deso-protocol/core/bls" + "github.com/deso-protocol/core/cmd" + "github.com/deso-protocol/core/collections" + "github.com/deso-protocol/core/lib" + "github.com/stretchr/testify/require" + "testing" +) + +func TestConnectionControllerInitiatePersistentConnections(t *testing.T) { + require := require.New(t) + t.Cleanup(func() { + setGetActiveValidatorImpl(lib.BasicGetActiveValidators) + }) + + // NonValidator Node1 will set its --connect-ips to two non-validators node2 and node3, + // and two validators node4 and node5. 
+ node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") + node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") + node3 := spawnNonValidatorNodeProtocol2(t, 18002, "node3") + blsPriv4, err := bls.NewPrivateKey() + require.NoError(err) + node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4) + blsPriv5, err := bls.NewPrivateKey() + require.NoError(err) + node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv5) + + node2 = startNode(t, node2) + node3 = startNode(t, node3) + node4 = startNode(t, node4) + node5 = startNode(t, node5) + + setGetActiveValidatorImplWithValidatorNodes(t, node4, node5) + + node1.Config.ConnectIPs = []string{ + node2.Listeners[0].Addr().String(), + node3.Listeners[0].Addr().String(), + node4.Listeners[0].Addr().String(), + node5.Listeners[0].Addr().String(), + } + node1 = startNode(t, node1) + waitForNonValidatorOutboundConnection(t, node1, node2) + waitForNonValidatorOutboundConnection(t, node1, node3) + waitForValidatorConnection(t, node1, node4) + waitForValidatorConnection(t, node1, node5) + waitForValidatorConnection(t, node4, node5) + waitForCountRemoteNodeIndexer(t, node1, 4, 2, 2, 0) + waitForCountRemoteNodeIndexer(t, node2, 1, 0, 0, 1) + waitForCountRemoteNodeIndexer(t, node3, 1, 0, 0, 1) + waitForCountRemoteNodeIndexer(t, node4, 2, 1, 0, 1) + waitForCountRemoteNodeIndexer(t, node5, 2, 1, 0, 1) + node1.Stop() + t.Logf("Test #1 passed | Successfully run non-validator node1 with --connect-ips set to node2, node3, node4, node5") + + // Now try again with a validator node6, with connect-ips set to node2, node3, node4, node5. + blsPriv6, err := bls.NewPrivateKey() + require.NoError(err) + node6 := spawnValidatorNodeProtocol2(t, 18005, "node6", blsPriv6) + node6.Config.ConnectIPs = []string{ + node2.Listeners[0].Addr().String(), + node3.Listeners[0].Addr().String(), + node4.Listeners[0].Addr().String(), + node5.Listeners[0].Addr().String(), + } + node6 = startNode(t, node6) + setGetActiveValidatorImplWithValidatorNodes(t, node4, node5, node6) + waitForNonValidatorOutboundConnection(t, node6, node2) + waitForNonValidatorOutboundConnection(t, node6, node3) + waitForValidatorConnection(t, node6, node4) + waitForValidatorConnection(t, node6, node5) + waitForValidatorConnection(t, node4, node5) + waitForCountRemoteNodeIndexer(t, node6, 4, 2, 2, 0) + waitForCountRemoteNodeIndexer(t, node2, 1, 1, 0, 0) + waitForCountRemoteNodeIndexer(t, node3, 1, 1, 0, 0) + waitForCountRemoteNodeIndexer(t, node4, 2, 2, 0, 0) + waitForCountRemoteNodeIndexer(t, node5, 2, 2, 0, 0) + node2.Stop() + node3.Stop() + node4.Stop() + node5.Stop() + node6.Stop() + t.Logf("Test #2 passed | Successfully run validator node6 with --connect-ips set to node2, node3, node4, node5") +} + +func TestConnectionControllerNonValidatorCircularConnectIps(t *testing.T) { + node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") + node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") + + node1.Config.ConnectIPs = []string{"127.0.0.1:18001"} + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + + node1 = startNode(t, node1) + node2 = startNode(t, node2) + defer node1.Stop() + defer node2.Stop() + + waitForCountRemoteNodeIndexer(t, node1, 2, 0, 1, 1) + waitForCountRemoteNodeIndexer(t, node2, 2, 0, 1, 1) +} + +func setGetActiveValidatorImplWithValidatorNodes(t *testing.T, validators ...*cmd.Node) { + require := require.New(t) + + mapping := collections.NewConcurrentMap[bls.SerializedPublicKey, *lib.ValidatorEntry]() + for _, validator := range validators { + seed := 
validator.Config.PosValidatorSeed + if seed == "" { + t.Fatalf("Validator node %s does not have a PosValidatorSeed set", validator.Params.UserAgent) + } + keystore, err := lib.NewBLSKeystore(seed) + require.NoError(err) + mapping.Set(keystore.GetSigner().GetPublicKey().Serialize(), createSimpleValidatorEntry(validator)) + } + setGetActiveValidatorImpl(func() *collections.ConcurrentMap[bls.SerializedPublicKey, *lib.ValidatorEntry] { + return mapping + }) +} + +func setGetActiveValidatorImpl(mapping func() *collections.ConcurrentMap[bls.SerializedPublicKey, *lib.ValidatorEntry]) { + lib.GetActiveValidatorImpl = mapping +} + +func createSimpleValidatorEntry(node *cmd.Node) *lib.ValidatorEntry { + return &lib.ValidatorEntry{ + Domains: [][]byte{[]byte(node.Listeners[0].Addr().String())}, + } +} + +func waitForValidatorFullGraph(t *testing.T, validators ...*cmd.Node) { + for ii := 0; ii < len(validators); ii++ { + waitForValidatorConnectionOneWay(t, validators[ii], validators[ii+1:]...) + } +} + +func waitForValidatorConnectionOneWay(t *testing.T, n *cmd.Node, validators ...*cmd.Node) { + if len(validators) == 0 { + return + } + for _, validator := range validators { + waitForValidatorConnection(t, n, validator) + } +} + +func waitForNonValidatorInboundXOROutboundConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) { + userAgentN1 := node1.Params.UserAgent + userAgentN2 := node2.Params.UserAgent + conditionInbound := conditionNonValidatorInboundConnectionDynamic(t, node1, node2, true) + conditionOutbound := conditionNonValidatorOutboundConnectionDynamic(t, node1, node2, true) + xorCondition := func() bool { + return conditionInbound() != conditionOutbound() + } + waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to inbound XOR outbound non-validator Node (%s)", + userAgentN1, userAgentN2), xorCondition) +} + +func waitForMinNonValidatorCountRemoteNodeIndexer(t *testing.T, node *cmd.Node, allCount int, validatorCount int, + minNonValidatorOutboundCount int, minNonValidatorInboundCount int) { + + userAgent := node.Params.UserAgent + rnManager := node.Server.GetConnectionController().GetRemoteNodeManager() + condition := func() bool { + return checkRemoteNodeIndexerMinNonValidatorCount(rnManager, allCount, validatorCount, + minNonValidatorOutboundCount, minNonValidatorInboundCount) + } + waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to have at least %d non-validator outbound nodes and %d non-validator inbound nodes", + userAgent, minNonValidatorOutboundCount, minNonValidatorInboundCount), condition) +} + +func checkRemoteNodeIndexerMinNonValidatorCount(manager *lib.RemoteNodeManager, allCount int, validatorCount int, + minNonValidatorOutboundCount int, minNonValidatorInboundCount int) bool { + + if allCount != manager.GetAllRemoteNodes().Count() { + return false + } + if validatorCount != manager.GetValidatorIndex().Count() { + return false + } + if minNonValidatorOutboundCount > manager.GetNonValidatorOutboundIndex().Count() { + return false + } + if minNonValidatorInboundCount > manager.GetNonValidatorInboundIndex().Count() { + return false + } + if allCount != manager.GetValidatorIndex().Count()+ + manager.GetNonValidatorOutboundIndex().Count()+ + manager.GetNonValidatorInboundIndex().Count() { + return false + } + return true +} diff --git a/integration_testing/connection_controller_test.go b/integration_testing/connection_controller_test.go index 01fb01046..58f4be33b 100644 --- a/integration_testing/connection_controller_test.go +++ 
b/integration_testing/connection_controller_test.go
@@ -396,7 +396,8 @@ func TestConnectionControllerPersistentConnection(t *testing.T) {
 
 	// Create a persistent connection from Node1 to Node2
 	cc := node1.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorPersistentOutboundConnection(node2.Listeners[0].Addr().String()))
+	_, err = cc.CreateNonValidatorPersistentOutboundConnection(node2.Listeners[0].Addr().String())
+	require.NoError(err)
 	waitForValidatorConnection(t, node1, node2)
 	waitForNonValidatorInboundConnection(t, node2, node1)
 	node2.Stop()
@@ -408,7 +409,8 @@ func TestConnectionControllerPersistentConnection(t *testing.T) {
 	node3 = startNode(t, node3)
 
 	// Create a persistent connection from Node1 to Node3
-	require.NoError(cc.CreateNonValidatorPersistentOutboundConnection(node3.Listeners[0].Addr().String()))
+	_, err = cc.CreateNonValidatorPersistentOutboundConnection(node3.Listeners[0].Addr().String())
+	require.NoError(err)
 	waitForNonValidatorOutboundConnection(t, node1, node3)
 	waitForNonValidatorInboundConnection(t, node3, node1)
 	node3.Stop()
@@ -429,7 +431,8 @@ func TestConnectionControllerPersistentConnection(t *testing.T) {
 
 	// Create a persistent connection from Node4 to Node5
 	cc = node4.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorPersistentOutboundConnection(node5.Listeners[0].Addr().String()))
+	_, err = cc.CreateNonValidatorPersistentOutboundConnection(node5.Listeners[0].Addr().String())
+	require.NoError(err)
 	waitForNonValidatorOutboundConnection(t, node4, node5)
 	waitForValidatorConnection(t, node5, node4)
 	node5.Stop()
@@ -444,7 +447,8 @@ func TestConnectionControllerPersistentConnection(t *testing.T) {
 	defer node6.Stop()
 
 	// Create a persistent connection from Node4 to Node6
-	require.NoError(cc.CreateNonValidatorPersistentOutboundConnection(node6.Listeners[0].Addr().String()))
+	_, err = cc.CreateNonValidatorPersistentOutboundConnection(node6.Listeners[0].Addr().String())
+	require.NoError(err)
 	waitForValidatorConnection(t, node4, node6)
 	waitForValidatorConnection(t, node6, node4)
 	t.Logf("Test #4 passed | Successfully created persistent connection from validator Node4 to validator Node6")
diff --git a/integration_testing/connection_controller_utils_test.go b/integration_testing/connection_controller_utils_test.go
index 4d5594634..74a33b943 100644
--- a/integration_testing/connection_controller_utils_test.go
+++ b/integration_testing/connection_controller_utils_test.go
@@ -26,14 +26,24 @@ func waitForValidatorConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node)
 		}
 		return true
 	}
-	waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to outbound non-validator Node (%s)", userAgentN1, userAgentN2), n1ValidatedN2)
+	waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to validator Node (%s)", userAgentN1, userAgentN2), n1ValidatedN2)
 }
 
 func waitForNonValidatorOutboundConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) {
 	userAgentN1 := node1.Params.UserAgent
+	userAgentN2 := node2.Params.UserAgent
+	condition := conditionNonValidatorOutboundConnection(t, node1, node2)
+	waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to outbound non-validator Node (%s)", userAgentN1, userAgentN2), condition)
+}
+
+func conditionNonValidatorOutboundConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) func() bool {
+	return conditionNonValidatorOutboundConnectionDynamic(t, node1, node2, false)
+}
+
+func conditionNonValidatorOutboundConnectionDynamic(t *testing.T, node1 *cmd.Node, node2 
*cmd.Node, inactiveValidator bool) func() bool { userAgentN2 := node2.Params.UserAgent rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager() - n1ValidatedN2 := func() bool { + return func() bool { if true != checkRemoteNodeIndexerUserAgent(rnManagerN1, userAgentN2, false, true, false) { return false } @@ -44,19 +54,29 @@ func waitForNonValidatorOutboundConnection(t *testing.T, node1 *cmd.Node, node2 if !rnFromN2.IsHandshakeCompleted() { return false } - if rnFromN2.GetValidatorPublicKey() != nil { - return false + // inactiveValidator should have the public key. + if inactiveValidator { + return rnFromN2.GetValidatorPublicKey() != nil } - return true + return rnFromN2.GetValidatorPublicKey() == nil } - waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to outbound non-validator Node (%s)", userAgentN1, userAgentN2), n1ValidatedN2) } func waitForNonValidatorInboundConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) { userAgentN1 := node1.Params.UserAgent + userAgentN2 := node2.Params.UserAgent + condition := conditionNonValidatorInboundConnection(t, node1, node2) + waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to inbound non-validator Node (%s)", userAgentN1, userAgentN2), condition) +} + +func conditionNonValidatorInboundConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) func() bool { + return conditionNonValidatorInboundConnectionDynamic(t, node1, node2, false) +} + +func conditionNonValidatorInboundConnectionDynamic(t *testing.T, node1 *cmd.Node, node2 *cmd.Node, inactiveValidator bool) func() bool { userAgentN2 := node2.Params.UserAgent rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager() - n1ValidatedN2 := func() bool { + return func() bool { if true != checkRemoteNodeIndexerUserAgent(rnManagerN1, userAgentN2, false, false, true) { return false } @@ -67,12 +87,12 @@ func waitForNonValidatorInboundConnection(t *testing.T, node1 *cmd.Node, node2 * if !rnFromN2.IsHandshakeCompleted() { return false } - if rnFromN2.GetValidatorPublicKey() != nil { - return false + // inactiveValidator should have the public key. 
+ if inactiveValidator { + return rnFromN2.GetValidatorPublicKey() != nil } - return true + return rnFromN2.GetValidatorPublicKey() == nil } - waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to inbound non-validator Node (%s)", userAgentN1, userAgentN2), n1ValidatedN2) } func waitForEmptyRemoteNodeIndexer(t *testing.T, node1 *cmd.Node) { @@ -90,15 +110,15 @@ func waitForEmptyRemoteNodeIndexer(t *testing.T, node1 *cmd.Node) { func waitForCountRemoteNodeIndexer(t *testing.T, node1 *cmd.Node, allCount int, validatorCount int, nonValidatorOutboundCount int, nonValidatorInboundCount int) { - userAgentN1 := node1.Params.UserAgent - rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager() - n1ValidatedN2 := func() bool { - if true != checkRemoteNodeIndexerCount(rnManagerN1, allCount, validatorCount, nonValidatorOutboundCount, nonValidatorInboundCount) { + userAgent := node1.Params.UserAgent + rnManager := node1.Server.GetConnectionController().GetRemoteNodeManager() + condition := func() bool { + if true != checkRemoteNodeIndexerCount(rnManager, allCount, validatorCount, nonValidatorOutboundCount, nonValidatorInboundCount) { return false } return true } - waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to have appropriate RemoteNodes counts", userAgentN1), n1ValidatedN2) + waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to have appropriate RemoteNodes counts", userAgent), condition) } func checkRemoteNodeIndexerUserAgent(manager *lib.RemoteNodeManager, userAgent string, validator bool, diff --git a/integration_testing/tools.go b/integration_testing/tools.go index 2f97e942d..4db913136 100644 --- a/integration_testing/tools.go +++ b/integration_testing/tools.go @@ -69,7 +69,7 @@ func generateConfig(t *testing.T, port uint32, dataDir string, maxPeers uint32) config.MaxSyncBlockHeight = 0 config.ConnectIPs = []string{} config.PrivateMode = true - config.GlogV = 0 + config.GlogV = 2 config.GlogVmodule = "*bitcoin_manager*=0,*balance*=0,*view*=0,*frontend*=0,*peer*=0,*addr*=0,*network*=0,*utils*=0,*connection*=0,*main*=0,*server*=0,*mempool*=0,*miner*=0,*blockchain*=0" config.MaxInboundPeers = maxPeers config.TargetOutboundPeers = maxPeers diff --git a/lib/connection_controller.go b/lib/connection_controller.go index fef9fa887..18d423f46 100644 --- a/lib/connection_controller.go +++ b/lib/connection_controller.go @@ -5,12 +5,23 @@ import ( "github.com/btcsuite/btcd/addrmgr" "github.com/btcsuite/btcd/wire" "github.com/deso-protocol/core/bls" + "github.com/deso-protocol/core/collections" "github.com/golang/glog" "github.com/pkg/errors" "net" "strconv" + "sync" + "time" ) +type GetActiveValidatorsFunc func() *collections.ConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry] + +var GetActiveValidatorImpl GetActiveValidatorsFunc = BasicGetActiveValidators + +func BasicGetActiveValidators() *collections.ConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry] { + return collections.NewConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry]() +} + // ConnectionController is a structure that oversees all connections to remote nodes. It is responsible for kicking off // the initial connections a node makes to the network. It is also responsible for creating RemoteNodes from all // successful outbound and inbound connections. The ConnectionController also ensures that the node is connected to @@ -32,8 +43,12 @@ type ConnectionController struct { // it's aware of at random and provides it to us. 
AddrMgr *addrmgr.AddrManager - // When --connectips is set, we don't connect to anything from the addrmgr. + // When --connect-ips is set, we don't connect to anything from the addrmgr. connectIps []string + // persistentIpToRemoteNodeIdsMap maps persistent IP addresses, like the --connect-ips, to the RemoteNodeIds of the + // corresponding RemoteNodes. This is used to ensure that we don't connect to the same persistent IP address twice. + // And that we can reconnect to the same persistent IP address if we disconnect from it. + persistentIpToRemoteNodeIdsMap map[string]RemoteNodeId // The target number of non-validator outbound remote nodes we want to have. We will disconnect remote nodes once // we've exceeded this number of outbound connections. @@ -44,11 +59,16 @@ type ConnectionController struct { // When true, only one connection per IP is allowed. Prevents eclipse attacks // among other things. limitOneInboundRemoteNodePerIP bool + + startGroup sync.WaitGroup + exitChan chan struct{} + exitGroup sync.WaitGroup } func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, handshakeController *HandshakeController, - rnManager *RemoteNodeManager, blsKeystore *BLSKeystore, addrMgr *addrmgr.AddrManager, targetNonValidatorOutboundRemoteNodes uint32, - targetNonValidatorInboundRemoteNodes uint32, limitOneInboundConnectionPerIP bool) *ConnectionController { + rnManager *RemoteNodeManager, blsKeystore *BLSKeystore, addrMgr *addrmgr.AddrManager, connectIps []string, + targetNonValidatorOutboundRemoteNodes uint32, targetNonValidatorInboundRemoteNodes uint32, + limitOneInboundConnectionPerIP bool) *ConnectionController { return &ConnectionController{ params: params, @@ -57,16 +77,45 @@ func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, handsh handshake: handshakeController, rnManager: rnManager, AddrMgr: addrMgr, + connectIps: connectIps, + persistentIpToRemoteNodeIdsMap: make(map[string]RemoteNodeId), targetNonValidatorOutboundRemoteNodes: targetNonValidatorOutboundRemoteNodes, targetNonValidatorInboundRemoteNodes: targetNonValidatorInboundRemoteNodes, limitOneInboundRemoteNodePerIP: limitOneInboundConnectionPerIP, + exitChan: make(chan struct{}), } } +func (cc *ConnectionController) Start() { + cc.startGroup.Add(1) + go cc.startPersistentConnector() + + cc.startGroup.Wait() + cc.exitGroup.Add(1) +} + +func (cc *ConnectionController) Stop() { + close(cc.exitChan) + cc.exitGroup.Wait() +} + func (cc *ConnectionController) GetRemoteNodeManager() *RemoteNodeManager { return cc.rnManager } +func (cc *ConnectionController) startPersistentConnector() { + cc.startGroup.Done() + for { + select { + case <-cc.exitChan: + cc.exitGroup.Done() + return + case <-time.After(1 * time.Second): + cc.refreshConnectIps() + } + } +} + // ########################### // ## Handlers (Peer, DeSoMessage) // ########################### @@ -77,6 +126,12 @@ func (cc *ConnectionController) _handleDonePeerMessage(origin *Peer, desoMsg DeS } cc.rnManager.DisconnectById(NewRemoteNodeId(origin.ID)) + // Update the persistentIpToRemoteNodeIdsMap. 
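
The Start/Stop plumbing above is a common Go worker-lifecycle shape: one WaitGroup confirms the goroutine is running before Start returns, a close-once channel requests shutdown, and a second WaitGroup lets Stop block until the loop drains. A generic, self-contained restatement of that shape (illustrative only; the controller's version re-arms a one-second timer between connect-ip refreshes):

	package main

	import (
		"sync"
		"time"
	)

	type worker struct {
		startGroup sync.WaitGroup
		exitChan   chan struct{}
		exitGroup  sync.WaitGroup
	}

	func (w *worker) Start(tick func()) {
		w.exitChan = make(chan struct{})
		w.startGroup.Add(1)
		w.exitGroup.Add(1)
		go func() {
			w.startGroup.Done()
			defer w.exitGroup.Done()
			for {
				select {
				case <-w.exitChan:
					return
				case <-time.After(time.Second):
					tick()
				}
			}
		}()
		w.startGroup.Wait() // Start returns only once the loop is live.
	}

	func (w *worker) Stop() {
		close(w.exitChan)
		w.exitGroup.Wait() // Stop returns only once the loop has exited.
	}

	func main() {
		w := &worker{}
		w.Start(func() { /* refresh work goes here */ })
		w.Stop()
	}

The done-peer handler then prunes the persistent-IP map: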
+	for ip, id := range cc.persistentIpToRemoteNodeIdsMap {
+		if id.ToUint64() == origin.ID {
+			delete(cc.persistentIpToRemoteNodeIdsMap, ip)
+		}
+	}
 }
 
 func (cc *ConnectionController) _handleAddrMessage(origin *Peer, desoMsg DeSoMessage) {
@@ -114,7 +169,7 @@ func (cc *ConnectionController) _handleNewConnectionMessage(origin *Peer, desoMs
 		remoteNode, err = cc.processInboundConnection(msg.Connection)
 		if err != nil {
 			glog.Errorf("ConnectionController.handleNewConnectionMessage: Problem handling inbound connection: %v", err)
-			msg.Connection.Close()
+			cc.cleanupFailedInboundConnection(remoteNode, msg.Connection)
 			return
 		}
 	case ConnectionTypeOutbound:
@@ -130,6 +185,13 @@ func (cc *ConnectionController) _handleNewConnectionMessage(origin *Peer, desoMs
 	cc.handshake.InitiateHandshake(remoteNode)
 }
 
+func (cc *ConnectionController) cleanupFailedInboundConnection(remoteNode *RemoteNode, connection Connection) {
+	if remoteNode != nil {
+		cc.rnManager.Disconnect(remoteNode)
+	}
+	connection.Close()
+}
+
 func (cc *ConnectionController) cleanupFailedOutboundConnection(connection Connection) {
 	oc, ok := connection.(*outboundConnection)
 	if !ok {
@@ -141,13 +203,34 @@ func (cc *ConnectionController) cleanupFailedOutboundConnection(connection Conne
 	if rn != nil {
 		cc.rnManager.Disconnect(rn)
 	}
+	oc.Close()
 	cc.cmgr.RemoveAttemptedOutboundAddrs(oc.address)
 }
 
 // ###########################
-// ## Connections
+// ## Persistent Connections
 // ###########################
 
+func (cc *ConnectionController) refreshConnectIps() {
+	// Connect to addresses passed via the --connect-ips flag. These addresses are persistent in the sense that if we
+	// disconnect from one, we will try to reconnect to the same one.
+	for _, connectIp := range cc.connectIps {
+		if _, ok := cc.persistentIpToRemoteNodeIdsMap[connectIp]; ok {
+			continue
+		}
+
+		glog.Infof("ConnectionController.refreshConnectIps: Connecting to connectIp: %v", connectIp)
+		id, err := cc.CreateNonValidatorPersistentOutboundConnection(connectIp)
+		if err != nil {
+			glog.Errorf("ConnectionController.refreshConnectIps: Problem connecting "+
+				"to connectIp %v: %v", connectIp, err)
+			continue
+		}
+
+		cc.persistentIpToRemoteNodeIdsMap[connectIp] = id
+	}
+}
+
 func (cc *ConnectionController) CreateValidatorConnection(ipStr string, publicKey *bls.PublicKey) error {
 	netAddr, err := cc.ConvertIPStringToNetAddress(ipStr)
 	if err != nil {
@@ -156,10 +239,10 @@ func (cc *ConnectionController) CreateValidatorConnection(ipStr string, publicKe
 	return cc.rnManager.CreateValidatorConnection(netAddr, publicKey)
 }
 
-func (cc *ConnectionController) CreateNonValidatorPersistentOutboundConnection(ipStr string) error {
+func (cc *ConnectionController) CreateNonValidatorPersistentOutboundConnection(ipStr string) (RemoteNodeId, error) {
 	netAddr, err := cc.ConvertIPStringToNetAddress(ipStr)
 	if err != nil {
-		return err
+		return 0, err
 	}
 	return cc.rnManager.CreateNonValidatorPersistentOutboundConnection(netAddr)
 }
@@ -235,8 +318,8 @@ func (cc *ConnectionController) processOutboundConnection(conn Connection) (*Rem
 	}
 
 	if oc.failed {
-		return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Failed to connect to peer (%s)",
-			oc.address.IP.String())
+		return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Failed to connect to peer (%s:%v)",
+			oc.address.IP.String(), oc.address.Port)
 	}
 
 	if !oc.isPersistent {
@@ -263,11 +346,35 @@ func (cc *ConnectionController) processOutboundConnection(conn Connection) (*Rem
 			"for addr: (%s)",
oc.connection.RemoteAddr().String()) } + // Attach the connection before additional validation steps because it is already established. remoteNode, err := cc.rnManager.AttachOutboundConnection(oc.connection, na, oc.attemptId, oc.isPersistent) if remoteNode == nil || err != nil { return nil, errors.Wrapf(err, "ConnectionController.handleOutboundConnection: Problem calling rnManager.AttachOutboundConnection "+ "for addr: (%s)", oc.connection.RemoteAddr().String()) } + + // If this is a persistent remote node or a validator, we don't need to do any extra connection validation. + if remoteNode.IsPersistent() || remoteNode.GetValidatorPublicKey() != nil { + return remoteNode, nil + } + + // If we get here, it means we're dealing with a non-persistent or non-validator remote node. We perform additional + // connection validation. + + // If we already have enough outbound peers, then don't bother adding this one. + if cc.enoughNonValidatorOutboundConnections() { + return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Connected to maximum number of outbound "+ + "peers (%d)", cc.targetNonValidatorOutboundRemoteNodes) + } + + // If the group key overlaps with another peer we're already connected to then abort mission. We only connect to + // one peer per IP group in order to prevent Sybil attacks. + if cc.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { + return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+ + "connection with redundant group key (%s).", addrmgr.GroupKey(oc.address)) + } + cc.cmgr.AddToGroupKey(na) + return remoteNode, nil } diff --git a/lib/connection_manager.go b/lib/connection_manager.go index 7c6f510ac..1ba4bf8f1 100644 --- a/lib/connection_manager.go +++ b/lib/connection_manager.go @@ -168,6 +168,10 @@ func NewConnectionManager( // Check if the address passed shares a group with any addresses already in our data structures. func (cmgr *ConnectionManager) IsFromRedundantOutboundIPAddress(na *wire.NetAddress) bool { groupKey := addrmgr.GroupKey(na) + // For the sake of running multiple nodes on the same machine, we allow localhost connections. + if groupKey == "local" { + return false + } cmgr.mtxOutboundConnIPGroups.Lock() numGroupsForKey := cmgr.outboundConnIPGroups[groupKey] @@ -185,7 +189,7 @@ func (cmgr *ConnectionManager) IsFromRedundantOutboundIPAddress(na *wire.NetAddr return true } -func (cmgr *ConnectionManager) addToGroupKey(na *wire.NetAddress) { +func (cmgr *ConnectionManager) AddToGroupKey(na *wire.NetAddress) { groupKey := addrmgr.GroupKey(na) cmgr.mtxOutboundConnIPGroups.Lock() @@ -429,7 +433,6 @@ func (cmgr *ConnectionManager) addPeer(pp *Peer) { // number of outbound peers. Also add the peer's address to // our map. if _, ok := peerList[pp.ID]; !ok { - cmgr.addToGroupKey(pp.netAddr) atomic.AddUint32(&cmgr.numOutboundPeers, 1) cmgr.mtxAddrsMaps.Lock() @@ -528,16 +531,6 @@ func (cmgr *ConnectionManager) _logOutboundPeerData() { numInboundPeers := int(atomic.LoadUint32(&cmgr.numInboundPeers)) numPersistentPeers := int(atomic.LoadUint32(&cmgr.numPersistentPeers)) glog.V(1).Infof("Num peers: OUTBOUND(%d) INBOUND(%d) PERSISTENT(%d)", numOutboundPeers, numInboundPeers, numPersistentPeers) - - cmgr.mtxOutboundConnIPGroups.Lock() - for _, vv := range cmgr.outboundConnIPGroups { - if vv != 0 && vv != 1 { - glog.V(1).Infof("_logOutboundPeerData: Peer group count != (0 or 1). "+ - "Is (%d) instead. 
This "+ - "should never happen.", vv) - } - } - cmgr.mtxOutboundConnIPGroups.Unlock() } func (cmgr *ConnectionManager) AddTimeSample(addrStr string, timeSample time.Time) { @@ -617,8 +610,13 @@ func (cmgr *ConnectionManager) Start() { select { case oc := <-cmgr.outboundConnectionChan: - glog.V(2).Infof("ConnectionManager.Start: Successfully established an outbound connection with "+ - "(addr= %v)", oc.connection.RemoteAddr()) + if oc.failed { + glog.V(2).Infof("ConnectionManager.Start: Failed to establish an outbound connection with "+ + "(id= %v)", oc.attemptId) + } else { + glog.V(2).Infof("ConnectionManager.Start: Successfully established an outbound connection with "+ + "(addr= %v)", oc.connection.RemoteAddr()) + } delete(cmgr.outboundConnectionAttempts, oc.attemptId) cmgr.serverMessageQueue <- &ServerMessage{ Peer: nil, diff --git a/lib/handshake_controller.go b/lib/handshake_controller.go index bde07745a..f355bad93 100644 --- a/lib/handshake_controller.go +++ b/lib/handshake_controller.go @@ -122,7 +122,7 @@ func (hc *HandshakeController) _handleVersionMessage(origin *Peer, desoMsg DeSoM if hc.usedNonces.Contains(msgNonce) { hc.usedNonces.Delete(msgNonce) glog.Errorf("HandshakeController._handleVersionMessage: Disconnecting RemoteNode with id: (%v) "+ - "nonce collision", origin.ID) + "nonce collision, nonce (%v)", origin.ID, msgNonce) hc.rnManager.Disconnect(rn) return } diff --git a/lib/network_connection.go b/lib/network_connection.go index eb6d4ab55..ffb0bb1f1 100644 --- a/lib/network_connection.go +++ b/lib/network_connection.go @@ -33,7 +33,9 @@ func (oc *outboundConnection) Close() { if oc.terminated { return } - oc.connection.Close() + if oc.connection != nil { + oc.connection.Close() + } oc.terminated = true } @@ -58,7 +60,9 @@ func (ic *inboundConnection) Close() { return } - ic.connection.Close() + if ic.connection != nil { + ic.connection.Close() + } ic.terminated = true } diff --git a/lib/remote_node.go b/lib/remote_node.go index f2d849a36..5ba651f3f 100644 --- a/lib/remote_node.go +++ b/lib/remote_node.go @@ -223,6 +223,10 @@ func (rn *RemoteNode) IsHandshakeCompleted() bool { return rn.connectionStatus == RemoteNodeStatus_HandshakeCompleted } +func (rn *RemoteNode) IsTerminated() bool { + return rn.connectionStatus == RemoteNodeStatus_Terminated +} + func (rn *RemoteNode) IsValidator() bool { if !rn.IsHandshakeCompleted() { return false diff --git a/lib/remote_node_manager.go b/lib/remote_node_manager.go index fb269d072..02bed8e3e 100644 --- a/lib/remote_node_manager.go +++ b/lib/remote_node_manager.go @@ -140,7 +140,7 @@ func (manager *RemoteNodeManager) CreateValidatorConnection(netAddr *wire.NetAdd } remoteNode := manager.newRemoteNode(publicKey) - if err := remoteNode.DialPersistentOutboundConnection(netAddr); err != nil { + if err := remoteNode.DialOutboundConnection(netAddr); err != nil { return errors.Wrapf(err, "RemoteNodeManager.CreateValidatorConnection: Problem calling DialPersistentOutboundConnection "+ "for addr: (%s:%v)", netAddr.IP.String(), netAddr.Port) } @@ -149,19 +149,19 @@ func (manager *RemoteNodeManager) CreateValidatorConnection(netAddr *wire.NetAdd return nil } -func (manager *RemoteNodeManager) CreateNonValidatorPersistentOutboundConnection(netAddr *wire.NetAddress) error { +func (manager *RemoteNodeManager) CreateNonValidatorPersistentOutboundConnection(netAddr *wire.NetAddress) (RemoteNodeId, error) { if netAddr == nil { - return fmt.Errorf("RemoteNodeManager.CreateNonValidatorPersistentOutboundConnection: netAddr is nil") + return 0, 
fmt.Errorf("RemoteNodeManager.CreateNonValidatorPersistentOutboundConnection: netAddr is nil") } remoteNode := manager.newRemoteNode(nil) if err := remoteNode.DialPersistentOutboundConnection(netAddr); err != nil { - return errors.Wrapf(err, "RemoteNodeManager.CreateNonValidatorPersistentOutboundConnection: Problem calling DialPersistentOutboundConnection "+ + return 0, errors.Wrapf(err, "RemoteNodeManager.CreateNonValidatorPersistentOutboundConnection: Problem calling DialPersistentOutboundConnection "+ "for addr: (%s:%v)", netAddr.IP.String(), netAddr.Port) } manager.setRemoteNode(remoteNode) manager.GetNonValidatorOutboundIndex().Set(remoteNode.GetId(), remoteNode) - return nil + return remoteNode.GetId(), nil } func (manager *RemoteNodeManager) CreateNonValidatorOutboundConnection(netAddr *wire.NetAddress) error { @@ -184,7 +184,7 @@ func (manager *RemoteNodeManager) AttachInboundConnection(conn net.Conn, remoteNode := manager.newRemoteNode(nil) if err := remoteNode.AttachInboundConnection(conn, na); err != nil { - return nil, errors.Wrapf(err, "RemoteNodeManager.AttachInboundConnection: Problem calling AttachInboundConnection "+ + return remoteNode, errors.Wrapf(err, "RemoteNodeManager.AttachInboundConnection: Problem calling AttachInboundConnection "+ "for addr: (%s)", conn.RemoteAddr().String()) } @@ -219,7 +219,7 @@ func (manager *RemoteNodeManager) setRemoteNode(rn *RemoteNode) { manager.mtx.Lock() defer manager.mtx.Unlock() - if rn == nil { + if rn == nil || rn.IsTerminated() { return } @@ -230,7 +230,7 @@ func (manager *RemoteNodeManager) SetNonValidator(rn *RemoteNode) { manager.mtx.Lock() defer manager.mtx.Unlock() - if rn == nil { + if rn == nil || rn.IsTerminated() { return } @@ -245,7 +245,7 @@ func (manager *RemoteNodeManager) SetValidator(remoteNode *RemoteNode) { manager.mtx.Lock() defer manager.mtx.Unlock() - if remoteNode == nil { + if remoteNode == nil || remoteNode.IsTerminated() { return } @@ -260,7 +260,7 @@ func (manager *RemoteNodeManager) UnsetValidator(remoteNode *RemoteNode) { manager.mtx.Lock() defer manager.mtx.Unlock() - if remoteNode == nil { + if remoteNode == nil || remoteNode.IsTerminated() { return } @@ -275,7 +275,7 @@ func (manager *RemoteNodeManager) UnsetNonValidator(rn *RemoteNode) { manager.mtx.Lock() defer manager.mtx.Unlock() - if rn == nil { + if rn == nil || rn.IsTerminated() { return } diff --git a/lib/server.go b/lib/server.go index d4c371955..a4fa28376 100644 --- a/lib/server.go +++ b/lib/server.go @@ -499,8 +499,8 @@ func NewServer( rnManager := NewRemoteNodeManager(srv, _chain, _cmgr, _blsKeystore, _params, _minFeeRateNanosPerKB, nodeServices) srv.handshakeController = NewHandshakeController(rnManager) - srv.connectionController = NewConnectionController(_params, _cmgr, srv.handshakeController, rnManager, - _blsKeystore, _desoAddrMgr, _targetOutboundPeers, _maxInboundPeers, _limitOneInboundConnectionPerIP) + srv.connectionController = NewConnectionController(_params, _cmgr, srv.handshakeController, rnManager, _blsKeystore, + _desoAddrMgr, _connectIps, _targetOutboundPeers, _maxInboundPeers, _limitOneInboundConnectionPerIP) if srv.stateChangeSyncer != nil { srv.stateChangeSyncer.BlockHeight = uint64(_chain.headerTip().Height) @@ -2547,6 +2547,9 @@ func (srv *Server) Stop() { srv.cmgr.Stop() glog.Infof(CLog(Yellow, "Server.Stop: Closed the ConnectionManger")) + srv.connectionController.Stop() + glog.Infof(CLog(Yellow, "Server.Stop: Closed the ConnectionController")) + // Stop the miner if we have one running. 
if srv.miner != nil { srv.miner.Stop() @@ -2629,6 +2632,8 @@ func (srv *Server) Start() { if srv.miner != nil && len(srv.miner.PublicKeys) > 0 { go srv.miner.Start() } + + srv.connectionController.Start() } // SyncPrefixProgress keeps track of sync progress on an individual prefix. It is used in From 59af74dff65f3572ad8ac4d45488d6e182e67239 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Sun, 28 Jan 2024 22:42:42 -0500 Subject: [PATCH 09/37] Revert "Code split" (#943) This reverts commit 831096ac1d3008233868ac8b8f0eca4cd2b9553e. --- .../connection_controller_routines_test.go | 217 ++++++++++++++++++ .../connection_controller_utils_test.go | 8 + lib/connection_controller.go | 103 ++++++++- 3 files changed, 326 insertions(+), 2 deletions(-) diff --git a/integration_testing/connection_controller_routines_test.go b/integration_testing/connection_controller_routines_test.go index 582031bd8..6eec830dd 100644 --- a/integration_testing/connection_controller_routines_test.go +++ b/integration_testing/connection_controller_routines_test.go @@ -85,6 +85,223 @@ func TestConnectionControllerInitiatePersistentConnections(t *testing.T) { t.Logf("Test #2 passed | Successfully run validator node6 with --connect-ips set to node2, node3, node4, node5") } +func TestConnectionControllerValidatorConnector(t *testing.T) { + require := require.New(t) + t.Cleanup(func() { + setGetActiveValidatorImpl(lib.BasicGetActiveValidators) + }) + + // Spawn 5 validators node1, node2, node3, node4, node5 and two non-validators node6 and node7. + // All the validators are initially in the validator set. And later, node1 and node2 will be removed from the + // validator set. Then, make node3 inactive, and node2 active again. Then, make all the validators inactive. + // Make node6, and node7 connect-ips to all the validators. + + blsPriv1, err := bls.NewPrivateKey() + require.NoError(err) + node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsPriv1) + blsPriv2, err := bls.NewPrivateKey() + require.NoError(err) + node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2) + blsPriv3, err := bls.NewPrivateKey() + require.NoError(err) + node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv3) + blsPriv4, err := bls.NewPrivateKey() + require.NoError(err) + node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4) + blsPriv5, err := bls.NewPrivateKey() + require.NoError(err) + node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv5) + + node6 := spawnNonValidatorNodeProtocol2(t, 18005, "node6") + node7 := spawnNonValidatorNodeProtocol2(t, 18006, "node7") + + node1 = startNode(t, node1) + defer node1.Stop() + node2 = startNode(t, node2) + defer node2.Stop() + node3 = startNode(t, node3) + defer node3.Stop() + node4 = startNode(t, node4) + defer node4.Stop() + node5 = startNode(t, node5) + defer node5.Stop() + setGetActiveValidatorImplWithValidatorNodes(t, node1, node2, node3, node4, node5) + + node6.Config.ConnectIPs = []string{ + node1.Listeners[0].Addr().String(), + node2.Listeners[0].Addr().String(), + node3.Listeners[0].Addr().String(), + node4.Listeners[0].Addr().String(), + node5.Listeners[0].Addr().String(), + } + node7.Config.ConnectIPs = node6.Config.ConnectIPs + node6 = startNode(t, node6) + defer node6.Stop() + node7 = startNode(t, node7) + defer node7.Stop() + + // Verify full graph between active validators. + waitForValidatorFullGraph(t, node1, node2, node3, node4, node5) + // Verify connections of non-validators. 
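+ // (waitForValidatorConnectionOneWay presumably asserts only the non-validator's view of each validator;
+ // the validators' own view of node6 and node7 is checked separately below.)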
+ for _, nonValidator := range []*cmd.Node{node6, node7} { + waitForValidatorConnectionOneWay(t, nonValidator, node1, node2, node3, node4, node5) + } + // Verify connections of initial validators. + for _, validator := range []*cmd.Node{node1, node2, node3, node4, node5} { + waitForNonValidatorInboundConnection(t, validator, node6) + waitForNonValidatorInboundConnection(t, validator, node7) + } + // Verify connection counts of active validators. + for _, validator := range []*cmd.Node{node1, node2, node3, node4, node5} { + waitForMinNonValidatorCountRemoteNodeIndexer(t, validator, 6, 4, 0, 2) + } + // NOOP Verify connection counts of inactive validators. + // Verify connection counts of non-validators. + waitForCountRemoteNodeIndexer(t, node6, 5, 5, 0, 0) + waitForCountRemoteNodeIndexer(t, node7, 5, 5, 0, 0) + t.Logf("Test #1 passed | Successfully run validators node1, node2, node3, node4, node5; non-validators node6, node7") + + // Remove node1 and node2 from the validator set. + setGetActiveValidatorImplWithValidatorNodes(t, node3, node4, node5) + // Verify full graph between active validators. + waitForValidatorFullGraph(t, node3, node4, node5) + // Verify connections of non-validators. + for _, nonValidator := range []*cmd.Node{node1, node2, node6, node7} { + waitForValidatorConnectionOneWay(t, nonValidator, node3, node4, node5) + } + // Verify connections of initial validators. + for _, validator := range []*cmd.Node{node1, node2, node3, node4, node5} { + waitForNonValidatorInboundConnection(t, validator, node6) + waitForNonValidatorInboundConnection(t, validator, node7) + } + // Verify connections of active validators. + for _, validator := range []*cmd.Node{node3, node4, node5} { + waitForNonValidatorInboundXOROutboundConnection(t, validator, node1) + waitForNonValidatorInboundXOROutboundConnection(t, validator, node2) + waitForMinNonValidatorCountRemoteNodeIndexer(t, validator, 6, 2, 0, 2) + } + // Verify connection counts of inactive validators. + for _, validator := range []*cmd.Node{node1, node2} { + waitForMinNonValidatorCountRemoteNodeIndexer(t, validator, 6, 3, 0, 2) + } + // Verify connection counts of non-validators. + waitForCountRemoteNodeIndexer(t, node6, 5, 3, 2, 0) + waitForCountRemoteNodeIndexer(t, node7, 5, 3, 2, 0) + t.Logf("Test #2 passed | Successfully run validators node3, node4, node5; inactive-validators node1, node2; " + + "non-validators node6, node7") + + // Remove node3 from the validator set. Make node1 active again. + setGetActiveValidatorImplWithValidatorNodes(t, node1, node4, node5) + // Verify full graph between active validators. + waitForValidatorFullGraph(t, node1, node4, node5) + // Verify connections of non-validators. + for _, nonValidator := range []*cmd.Node{node2, node3, node6, node7} { + waitForValidatorConnectionOneWay(t, nonValidator, node1, node4, node5) + } + // Verify connections of initial validators. + for _, validator := range []*cmd.Node{node1, node2, node3, node4, node5} { + waitForNonValidatorInboundConnection(t, validator, node6) + waitForNonValidatorInboundConnection(t, validator, node7) + } + // Verify connections of active validators. + for _, validator := range []*cmd.Node{node1, node4, node5} { + waitForNonValidatorInboundXOROutboundConnection(t, validator, node2) + waitForNonValidatorInboundXOROutboundConnection(t, validator, node3) + waitForMinNonValidatorCountRemoteNodeIndexer(t, validator, 6, 2, 0, 2) + } + // Verify connection counts of inactive validators. 
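+ // (The four counts passed to these helpers appear to be, in order: total remote nodes, validators,
+ // non-validator outbound, and non-validator inbound; the "Min" variants appear to treat them as lower bounds.)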
+ for _, validator := range []*cmd.Node{node2, node3} { + waitForMinNonValidatorCountRemoteNodeIndexer(t, validator, 6, 3, 0, 2) + } + // Verify connection counts of non-validators. + waitForCountRemoteNodeIndexer(t, node6, 5, 3, 2, 0) + waitForCountRemoteNodeIndexer(t, node7, 5, 3, 2, 0) + t.Logf("Test #3 passed | Successfully run validators node1, node4, node5; inactive validators node2, node3; " + + "non-validators node6, node7") + + // Make all validators inactive. + setGetActiveValidatorImplWithValidatorNodes(t) + // NOOP Verify full graph between active validators. + // NOOP Verify connections of non-validators. + // Verify connections of initial validators. + for _, validator := range []*cmd.Node{node1, node2, node3, node4, node5} { + waitForNonValidatorInboundConnection(t, validator, node6) + waitForNonValidatorInboundConnection(t, validator, node7) + } + // NOOP Verify connections of active validators. + // Verify connections and counts of inactive validators. + inactiveValidators := []*cmd.Node{node1, node2, node3, node4, node5} + for ii := 0; ii < len(inactiveValidators); ii++ { + for jj := ii + 1; jj < len(inactiveValidators); jj++ { + waitForNonValidatorInboundXOROutboundConnection(t, inactiveValidators[ii], inactiveValidators[jj]) + } + } + inactiveValidatorsRev := []*cmd.Node{node5, node4, node3, node2, node1} + for ii := 0; ii < len(inactiveValidatorsRev); ii++ { + for jj := ii + 1; jj < len(inactiveValidatorsRev); jj++ { + waitForNonValidatorInboundXOROutboundConnection(t, inactiveValidatorsRev[ii], inactiveValidatorsRev[jj]) + } + } + for _, validator := range inactiveValidators { + waitForMinNonValidatorCountRemoteNodeIndexer(t, validator, 6, 0, 0, 2) + } + // Verify connection counts of non-validators. + waitForCountRemoteNodeIndexer(t, node6, 5, 0, 5, 0) + waitForCountRemoteNodeIndexer(t, node7, 5, 0, 5, 0) + t.Logf("Test #4 passed | Successfully run inactive validators node1, node2, node3, node4, node5; " + + "non-validators node6, node7") +} + +func TestConnectionControllerValidatorInboundDeduplication(t *testing.T) { + require := require.New(t) + t.Cleanup(func() { + setGetActiveValidatorImpl(lib.BasicGetActiveValidators) + }) + + // Spawn a non-validator node1, and two validators node2, node3. The validator nodes will have the same public key. + // Node2 and node3 will not initially be in the validator set. First, node2 will start an outbound connection to + // node1. We wait until the node2 is re-indexed as non-validator by node1, and then we make node3 open an outbound + // connection to node1. We wait until node3 is re-indexed as non-validator by node1. Then, we make node2 and node3 + // join the validator set (i.e. add one entry with the duplicated public key). Now, node1 should disconnect from + // either node2 or node3 because of duplicate public key. + + node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") + blsPriv2, err := bls.NewPrivateKey() + require.NoError(err) + node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2) + node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv2) + + node1 = startNode(t, node1) + defer node1.Stop() + node2 = startNode(t, node2) + defer node2.Stop() + node3 = startNode(t, node3) + defer node3.Stop() + + cc2 := node2.Server.GetConnectionController() + require.NoError(cc2.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + // First wait for node2 to be indexed as a validator by node1. 
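+ // (node2 advertises a validator public key during the handshake, so node1 indexes it as a validator at first
+ // even though the key is not yet in the active validator set; the validator connector then re-indexes it.)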
+ waitForValidatorConnection(t, node1, node2) + // Now wait for node2 to be re-indexed as a non-validator. + waitForNonValidatorInboundConnectionDynamic(t, node1, node2, true) + waitForNonValidatorOutboundConnection(t, node2, node1) + + // Now connect node3 to node1. + cc3 := node3.Server.GetConnectionController() + require.NoError(cc3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + // First wait for node3 to be indexed as a validator by node1. + waitForValidatorConnection(t, node1, node3) + // Now wait for node3 to be re-indexed as a non-validator. + waitForNonValidatorInboundConnectionDynamic(t, node1, node3, true) + waitForNonValidatorOutboundConnection(t, node3, node1) + + // Now add node2 and node3 to the validator set. + setGetActiveValidatorImplWithValidatorNodes(t, node2) + // Now wait for node1 to disconnect from either node2 or node3. + waitForCountRemoteNodeIndexer(t, node1, 1, 1, 0, 0) + t.Logf("Test #1 passed | Successfully run non-validator node1; validators node2, node3 with duplicate public key") +} + func TestConnectionControllerNonValidatorCircularConnectIps(t *testing.T) { node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") diff --git a/integration_testing/connection_controller_utils_test.go b/integration_testing/connection_controller_utils_test.go index 74a33b943..43cf418bc 100644 --- a/integration_testing/connection_controller_utils_test.go +++ b/integration_testing/connection_controller_utils_test.go @@ -69,6 +69,14 @@ func waitForNonValidatorInboundConnection(t *testing.T, node1 *cmd.Node, node2 * waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to inbound non-validator Node (%s)", userAgentN1, userAgentN2), condition) } +func waitForNonValidatorInboundConnectionDynamic(t *testing.T, node1 *cmd.Node, node2 *cmd.Node, inactiveValidator bool) { + userAgentN1 := node1.Params.UserAgent + userAgentN2 := node2.Params.UserAgent + condition := conditionNonValidatorInboundConnectionDynamic(t, node1, node2, inactiveValidator) + waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to connect to inbound non-validator Node (%s), "+ + "inactiveValidator (%v)", userAgentN1, userAgentN2, inactiveValidator), condition) +} + func conditionNonValidatorInboundConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) func() bool { return conditionNonValidatorInboundConnectionDynamic(t, node1, node2, false) } diff --git a/lib/connection_controller.go b/lib/connection_controller.go index 18d423f46..2f6cbb2db 100644 --- a/lib/connection_controller.go +++ b/lib/connection_controller.go @@ -87,11 +87,12 @@ func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, handsh } func (cc *ConnectionController) Start() { - cc.startGroup.Add(1) + cc.startGroup.Add(2) go cc.startPersistentConnector() + go cc.startValidatorConnector() cc.startGroup.Wait() - cc.exitGroup.Add(1) + cc.exitGroup.Add(2) } func (cc *ConnectionController) Stop() { @@ -116,6 +117,26 @@ func (cc *ConnectionController) startPersistentConnector() { } } +// startValidatorConnector is responsible for ensuring that the node is connected to all active validators. It does +// this in two steps. First, it looks through the already established connections and checks if any of these connections +// are validators. If they are, it adds them to the validator index. It also checks if any of the existing validators +// are no longer active and removes them from the validator index. 
Second, it checks if any of the active validators
+// are missing from the validator index. If they are, it attempts to connect to them.
+func (cc *ConnectionController) startValidatorConnector() {
+ cc.startGroup.Done()
+ for {
+ select {
+ case <-cc.exitChan:
+ cc.exitGroup.Done()
+ return
+ case <-time.After(1 * time.Second):
+ activeValidatorsMap := GetActiveValidatorImpl()
+ cc.refreshValidatorIndex(activeValidatorsMap)
+ cc.connectValidators(activeValidatorsMap)
+ }
+ }
+}
+
 // ###########################
 // ## Handlers (Peer, DeSoMessage)
 // ###########################
@@ -231,6 +252,84 @@ func (cc *ConnectionController) refreshConnectIps() {
 }
 }

+// ###########################
+// ## Validator Connections
+// ###########################
+
+// refreshValidatorIndex re-indexes validators based on the activeValidatorsMap. It is called periodically by the
+// validator connector.
+func (cc *ConnectionController) refreshValidatorIndex(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry]) {
+ // De-index inactive validators. We skip any checks regarding a RemoteNode's connection status, and we don't verify
+ // whether de-indexing the validator would result in an excess number of outbound/inbound connections. Any excess
+ // connections will be cleaned up by the peer connector.
+ validatorRemoteNodeMap := cc.rnManager.GetValidatorIndex().Copy()
+ for pk, rn := range validatorRemoteNodeMap {
+ // If the validator is no longer active, de-index it.
+ if _, ok := activeValidatorsMap.Get(pk); !ok {
+ cc.rnManager.SetNonValidator(rn)
+ cc.rnManager.UnsetValidator(rn)
+ }
+ }
+
+ // Look for validators in our existing outbound / inbound connections.
+ allNonValidators := cc.rnManager.GetAllNonValidators()
+ for _, rn := range allNonValidators {
+ // It is possible for a RemoteNode to be in the non-validator indices and still have a public key. This can happen
+ // if the RemoteNode advertised support for the SFValidator service flag during the handshake and provided us
+ // with a public key and a corresponding proof-of-possession signature.
+ pk := rn.GetValidatorPublicKey()
+ if pk == nil {
+ continue
+ }
+ // It is possible, through unlikely coincidence or outright malice, for two non-validators to present the same
+ // public key, which goes undetected during the handshake. To prevent this from affecting the indexing of the
+ // validator set, we check that the non-validator's public key is not already present in the validator index.
+ if _, ok := cc.rnManager.GetValidatorIndex().Get(pk.Serialize()); ok {
+ cc.rnManager.Disconnect(rn)
+ continue
+ }
+
+ // If the RemoteNode turns out to be in the validator set, index it.
+ if _, ok := activeValidatorsMap.Get(pk.Serialize()); ok {
+ cc.rnManager.SetValidator(rn)
+ cc.rnManager.UnsetNonValidator(rn)
+ }
+ }
+}
+
+// connectValidators attempts to connect to all active validators that are not already connected. It is called
+// periodically by the validator connector.
+func (cc *ConnectionController) connectValidators(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry]) {
+ // Look through the active validators and connect to any that we're not already connected to.
+ if cc.blsKeystore == nil {
+ return
+ }
+
+ validators := activeValidatorsMap.Copy()
+ for pk, validator := range validators {
+ _, exists := cc.rnManager.GetValidatorIndex().Get(pk)
+ // If we're already connected to the validator, continue.
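+ // (We also skip our own public key below, so a validator node doesn't attempt to dial itself.)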
+ if exists {
+ continue
+ }
+ if cc.blsKeystore.GetSigner().GetPublicKey().Serialize() == pk {
+ continue
+ }
+
+ publicKey, err := pk.Deserialize()
+ if err != nil {
+ continue
+ }
+
+ // For now, we only dial the first domain in the validator's domain list.
+ address := string(validator.Domains[0])
+ if err := cc.CreateValidatorConnection(address, publicKey); err != nil {
+ glog.V(2).Infof("ConnectionController.connectValidators: Problem connecting to validator %v: %v", address, err)
+ continue
+ }
+ }
+}
+
 func (cc *ConnectionController) CreateValidatorConnection(ipStr string, publicKey *bls.PublicKey) error {
 netAddr, err := cc.ConvertIPStringToNetAddress(ipStr)
 if err != nil {
From 390156459693f82a8f42757c1931cd599f0db1dc Mon Sep 17 00:00:00 2001
From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com>
Date: Sun, 28 Jan 2024 22:43:59 -0500
Subject: [PATCH 10/37] Revert "Code split" (#944)

This reverts commit 0604b6d3fc155177a2bb295e6635ed21b20dd947.
---
 .../connection_controller_routines_test.go | 43 +++++
 lib/block_view_types.go | 2 +-
 lib/connection_controller.go | 150 +++++++++++++++++-
 3 files changed, 192 insertions(+), 3 deletions(-)

diff --git a/integration_testing/connection_controller_routines_test.go b/integration_testing/connection_controller_routines_test.go
index 6eec830dd..95f289aba 100644
--- a/integration_testing/connection_controller_routines_test.go
+++ b/integration_testing/connection_controller_routines_test.go
@@ -302,6 +302,49 @@ func TestConnectionControllerValidatorInboundDeduplication(t *testing.T) {
 t.Logf("Test #1 passed | Successfully run non-validator node1; validators node2, node3 with duplicate public key")
 }

+func TestConnectionControllerNonValidatorConnector(t *testing.T) {
+ require := require.New(t)
+
+ // Spawn 6 non-validators node1, node2, node3, node4, node5, node6. Set node1's targetOutboundPeers to 0. Then make
+ // node1 create persistent outbound connections to node2, node3, and node4, as well as non-validator connections to
+ // node5 and node6. The connector should then drop the two non-persistent connections while keeping the three
+ // persistent ones.
+ node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") + node1.Config.TargetOutboundPeers = 0 + node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") + node3 := spawnNonValidatorNodeProtocol2(t, 18002, "node3") + node4 := spawnNonValidatorNodeProtocol2(t, 18003, "node4") + node5 := spawnNonValidatorNodeProtocol2(t, 18004, "node5") + node6 := spawnNonValidatorNodeProtocol2(t, 18005, "node6") + + node2 = startNode(t, node2) + defer node2.Stop() + node3 = startNode(t, node3) + defer node3.Stop() + node4 = startNode(t, node4) + defer node4.Stop() + node5 = startNode(t, node5) + defer node5.Stop() + node6 = startNode(t, node6) + defer node6.Stop() + + node1.Config.ConnectIPs = []string{ + node2.Listeners[0].Addr().String(), + node3.Listeners[0].Addr().String(), + node4.Listeners[0].Addr().String(), + } + node1 = startNode(t, node1) + defer node1.Stop() + + cc := node1.Server.GetConnectionController() + require.NoError(cc.CreateNonValidatorOutboundConnection(node5.Listeners[0].Addr().String())) + require.NoError(cc.CreateNonValidatorOutboundConnection(node6.Listeners[0].Addr().String())) + + waitForCountRemoteNodeIndexer(t, node1, 3, 0, 3, 0) + waitForNonValidatorOutboundConnection(t, node1, node2) + waitForNonValidatorOutboundConnection(t, node1, node3) + waitForNonValidatorOutboundConnection(t, node1, node4) +} + func TestConnectionControllerNonValidatorCircularConnectIps(t *testing.T) { node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") diff --git a/lib/block_view_types.go b/lib/block_view_types.go index b216804df..073eb4c2f 100644 --- a/lib/block_view_types.go +++ b/lib/block_view_types.go @@ -1331,7 +1331,7 @@ func (op *UtxoOperation) RawEncodeWithoutMetadata(blockHeight uint64, skipMetada data = append(data, EncodeToBytes(blockHeight, op.PrevLockupYieldCurvePoint, skipMetadata...)...) data = append(data, byte(op.PrevLockupTransferRestriction)) - // PrevSenderLockedBalanceEntry, PrevReceiverLockedBalanceEntry + // PrevSenderLockedBalanceEntry, PrevReceiverL*ockedBalanceEntry data = append(data, EncodeToBytes(blockHeight, op.PrevSenderLockedBalanceEntry, skipMetadata...)...) data = append(data, EncodeToBytes(blockHeight, op.PrevReceiverLockedBalanceEntry, skipMetadata...)...) diff --git a/lib/connection_controller.go b/lib/connection_controller.go index 2f6cbb2db..4bda85507 100644 --- a/lib/connection_controller.go +++ b/lib/connection_controller.go @@ -87,12 +87,13 @@ func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, handsh } func (cc *ConnectionController) Start() { - cc.startGroup.Add(2) + cc.startGroup.Add(3) go cc.startPersistentConnector() go cc.startValidatorConnector() + go cc.startNonValidatorConnector() cc.startGroup.Wait() - cc.exitGroup.Add(2) + cc.exitGroup.Add(3) } func (cc *ConnectionController) Stop() { @@ -137,6 +138,26 @@ func (cc *ConnectionController) startValidatorConnector() { } } +// startNonValidatorConnector is responsible for ensuring that the node is connected to the target number of outbound +// and inbound remote nodes. To do this, it periodically checks the number of outbound and inbound remote nodes, and +// if the number is above the target number, it disconnects the excess remote nodes. If the number is below the target +// number, it attempts to connect to new remote nodes. 
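+// Like the persistent and validator connectors, it polls once per second and exits when the controller's
+// exitChan is closed.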
+func (cc *ConnectionController) startNonValidatorConnector() { + cc.startGroup.Done() + + for { + select { + case <-cc.exitChan: + cc.exitGroup.Done() + return + case <-time.After(1 * time.Second): + cc.refreshNonValidatorOutboundIndex() + cc.refreshNonValidatorInboundIndex() + cc.connectNonValidators() + } + } +} + // ########################### // ## Handlers (Peer, DeSoMessage) // ########################### @@ -330,6 +351,131 @@ func (cc *ConnectionController) connectValidators(activeValidatorsMap *collectio } } +// ########################### +// ## NonValidator Connections +// ########################### + +// refreshNonValidatorOutboundIndex is called periodically by the peer connector. It is responsible for disconnecting excess +// outbound remote nodes. +func (cc *ConnectionController) refreshNonValidatorOutboundIndex() { + // There are three categories of outbound remote nodes: attempted, connected, and persistent. All of these + // remote nodes are stored in the same non-validator outbound index. We want to disconnect excess remote nodes that + // are not persistent, starting with the attempted nodes first. + + // First let's run a quick check to see if the number of our non-validator remote nodes exceeds our target. Note that + // this number will include the persistent nodes. + numOutboundRemoteNodes := uint32(cc.rnManager.GetNonValidatorOutboundIndex().Count()) + if numOutboundRemoteNodes <= cc.targetNonValidatorOutboundRemoteNodes { + return + } + + // If we get here, it means that we should potentially disconnect some remote nodes. Let's first separate the + // attempted and connected remote nodes, ignoring the persistent ones. + allOutboundRemoteNodes := cc.rnManager.GetNonValidatorOutboundIndex().GetAll() + var attemptedOutboundRemoteNodes, connectedOutboundRemoteNodes []*RemoteNode + for _, rn := range allOutboundRemoteNodes { + if rn.IsPersistent() { + // We do nothing for persistent remote nodes. + continue + } else if rn.IsHandshakeCompleted() { + connectedOutboundRemoteNodes = append(connectedOutboundRemoteNodes, rn) + } else { + attemptedOutboundRemoteNodes = append(attemptedOutboundRemoteNodes, rn) + } + } + + // Having separated the attempted and connected remote nodes, we can now find the actual number of attempted and + // connected remote nodes. We can then find out how many remote nodes we need to disconnect. + numOutboundRemoteNodes = uint32(len(attemptedOutboundRemoteNodes) + len(connectedOutboundRemoteNodes)) + excessiveOutboundRemoteNodes := uint32(0) + if numOutboundRemoteNodes > cc.targetNonValidatorOutboundRemoteNodes { + excessiveOutboundRemoteNodes = numOutboundRemoteNodes - cc.targetNonValidatorOutboundRemoteNodes + } + + // First disconnect the attempted remote nodes. + for _, rn := range attemptedOutboundRemoteNodes { + if excessiveOutboundRemoteNodes == 0 { + break + } + cc.rnManager.Disconnect(rn) + excessiveOutboundRemoteNodes-- + } + // Now disconnect the connected remote nodes, if we still have too many remote nodes. + for _, rn := range connectedOutboundRemoteNodes { + if excessiveOutboundRemoteNodes == 0 { + break + } + cc.rnManager.Disconnect(rn) + excessiveOutboundRemoteNodes-- + } +} + +// refreshNonValidatorInboundIndex is called periodically by the non-validator connector. It is responsible for +// disconnecting excess inbound remote nodes. +func (cc *ConnectionController) refreshNonValidatorInboundIndex() { + // First let's check if we have an excess number of inbound remote nodes. If we do, we'll disconnect some of them. 
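+ // Unlike the outbound refresh, there is no persistent-node carve-out here, since inbound connections are
+ // never dialed as persistent.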
+ numConnectedInboundRemoteNodes := uint32(cc.rnManager.GetNonValidatorInboundIndex().Count()) + excessiveInboundRemoteNodes := uint32(0) + if numConnectedInboundRemoteNodes > cc.targetNonValidatorInboundRemoteNodes { + excessiveInboundRemoteNodes = numConnectedInboundRemoteNodes - cc.targetNonValidatorInboundRemoteNodes + } + // Disconnect random inbound non-validators if we have too many of them. + inboundRemoteNodes := cc.rnManager.GetNonValidatorInboundIndex().GetAll() + for _, rn := range inboundRemoteNodes { + if excessiveInboundRemoteNodes == 0 { + break + } + cc.rnManager.Disconnect(rn) + excessiveInboundRemoteNodes-- + } +} + +func (cc *ConnectionController) connectNonValidators() { + numOutboundPeers := uint32(cc.rnManager.GetNonValidatorOutboundIndex().Count()) + + remainingOutboundPeers := uint32(0) + if numOutboundPeers < cc.targetNonValidatorOutboundRemoteNodes { + remainingOutboundPeers = cc.targetNonValidatorOutboundRemoteNodes - numOutboundPeers + } + for ii := uint32(0); ii < remainingOutboundPeers; ii++ { + addr := cc.getRandomUnconnectedAddress() + if addr == nil { + break + } + cc.AddrMgr.Attempt(addr) + if err := cc.rnManager.CreateNonValidatorOutboundConnection(addr); err != nil { + glog.V(2).Infof("ConnectionController.connectNonValidators: Problem connecting to addr %v: %v", addr, err) + } + } +} + +func (cc *ConnectionController) getRandomUnconnectedAddress() *wire.NetAddress { + for tries := 0; tries < 100; tries++ { + addr := cc.AddrMgr.GetAddress() + if addr == nil { + break + } + + if cc.cmgr.IsConnectedOutboundIpAddress(addr.NetAddress()) { + continue + } + + if cc.cmgr.IsAttemptedOutboundIpAddress(addr.NetAddress()) { + continue + } + + // We can only have one outbound address per /16. This is similar to + // Bitcoin and we do it to prevent Sybil attacks. + if cc.cmgr.IsFromRedundantOutboundIPAddress(addr.NetAddress()) { + continue + } + + return addr.NetAddress() + } + + return nil +} + func (cc *ConnectionController) CreateValidatorConnection(ipStr string, publicKey *bls.PublicKey) error { netAddr, err := cc.ConvertIPStringToNetAddress(ipStr) if err != nil { From 3543f2c79e7b7d0e708f144ffa087b14d7065053 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Mon, 29 Jan 2024 13:13:51 -0800 Subject: [PATCH 11/37] Nits --- lib/block_view_types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/block_view_types.go b/lib/block_view_types.go index 55777718c..77918b209 100644 --- a/lib/block_view_types.go +++ b/lib/block_view_types.go @@ -1370,7 +1370,7 @@ func (op *UtxoOperation) RawEncodeWithoutMetadata(blockHeight uint64, skipMetada data = append(data, EncodeToBytes(blockHeight, op.PrevLockupYieldCurvePoint, skipMetadata...)...) data = append(data, byte(op.PrevLockupTransferRestriction)) - // PrevSenderLockedBalanceEntry, PrevReceiverL*ockedBalanceEntry + // PrevSenderLockedBalanceEntry, PrevReceiverLockedBalanceEntry data = append(data, EncodeToBytes(blockHeight, op.PrevSenderLockedBalanceEntry, skipMetadata...)...) data = append(data, EncodeToBytes(blockHeight, op.PrevReceiverLockedBalanceEntry, skipMetadata...)...) From 927df23540ba4bb8b5510460666e89aab290a9a1 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:51:14 -0800 Subject: [PATCH 12/37] PoS NetworkManager RemoteNode Cleanup (#945) * Revert "Code split" This reverts commit c0c32f3943ead0e06fdfb3343954a6b5273ea887. 
* Review * Sync trunk * Rename --- collections/concurrent_map.go | 13 +- collections/concurrent_map_test.go | 2 +- consensus/integration_test_types.go | 4 + consensus/types.go | 1 + consensus/types_internal.go | 4 + .../connection_controller_routines_test.go | 347 ++++++++++++------ .../connection_controller_test.go | 265 ++++++------- .../connection_controller_utils_test.go | 53 ++- integration_testing/tools.go | 2 +- lib/block_view_validator.go | 4 + lib/connection_controller.go | 152 +++++--- lib/connection_manager.go | 6 +- lib/constants.go | 9 + lib/handshake_controller.go | 16 +- lib/network.go | 17 - lib/network_connection.go | 10 +- lib/peer.go | 12 +- lib/remote_node.go | 50 ++- lib/remote_node_manager.go | 37 +- lib/server.go | 11 +- 20 files changed, 652 insertions(+), 363 deletions(-) diff --git a/collections/concurrent_map.go b/collections/concurrent_map.go index 7e3b1b09f..e16d64dc7 100644 --- a/collections/concurrent_map.go +++ b/collections/concurrent_map.go @@ -39,7 +39,18 @@ func (cm *ConcurrentMap[Key, Value]) Get(key Key) (Value, bool) { return val, ok } -func (cm *ConcurrentMap[Key, Value]) Copy() map[Key]Value { +func (cm *ConcurrentMap[Key, Value]) Clone() *ConcurrentMap[Key, Value] { + cm.mtx.RLock() + defer cm.mtx.RUnlock() + + clone := NewConcurrentMap[Key, Value]() + for key, val := range cm.m { + clone.Set(key, val) + } + return clone +} + +func (cm *ConcurrentMap[Key, Value]) ToMap() map[Key]Value { cm.mtx.RLock() defer cm.mtx.RUnlock() diff --git a/collections/concurrent_map_test.go b/collections/concurrent_map_test.go index 46fc0c9ba..aac89b2fb 100644 --- a/collections/concurrent_map_test.go +++ b/collections/concurrent_map_test.go @@ -36,7 +36,7 @@ func TestConcurrentMap(t *testing.T) { } // test copy - copy := m.Copy() + copy := m.ToMap() for key, val := range control { if mVal, ok := copy[key]; !ok || mVal != val { t.Errorf("Expected %d, got %d", val, m.m[key]) diff --git a/consensus/integration_test_types.go b/consensus/integration_test_types.go index 3613e63e7..f674332c6 100644 --- a/consensus/integration_test_types.go +++ b/consensus/integration_test_types.go @@ -102,6 +102,10 @@ func (node *validatorNode) GetStakeAmount() *uint256.Int { return node.stake } +func (node *validatorNode) GetDomains() [][]byte { + return [][]byte{} +} + func (node *validatorNode) ProcessBlock(incomingBlock *block) { node.lock.Lock() defer node.lock.Unlock() diff --git a/consensus/types.go b/consensus/types.go index a4714f109..de134453e 100644 --- a/consensus/types.go +++ b/consensus/types.go @@ -76,6 +76,7 @@ type BlockHash interface { type Validator interface { GetPublicKey() *bls.PublicKey GetStakeAmount() *uint256.Int + GetDomains() [][]byte } type AggregateQuorumCertificate interface { diff --git a/consensus/types_internal.go b/consensus/types_internal.go index 7b98ce1a2..1eb40eeb7 100644 --- a/consensus/types_internal.go +++ b/consensus/types_internal.go @@ -35,6 +35,10 @@ func (v *validator) GetStakeAmount() *uint256.Int { return v.stakeAmount } +func (v *validator) GetDomains() [][]byte { + return [][]byte{} +} + //////////////////////////////////////////////////////////////////////// // AggregateQuorumCertificate interface implementation for internal use. 
// We use this type for unit tests, and to construct timeout QCs for diff --git a/integration_testing/connection_controller_routines_test.go b/integration_testing/connection_controller_routines_test.go index 95f289aba..1f30e22b5 100644 --- a/integration_testing/connection_controller_routines_test.go +++ b/integration_testing/connection_controller_routines_test.go @@ -5,36 +5,32 @@ import ( "github.com/deso-protocol/core/bls" "github.com/deso-protocol/core/cmd" "github.com/deso-protocol/core/collections" + "github.com/deso-protocol/core/consensus" "github.com/deso-protocol/core/lib" "github.com/stretchr/testify/require" + "github.com/tyler-smith/go-bip39" "testing" + "time" ) func TestConnectionControllerInitiatePersistentConnections(t *testing.T) { - require := require.New(t) - t.Cleanup(func() { - setGetActiveValidatorImpl(lib.BasicGetActiveValidators) - }) - // NonValidator Node1 will set its --connect-ips to two non-validators node2 and node3, // and two validators node4 and node5. node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") node3 := spawnNonValidatorNodeProtocol2(t, 18002, "node3") - blsPriv4, err := bls.NewPrivateKey() - require.NoError(err) - node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4) - blsPriv5, err := bls.NewPrivateKey() - require.NoError(err) - node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv5) + blsSeedPhrase4, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsSeedPhrase4) + blsSeedPhrase5, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsSeedPhrase5) node2 = startNode(t, node2) node3 = startNode(t, node3) node4 = startNode(t, node4) node5 = startNode(t, node5) - setGetActiveValidatorImplWithValidatorNodes(t, node4, node5) - node1.Config.ConnectIPs = []string{ node2.Listeners[0].Addr().String(), node3.Listeners[0].Addr().String(), @@ -42,23 +38,25 @@ func TestConnectionControllerInitiatePersistentConnections(t *testing.T) { node5.Listeners[0].Addr().String(), } node1 = startNode(t, node1) + activeValidatorsMap := getActiveValidatorsMapWithValidatorNodes(t, node4, node5) + setActiveValidators(activeValidatorsMap, node1, node2, node3, node4, node5) waitForNonValidatorOutboundConnection(t, node1, node2) waitForNonValidatorOutboundConnection(t, node1, node3) waitForValidatorConnection(t, node1, node4) waitForValidatorConnection(t, node1, node5) waitForValidatorConnection(t, node4, node5) - waitForCountRemoteNodeIndexer(t, node1, 4, 2, 2, 0) - waitForCountRemoteNodeIndexer(t, node2, 1, 0, 0, 1) - waitForCountRemoteNodeIndexer(t, node3, 1, 0, 0, 1) - waitForCountRemoteNodeIndexer(t, node4, 2, 1, 0, 1) - waitForCountRemoteNodeIndexer(t, node5, 2, 1, 0, 1) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node1, 4, 2, 2, 0) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node2, 1, 0, 0, 1) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node3, 1, 0, 0, 1) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node4, 2, 1, 0, 1) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node5, 2, 1, 0, 1) node1.Stop() t.Logf("Test #1 passed | Successfully run non-validator node1 with --connect-ips set to node2, node3, node4, node5") // Now try again with a validator node6, with connect-ips set to node2, node3, node4, node5. 
- blsPriv6, err := bls.NewPrivateKey()
- require.NoError(err)
- node6 := spawnValidatorNodeProtocol2(t, 18005, "node6", blsPriv6)
+ blsSeedPhrase6, err := bip39.NewMnemonic(lib.RandomBytes(32))
+ require.NoError(t, err)
+ node6 := spawnValidatorNodeProtocol2(t, 18005, "node6", blsSeedPhrase6)
 node6.Config.ConnectIPs = []string{
 node2.Listeners[0].Addr().String(),
 node3.Listeners[0].Addr().String(),
@@ -66,66 +64,118 @@ func TestConnectionControllerInitiatePersistentConnections(t *testing.T) {
 node5.Listeners[0].Addr().String(),
 }
 node6 = startNode(t, node6)
- setGetActiveValidatorImplWithValidatorNodes(t, node4, node5, node6)
+ activeValidatorsMap = getActiveValidatorsMapWithValidatorNodes(t, node4, node5, node6)
+ setActiveValidators(activeValidatorsMap, node1, node2, node3, node4, node5, node6)

 waitForNonValidatorOutboundConnection(t, node6, node2)
 waitForNonValidatorOutboundConnection(t, node6, node3)
 waitForValidatorConnection(t, node6, node4)
 waitForValidatorConnection(t, node6, node5)
 waitForValidatorConnection(t, node4, node5)
- waitForCountRemoteNodeIndexer(t, node6, 4, 2, 2, 0)
- waitForCountRemoteNodeIndexer(t, node2, 1, 1, 0, 0)
- waitForCountRemoteNodeIndexer(t, node3, 1, 1, 0, 0)
- waitForCountRemoteNodeIndexer(t, node4, 2, 2, 0, 0)
- waitForCountRemoteNodeIndexer(t, node5, 2, 2, 0, 0)
+ waitForCountRemoteNodeIndexerHandshakeCompleted(t, node6, 4, 2, 2, 0)
+ waitForCountRemoteNodeIndexerHandshakeCompleted(t, node2, 1, 1, 0, 0)
+ waitForCountRemoteNodeIndexerHandshakeCompleted(t, node3, 1, 1, 0, 0)
+ waitForCountRemoteNodeIndexerHandshakeCompleted(t, node4, 2, 2, 0, 0)
+ waitForCountRemoteNodeIndexerHandshakeCompleted(t, node5, 2, 2, 0, 0)
+ t.Logf("Test #2 passed | Successfully run validator node6 with --connect-ips set to node2, node3, node4, node5")
+}
+
+func TestConnectionControllerNonValidatorCircularConnectIps(t *testing.T) {
+ node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+ node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2")
+
+ node1.Config.ConnectIPs = []string{"127.0.0.1:18001"}
+ node2.Config.ConnectIPs = []string{"127.0.0.1:18000"}
+
+ node1 = startNode(t, node1)
+ node2 = startNode(t, node2)
+
+ waitForCountRemoteNodeIndexerHandshakeCompleted(t, node1, 2, 0, 1, 1)
+ waitForCountRemoteNodeIndexerHandshakeCompleted(t, node2, 2, 0, 1, 1)
+}
+
+func TestNetworkManagerPersistentConnectorReconnect(t *testing.T) {
+ // Ensure that a persistent connection that gets dropped is automatically re-established.
+ // Spawn three nodes: non-validators node1 and node2, and a validator node3. Then set node1's connect-ips
+ // to node2, node3, and a non-existent IP. Then we stop node2 and wait for node1 to drop the
+ // connection. Then we restart node2 and wait for node1 to reconnect to it. We repeat this
+ // process for node3.
+
+ node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+ // Set TargetOutboundPeers to 0 to ensure the non-validator connector doesn't interfere.
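+ // (Left at the default, connectNonValidators would keep dialing random peers from the address manager,
+ // making the exact connection counts asserted below nondeterministic.)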
+ node1.Config.TargetOutboundPeers = 0 + + node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") + blsSeedPhrase3, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsSeedPhrase3) + + node2 = startNode(t, node2) + node3 = startNode(t, node3) + + node1.Config.ConnectIPs = []string{ + node2.Listeners[0].Addr().String(), + node3.Listeners[0].Addr().String(), + "127.0.0.1:18003", + } + node1 = startNode(t, node1) + activeValidatorsMap := getActiveValidatorsMapWithValidatorNodes(t, node3) + setActiveValidators(activeValidatorsMap, node1, node2, node3) + + waitForNonValidatorOutboundConnection(t, node1, node2) + waitForValidatorConnection(t, node1, node3) + waitForCountRemoteNodeIndexer(t, node1, 3, 1, 2, 0) + node2.Stop() + waitForCountRemoteNodeIndexer(t, node1, 2, 1, 1, 0) + // node1 should reopen the connection to node2, and it should be re-indexed as a non-validator (attempted). + waitForCountRemoteNodeIndexer(t, node1, 3, 1, 2, 0) + node2 = startNode(t, node2) + setActiveValidators(activeValidatorsMap, node2) + waitForCountRemoteNodeIndexer(t, node1, 3, 1, 2, 0) + t.Logf("Test #1 passed | Successfully run reconnect test with non-validator node1 with --connect-ips for node2") + + // Now we will do the same for node3. node3.Stop() - node4.Stop() - node5.Stop() - node6.Stop() - t.Logf("Test #2 passed | Successfully run validator node6 with --connect-ips set to node2, node3, node4, node5") + waitForCountRemoteNodeIndexer(t, node1, 2, 0, 2, 0) + // node1 should reopen the connection to node3, and it should be re-indexed as a non-validator (attempted). + waitForCountRemoteNodeIndexer(t, node1, 3, 0, 3, 0) + node3 = startNode(t, node3) + setActiveValidators(activeValidatorsMap, node3) + waitForValidatorConnection(t, node1, node3) + waitForCountRemoteNodeIndexer(t, node1, 3, 1, 2, 0) + t.Logf("Test #2 passed | Successfully run reconnect test with non-validator node1 with --connect-ips for node3") } func TestConnectionControllerValidatorConnector(t *testing.T) { - require := require.New(t) - t.Cleanup(func() { - setGetActiveValidatorImpl(lib.BasicGetActiveValidators) - }) - // Spawn 5 validators node1, node2, node3, node4, node5 and two non-validators node6 and node7. // All the validators are initially in the validator set. And later, node1 and node2 will be removed from the // validator set. Then, make node3 inactive, and node2 active again. Then, make all the validators inactive. // Make node6, and node7 connect-ips to all the validators. 
- blsPriv1, err := bls.NewPrivateKey() - require.NoError(err) - node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsPriv1) - blsPriv2, err := bls.NewPrivateKey() - require.NoError(err) - node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2) - blsPriv3, err := bls.NewPrivateKey() - require.NoError(err) - node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv3) - blsPriv4, err := bls.NewPrivateKey() - require.NoError(err) - node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4) - blsPriv5, err := bls.NewPrivateKey() - require.NoError(err) - node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv5) + blsSeedPhrase1, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsSeedPhrase1) + blsSeedPhrase2, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsSeedPhrase2) + blsSeedPhrase3, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsSeedPhrase3) + blsSeedPhrase4, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsSeedPhrase4) + blsSeedPhrase5, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsSeedPhrase5) node6 := spawnNonValidatorNodeProtocol2(t, 18005, "node6") node7 := spawnNonValidatorNodeProtocol2(t, 18006, "node7") node1 = startNode(t, node1) - defer node1.Stop() node2 = startNode(t, node2) - defer node2.Stop() node3 = startNode(t, node3) - defer node3.Stop() node4 = startNode(t, node4) - defer node4.Stop() node5 = startNode(t, node5) - defer node5.Stop() - setGetActiveValidatorImplWithValidatorNodes(t, node1, node2, node3, node4, node5) node6.Config.ConnectIPs = []string{ node1.Listeners[0].Addr().String(), @@ -136,9 +186,9 @@ func TestConnectionControllerValidatorConnector(t *testing.T) { } node7.Config.ConnectIPs = node6.Config.ConnectIPs node6 = startNode(t, node6) - defer node6.Stop() node7 = startNode(t, node7) - defer node7.Stop() + activeValidatorsMap := getActiveValidatorsMapWithValidatorNodes(t, node1, node2, node3, node4, node5) + setActiveValidators(activeValidatorsMap, node1, node2, node3, node4, node5, node6, node7) // Verify full graph between active validators. waitForValidatorFullGraph(t, node1, node2, node3, node4, node5) @@ -162,7 +212,8 @@ func TestConnectionControllerValidatorConnector(t *testing.T) { t.Logf("Test #1 passed | Successfully run validators node1, node2, node3, node4, node5; non-validators node6, node7") // Remove node1 and node2 from the validator set. - setGetActiveValidatorImplWithValidatorNodes(t, node3, node4, node5) + activeValidatorsMap = getActiveValidatorsMapWithValidatorNodes(t, node3, node4, node5) + setActiveValidators(activeValidatorsMap, node1, node2, node3, node4, node5, node6, node7) // Verify full graph between active validators. waitForValidatorFullGraph(t, node3, node4, node5) // Verify connections of non-validators. @@ -185,13 +236,14 @@ func TestConnectionControllerValidatorConnector(t *testing.T) { waitForMinNonValidatorCountRemoteNodeIndexer(t, validator, 6, 3, 0, 2) } // Verify connection counts of non-validators. 
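+ // (The HandshakeCompleted variants appear to count only remote nodes that have finished the handshake,
+ // which keeps these assertions stable while new connection attempts are still in flight.)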
- waitForCountRemoteNodeIndexer(t, node6, 5, 3, 2, 0) - waitForCountRemoteNodeIndexer(t, node7, 5, 3, 2, 0) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node6, 5, 3, 2, 0) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node7, 5, 3, 2, 0) t.Logf("Test #2 passed | Successfully run validators node3, node4, node5; inactive-validators node1, node2; " + "non-validators node6, node7") // Remove node3 from the validator set. Make node1 active again. - setGetActiveValidatorImplWithValidatorNodes(t, node1, node4, node5) + activeValidatorsMap = getActiveValidatorsMapWithValidatorNodes(t, node1, node4, node5) + setActiveValidators(activeValidatorsMap, node1, node2, node3, node4, node5, node6, node7) // Verify full graph between active validators. waitForValidatorFullGraph(t, node1, node4, node5) // Verify connections of non-validators. @@ -214,13 +266,14 @@ func TestConnectionControllerValidatorConnector(t *testing.T) { waitForMinNonValidatorCountRemoteNodeIndexer(t, validator, 6, 3, 0, 2) } // Verify connection counts of non-validators. - waitForCountRemoteNodeIndexer(t, node6, 5, 3, 2, 0) - waitForCountRemoteNodeIndexer(t, node7, 5, 3, 2, 0) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node6, 5, 3, 2, 0) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node7, 5, 3, 2, 0) t.Logf("Test #3 passed | Successfully run validators node1, node4, node5; inactive validators node2, node3; " + "non-validators node6, node7") // Make all validators inactive. - setGetActiveValidatorImplWithValidatorNodes(t) + activeValidatorsMap = getActiveValidatorsMapWithValidatorNodes(t) + setActiveValidators(activeValidatorsMap, node1, node2, node3, node4, node5, node6, node7) // NOOP Verify full graph between active validators. // NOOP Verify connections of non-validators. // Verify connections of initial validators. @@ -246,18 +299,13 @@ func TestConnectionControllerValidatorConnector(t *testing.T) { waitForMinNonValidatorCountRemoteNodeIndexer(t, validator, 6, 0, 0, 2) } // Verify connection counts of non-validators. - waitForCountRemoteNodeIndexer(t, node6, 5, 0, 5, 0) - waitForCountRemoteNodeIndexer(t, node7, 5, 0, 5, 0) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node6, 5, 0, 5, 0) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node7, 5, 0, 5, 0) t.Logf("Test #4 passed | Successfully run inactive validators node1, node2, node3, node4, node5; " + "non-validators node6, node7") } func TestConnectionControllerValidatorInboundDeduplication(t *testing.T) { - require := require.New(t) - t.Cleanup(func() { - setGetActiveValidatorImpl(lib.BasicGetActiveValidators) - }) - // Spawn a non-validator node1, and two validators node2, node3. The validator nodes will have the same public key. // Node2 and node3 will not initially be in the validator set. First, node2 will start an outbound connection to // node1. We wait until the node2 is re-indexed as non-validator by node1, and then we make node3 open an outbound @@ -266,20 +314,17 @@ func TestConnectionControllerValidatorInboundDeduplication(t *testing.T) { // either node2 or node3 because of duplicate public key. 
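+ // (The disconnect is driven by refreshValidatorIndex, which disconnects a non-validator whose public key
+ // already appears in the validator index.)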
node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") - blsPriv2, err := bls.NewPrivateKey() - require.NoError(err) - node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2) - node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv2) + blsSeedPhrase2, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsSeedPhrase2) + node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsSeedPhrase2) node1 = startNode(t, node1) - defer node1.Stop() node2 = startNode(t, node2) - defer node2.Stop() node3 = startNode(t, node3) - defer node3.Stop() cc2 := node2.Server.GetConnectionController() - require.NoError(cc2.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + require.NoError(t, cc2.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) // First wait for node2 to be indexed as a validator by node1. waitForValidatorConnection(t, node1, node2) // Now wait for node2 to be re-indexed as a non-validator. @@ -288,7 +333,7 @@ func TestConnectionControllerValidatorInboundDeduplication(t *testing.T) { // Now connect node3 to node1. cc3 := node3.Server.GetConnectionController() - require.NoError(cc3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + require.NoError(t, cc3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) // First wait for node3 to be indexed as a validator by node1. waitForValidatorConnection(t, node1, node3) // Now wait for node3 to be re-indexed as a non-validator. @@ -296,15 +341,14 @@ func TestConnectionControllerValidatorInboundDeduplication(t *testing.T) { waitForNonValidatorOutboundConnection(t, node3, node1) // Now add node2 and node3 to the validator set. - setGetActiveValidatorImplWithValidatorNodes(t, node2) + activeValidatorsMap := getActiveValidatorsMapWithValidatorNodes(t, node2) + setActiveValidators(activeValidatorsMap, node1, node2, node3) // Now wait for node1 to disconnect from either node2 or node3. - waitForCountRemoteNodeIndexer(t, node1, 1, 1, 0, 0) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node1, 1, 1, 0, 0) t.Logf("Test #1 passed | Successfully run non-validator node1; validators node2, node3 with duplicate public key") } -func TestConnectionControllerNonValidatorConnector(t *testing.T) { - require := require.New(t) - +func TestConnectionControllerNonValidatorConnectorOutbound(t *testing.T) { // Spawn 6 non-validators node1, node2, node3, node4, node5, node6. Set node1's targetOutboundPeers to 3. Then make // node1 create persistent outbound connections to node2, node3, and node4, as well as non-validator connections to // node5 and node6. 
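Aside, not part of the patch: every validator spawn in these tests repeats the same generate-mnemonic, assert-no-error, spawn boilerplate. A hypothetical helper along these lines would collapse it; the name mustSpawnValidatorNodeProtocol2 is illustrative, and everything it calls already exists in these test files.

func mustSpawnValidatorNodeProtocol2(t *testing.T, port uint32, id string) *cmd.Node {
	// Generate a throwaway BLS seed phrase, exactly as each test does inline today.
	blsSeedPhrase, err := bip39.NewMnemonic(lib.RandomBytes(32))
	require.NoError(t, err)
	return spawnValidatorNodeProtocol2(t, port, id, blsSeedPhrase)
}

Each spawn would then read as a single line, e.g. node1 := mustSpawnValidatorNodeProtocol2(t, 18000, "node1").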
@@ -317,15 +361,10 @@ func TestConnectionControllerNonValidatorConnector(t *testing.T) { node6 := spawnNonValidatorNodeProtocol2(t, 18005, "node6") node2 = startNode(t, node2) - defer node2.Stop() node3 = startNode(t, node3) - defer node3.Stop() node4 = startNode(t, node4) - defer node4.Stop() node5 = startNode(t, node5) - defer node5.Stop() node6 = startNode(t, node6) - defer node6.Stop() node1.Config.ConnectIPs = []string{ node2.Listeners[0].Addr().String(), @@ -333,54 +372,134 @@ func TestConnectionControllerNonValidatorConnector(t *testing.T) { node4.Listeners[0].Addr().String(), } node1 = startNode(t, node1) - defer node1.Stop() cc := node1.Server.GetConnectionController() - require.NoError(cc.CreateNonValidatorOutboundConnection(node5.Listeners[0].Addr().String())) - require.NoError(cc.CreateNonValidatorOutboundConnection(node6.Listeners[0].Addr().String())) + require.NoError(t, cc.CreateNonValidatorOutboundConnection(node5.Listeners[0].Addr().String())) + require.NoError(t, cc.CreateNonValidatorOutboundConnection(node6.Listeners[0].Addr().String())) - waitForCountRemoteNodeIndexer(t, node1, 3, 0, 3, 0) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node1, 3, 0, 3, 0) waitForNonValidatorOutboundConnection(t, node1, node2) waitForNonValidatorOutboundConnection(t, node1, node3) waitForNonValidatorOutboundConnection(t, node1, node4) } -func TestConnectionControllerNonValidatorCircularConnectIps(t *testing.T) { - node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") - node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") +func TestConnectionControllerNonValidatorConnectorInbound(t *testing.T) { + // Spawn validators node1, node2, node3, node4, node5, node6. Also spawn non-validators node7, node8, node9, node10. + // Set node1's targetOutboundPeers to 0 and targetInboundPeers to 1. Then make node1 create outbound connections to + // node2, node3, and make node4, node5, node6 create inbound connections to node1. Then make node1 create outbound + // connections to node7, node8, and make node9, node10 create inbound connections to node1. 
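Aside, not part of the patch: as I read the assertion at the end of the test below, node1's steady state works out as follows; the constants are purely illustrative arithmetic, not code from this change.

// Illustrative restatement of the expected steady state asserted at the end
// of this test via waitForCountRemoteNodeIndexerHandshakeCompleted(t, node1, 6, 5, 0, 1):
const (
	expectedValidators           = 5 // node2..node6 all end up in node1's validator index
	expectedNonValidatorOutbound = 0 // TargetOutboundPeers = 0 trims node7 and node8
	expectedNonValidatorInbound  = 1 // MaxInboundPeers = 1 keeps only one of node9, node10
	expectedTotal                = expectedValidators + expectedNonValidatorOutbound + expectedNonValidatorInbound // 6
)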
+ blsSeedPhrase1, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsSeedPhrase1) + node1.Config.TargetOutboundPeers = 0 + node1.Config.MaxInboundPeers = 1 + node1.Params.DialTimeout = 1 * time.Second + node1.Params.VerackNegotiationTimeout = 1 * time.Second + node1.Params.VersionNegotiationTimeout = 1 * time.Second + + blsSeedPhrase2, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsSeedPhrase2) + node2.Config.GlogV = 0 + blsSeedPhrase3, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsSeedPhrase3) + node3.Config.GlogV = 0 + blsSeedPhrase4, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsSeedPhrase4) + node4.Config.GlogV = 0 + blsSeedPhrase5, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsSeedPhrase5) + node5.Config.GlogV = 0 + blsSeedPhrase6, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node6 := spawnValidatorNodeProtocol2(t, 18005, "node6", blsSeedPhrase6) + node6.Config.GlogV = 0 - node1.Config.ConnectIPs = []string{"127.0.0.1:18001"} - node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node7 := spawnNonValidatorNodeProtocol2(t, 18006, "node7") + node8 := spawnNonValidatorNodeProtocol2(t, 18007, "node8") + node9 := spawnNonValidatorNodeProtocol2(t, 18008, "node9") + node10 := spawnNonValidatorNodeProtocol2(t, 18009, "node10") node1 = startNode(t, node1) node2 = startNode(t, node2) - defer node1.Stop() - defer node2.Stop() + node3 = startNode(t, node3) + node4 = startNode(t, node4) + node5 = startNode(t, node5) + node6 = startNode(t, node6) + node7 = startNode(t, node7) + node8 = startNode(t, node8) + node9 = startNode(t, node9) + node10 = startNode(t, node10) + + // Connect node1 to node2, node3, node7, and node8. + cc1 := node1.Server.GetConnectionController() + require.NoError(t, cc1.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) + require.NoError(t, cc1.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) + require.NoError(t, cc1.CreateNonValidatorOutboundConnection(node7.Listeners[0].Addr().String())) + require.NoError(t, cc1.CreateNonValidatorOutboundConnection(node8.Listeners[0].Addr().String())) + // Connect node4, node5, node6 to node1. + cc4 := node4.Server.GetConnectionController() + require.NoError(t, cc4.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + cc5 := node5.Server.GetConnectionController() + require.NoError(t, cc5.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + cc6 := node6.Server.GetConnectionController() + require.NoError(t, cc6.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + + // Connect node9, node10 to node1. 
+	cc9 := node9.Server.GetConnectionController()
+	require.NoError(t, cc9.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	cc10 := node10.Server.GetConnectionController()
+	require.NoError(t, cc10.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+
+	activeValidatorsMap := getActiveValidatorsMapWithValidatorNodes(t, node1, node2, node3, node4, node5, node6)
+	setActiveValidators(activeValidatorsMap, node1, node2, node3, node4, node5, node6, node7, node8, node9, node10)
 
-	waitForCountRemoteNodeIndexer(t, node1, 2, 0, 1, 1)
-	waitForCountRemoteNodeIndexer(t, node2, 2, 0, 1, 1)
+	waitForValidatorConnection(t, node1, node2)
+	waitForValidatorConnection(t, node1, node3)
+	waitForValidatorConnection(t, node1, node4)
+	waitForValidatorConnection(t, node1, node5)
+	waitForValidatorConnection(t, node1, node6)
+	waitForCountRemoteNodeIndexerHandshakeCompleted(t, node1, 6, 5, 0, 1)
 }
 
-func setGetActiveValidatorImplWithValidatorNodes(t *testing.T, validators ...*cmd.Node) {
-	require := require.New(t)
+func TestConnectionControllerNonValidatorConnectorAddressMgr(t *testing.T) {
+	// Spawn a non-validator node1. Set node1's TargetOutboundPeers to 2 and MaxInboundPeers to 0. Then
+	// add two IP addresses to the AddrMgr and make sure that node1 creates outbound connections to them.
+	node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+	node1.Config.TargetOutboundPeers = 2
+	node1.Config.MaxInboundPeers = 0
+
+	node1 = startNode(t, node1)
+	cc := node1.Server.GetConnectionController()
+	na1, err := cc.ConvertIPStringToNetAddress("deso-seed-2.io:17000")
+	require.NoError(t, err)
+	na2, err := cc.ConvertIPStringToNetAddress("deso-seed-3.io:17000")
+	require.NoError(t, err)
+	cc.AddrMgr.AddAddress(na1, na1)
+	cc.AddrMgr.AddAddress(na2, na2)
+	waitForCountRemoteNodeIndexer(t, node1, 2, 0, 2, 0)
+}
 
-	mapping := collections.NewConcurrentMap[bls.SerializedPublicKey, *lib.ValidatorEntry]()
+func getActiveValidatorsMapWithValidatorNodes(t *testing.T, validators ...*cmd.Node) *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator] {
+	mapping := collections.NewConcurrentMap[bls.SerializedPublicKey, consensus.Validator]()
 	for _, validator := range validators {
 		seed := validator.Config.PosValidatorSeed
 		if seed == "" {
 			t.Fatalf("Validator node %s does not have a PosValidatorSeed set", validator.Params.UserAgent)
 		}
 		keystore, err := lib.NewBLSKeystore(seed)
-		require.NoError(err)
+		require.NoError(t, err)
 		mapping.Set(keystore.GetSigner().GetPublicKey().Serialize(), createSimpleValidatorEntry(validator))
 	}
-	setGetActiveValidatorImpl(func() *collections.ConcurrentMap[bls.SerializedPublicKey, *lib.ValidatorEntry] {
-		return mapping
-	})
+	return mapping
 }
 
-func setGetActiveValidatorImpl(mapping func() *collections.ConcurrentMap[bls.SerializedPublicKey, *lib.ValidatorEntry]) {
-	lib.GetActiveValidatorImpl = mapping
+func setActiveValidators(validatorMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator], nodes ...*cmd.Node) {
+	for _, node := range nodes {
+		node.Server.GetConnectionController().SetActiveValidatorsMap(validatorMap)
+	}
 }
 
 func createSimpleValidatorEntry(node *cmd.Node) *lib.ValidatorEntry {
diff --git a/integration_testing/connection_controller_test.go b/integration_testing/connection_controller_test.go
index 58f4be33b..63976fbd4 100644
--- a/integration_testing/connection_controller_test.go
+++ b/integration_testing/connection_controller_test.go
@@ -4,22 +4,22 @@ import (
 	"github.com/deso-protocol/core/bls"
"github.com/deso-protocol/core/lib" "github.com/stretchr/testify/require" + "github.com/tyler-smith/go-bip39" "testing" ) func TestConnectionControllerNonValidator(t *testing.T) { - require := require.New(t) - node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") + node1.Params.DisableNetworkManagerRoutines = true node1 = startNode(t, node1) - defer node1.Stop() // Make sure NonValidator Node1 can create an outbound connection to NonValidator Node2 node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2") + node2.Params.DisableNetworkManagerRoutines = true node2 = startNode(t, node2) cc := node1.Server.GetConnectionController() - require.NoError(cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) + require.NoError(t, cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) waitForNonValidatorOutboundConnection(t, node1, node2) waitForNonValidatorInboundConnection(t, node2, node1) @@ -28,13 +28,14 @@ func TestConnectionControllerNonValidator(t *testing.T) { t.Logf("Test #1 passed | Successfully created outbound connection from NonValidator Node1 to NonValidator Node2") // Make sure NonValidator Node1 can create an outbound connection to validator Node3 - blsPriv3, err := bls.NewPrivateKey() - require.NoError(err) - node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv3) + blsSeedPhrase3, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsSeedPhrase3) + node3.Params.DisableNetworkManagerRoutines = true node3 = startNode(t, node3) cc = node1.Server.GetConnectionController() - require.NoError(cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) + require.NoError(t, cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) waitForValidatorConnection(t, node1, node3) waitForNonValidatorInboundConnection(t, node3, node1) @@ -43,37 +44,38 @@ func TestConnectionControllerNonValidator(t *testing.T) { t.Logf("Test #2 passed | Successfully created outbound connection from NonValidator Node1 to Validator Node3") // Make sure NonValidator Node1 can create a non-validator connection to validator Node4 - blsPriv4, err := bls.NewPrivateKey() - require.NoError(err) - node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4) + blsSeedPhrase4, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsSeedPhrase4) + node4.Params.DisableNetworkManagerRoutines = true node4 = startNode(t, node4) - defer node4.Stop() cc = node1.Server.GetConnectionController() - require.NoError(cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) + require.NoError(t, cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) waitForValidatorConnection(t, node1, node4) waitForNonValidatorInboundConnection(t, node4, node1) t.Logf("Test #3 passed | Successfully created outbound connection from NonValidator Node1 to Validator Node4") } func TestConnectionControllerValidator(t *testing.T) { - require := require.New(t) - - blsPriv1, err := bls.NewPrivateKey() - require.NoError(err) - node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsPriv1) + blsSeedPhrase1, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsSeedPhrase1) + node1.Params.DisableNetworkManagerRoutines = true node1 = startNode(t, node1) - defer node1.Stop() // Make sure Validator 
Node1 can create an outbound connection to Validator Node2 - blsPriv2, err := bls.NewPrivateKey() - blsPub2 := blsPriv2.PublicKey() - require.NoError(err) - node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2) + blsSeedPhrase2, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + blsKeyStore2, err := lib.NewBLSKeystore(blsSeedPhrase2) + require.NoError(t, err) + blsPub2 := blsKeyStore2.GetSigner().GetPublicKey() + node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsSeedPhrase2) + node2.Params.DisableNetworkManagerRoutines = true node2 = startNode(t, node2) cc := node1.Server.GetConnectionController() - require.NoError(cc.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsPub2)) + require.NoError(t, cc.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsPub2)) waitForValidatorConnection(t, node1, node2) waitForValidatorConnection(t, node2, node1) @@ -83,10 +85,11 @@ func TestConnectionControllerValidator(t *testing.T) { // Make sure Validator Node1 can create an outbound connection to NonValidator Node3 node3 := spawnNonValidatorNodeProtocol2(t, 18002, "node3") + node3.Params.DisableNetworkManagerRoutines = true node3 = startNode(t, node3) cc = node1.Server.GetConnectionController() - require.NoError(cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) + require.NoError(t, cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) waitForNonValidatorOutboundConnection(t, node1, node3) waitForValidatorConnection(t, node3, node1) @@ -95,125 +98,123 @@ func TestConnectionControllerValidator(t *testing.T) { t.Logf("Test #2 passed | Successfully created outbound connection from Validator Node1 to NonValidator Node3") // Make sure Validator Node1 can create an outbound non-validator connection to Validator Node4 - blsPriv4, err := bls.NewPrivateKey() - require.NoError(err) - node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4) + blsSeedPhrase4, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsSeedPhrase4) + node4.Params.DisableNetworkManagerRoutines = true node4 = startNode(t, node4) - defer node4.Stop() cc = node1.Server.GetConnectionController() - require.NoError(cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) + require.NoError(t, cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) waitForValidatorConnection(t, node1, node4) waitForValidatorConnection(t, node4, node1) t.Logf("Test #3 passed | Successfully created non-validator outbound connection from Validator Node1 to Validator Node4") } func TestConnectionControllerHandshakeDataErrors(t *testing.T) { - require := require.New(t) - - blsPriv1, err := bls.NewPrivateKey() - require.NoError(err) - node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsPriv1) + blsSeedPhrase1, err := bip39.NewMnemonic(lib.RandomBytes(32)) + require.NoError(t, err) + node1 := spawnValidatorNodeProtocol2(t, 18000, "node1", blsSeedPhrase1) + node1.Params.DisableNetworkManagerRoutines = true // This node should have ProtocolVersion2, but it has ProtocolVersion1 as we want it to disconnect. 
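Aside, not part of the patch: the handshake code enforcing these rules is not shown in this diff, so the function below is only a hedged sketch of the three version checks Tests #1-#3 appear to exercise, written against constants this file already imports (plus the standard errors package). It is not the actual implementation.

// sketchValidateProtocolVersion is a hypothetical reduction of the checks
// Tests #1-#3 exercise; the real logic lives in the handshake code.
func sketchValidateProtocolVersion(isValidator bool, version lib.ProtocolVersionType) error {
	switch {
	case version > lib.ProtocolVersion2:
		// Test #2: ProtocolVersionType(3) is newer than anything we know.
		return errors.New("protocol version is beyond the latest ProtocolVersion2")
	case version == lib.ProtocolVersion0:
		// Test #3: ProtocolVersion0 is outdated.
		return errors.New("protocol version is outdated")
	case isValidator && version != lib.ProtocolVersion2:
		// Test #1: a peer advertising the validator service flag must run ProtocolVersion2.
		return errors.New("validators must run ProtocolVersion2")
	}
	return nil
}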
-	blsPriv2, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2)
+	blsSeedPhrase2, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsSeedPhrase2)
+	node2.Params.DisableNetworkManagerRoutines = true
 	node2.Params.ProtocolVersion = lib.ProtocolVersion1
 	node1 = startNode(t, node1)
 	node2 = startNode(t, node2)
-	defer node1.Stop()
-	defer node2.Stop()
 
 	cc := node2.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	require.NoError(t, cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	waitForEmptyRemoteNodeIndexer(t, node2)
 	t.Logf("Test #1 passed | Successfully disconnected node with SFValidator flag and ProtocolVersion1 mismatch")
 
 	// This node shouldn't have ProtocolVersion3, which is beyond the latest ProtocolVersion2, so the nodes should disconnect.
-	blsPriv3, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv3)
+	blsSeedPhrase3, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsSeedPhrase3)
+	node3.Params.DisableNetworkManagerRoutines = true
 	node3.Params.ProtocolVersion = lib.ProtocolVersionType(3)
 	node3 = startNode(t, node3)
-	defer node3.Stop()
 
 	cc = node1.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String()))
+	require.NoError(t, cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	waitForEmptyRemoteNodeIndexer(t, node3)
 	t.Logf("Test #2 passed | Successfully disconnected node with ProtocolVersion3")
 
 	// This node shouldn't have ProtocolVersion0, which is outdated.
 	node4 := spawnNonValidatorNodeProtocol2(t, 18003, "node4")
+	node4.Params.DisableNetworkManagerRoutines = true
 	node4.Params.ProtocolVersion = lib.ProtocolVersion0
 	node4 = startNode(t, node4)
-	defer node4.Stop()
 
 	cc = node1.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String()))
+	require.NoError(t, cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	waitForEmptyRemoteNodeIndexer(t, node4)
 	t.Logf("Test #3 passed | Successfully disconnected node with ProtocolVersion0")
 
 	// This node will have a different public key than the one it's supposed to have.
-	blsPriv5, err := bls.NewPrivateKey()
-	require.NoError(err)
-	blsPriv5Wrong, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv5)
+	blsSeedPhrase5, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	blsSeedPhrase5Wrong, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	blsKeyStore5Wrong, err := lib.NewBLSKeystore(blsSeedPhrase5Wrong)
+	require.NoError(t, err)
+	node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsSeedPhrase5)
+	node5.Params.DisableNetworkManagerRoutines = true
 	node5 = startNode(t, node5)
-	defer node5.Stop()
 
 	cc = node1.Server.GetConnectionController()
-	require.NoError(cc.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsPriv5Wrong.PublicKey()))
+	require.NoError(t, cc.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsKeyStore5Wrong.GetSigner().GetPublicKey()))
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	waitForEmptyRemoteNodeIndexer(t, node5)
 	t.Logf("Test #4 passed | Successfully disconnected node with public key mismatch")
 
 	// This node will be missing the SFPosValidator flag while being connected as a validator.
 	blsPriv6, err := bls.NewPrivateKey()
-	require.NoError(err)
+	require.NoError(t, err)
 	node6 := spawnNonValidatorNodeProtocol2(t, 18005, "node6")
+	node6.Params.DisableNetworkManagerRoutines = true
 	node6 = startNode(t, node6)
-	defer node6.Stop()
 
 	cc = node1.Server.GetConnectionController()
-	require.NoError(cc.CreateValidatorConnection(node6.Listeners[0].Addr().String(), blsPriv6.PublicKey()))
+	require.NoError(t, cc.CreateValidatorConnection(node6.Listeners[0].Addr().String(), blsPriv6.PublicKey()))
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	waitForEmptyRemoteNodeIndexer(t, node6)
 	t.Logf("Test #5 passed | Successfully disconnected supposed validator node with missing SFPosValidator flag")
 
 	// This node will have ProtocolVersion1 and be connected as an outbound non-validator node.
 	node7 := spawnNonValidatorNodeProtocol2(t, 18006, "node7")
+	node7.Params.DisableNetworkManagerRoutines = true
 	node7.Params.ProtocolVersion = lib.ProtocolVersion1
 	node7 = startNode(t, node7)
-	defer node7.Stop()
 
 	cc = node1.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorOutboundConnection(node7.Listeners[0].Addr().String()))
+	require.NoError(t, cc.CreateNonValidatorOutboundConnection(node7.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	waitForEmptyRemoteNodeIndexer(t, node7)
 	t.Logf("Test #6 passed | Successfully disconnected outbound non-validator node with ProtocolVersion1")
 }
 
 func TestConnectionControllerHandshakeTimeouts(t *testing.T) {
-	require := require.New(t)
-
 	// Set version negotiation timeout to 0 to make sure that the node will be disconnected
 	node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+	node1.Params.DisableNetworkManagerRoutines = true
 	node1.Params.VersionNegotiationTimeout = 0
 	node1 = startNode(t, node1)
-	defer node1.Stop()
 
 	node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2")
+	node2.Params.DisableNetworkManagerRoutines = true
 	node2 = startNode(t, node2)
-	defer node2.Stop()
 
 	cc := node1.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String()))
+	require.NoError(t, cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	waitForEmptyRemoteNodeIndexer(t, node2)
 	t.Logf("Test #1 passed | Successfully disconnected node after version negotiation timeout")
@@ -221,66 +222,70 @@ func TestConnectionControllerHandshakeTimeouts(t *testing.T) {
 	// Now let's try timing out verack exchange
 	node1.Params.VersionNegotiationTimeout = lib.DeSoTestnetParams.VersionNegotiationTimeout
 	node3 := spawnNonValidatorNodeProtocol2(t, 18002, "node3")
+	node3.Params.DisableNetworkManagerRoutines = true
 	node3.Params.VerackNegotiationTimeout = 0
 	node3 = startNode(t, node3)
-	defer node3.Stop()
 
 	cc = node3.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	require.NoError(t, cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	waitForEmptyRemoteNodeIndexer(t, node3)
 	t.Logf("Test #2 passed | Successfully disconnected node after verack exchange timeout")
 
 	// Now let's try timing out handshake between two validators node4 and node5
-	blsPriv4, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4)
+	blsSeedPhrase4, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsSeedPhrase4)
+	node4.Params.DisableNetworkManagerRoutines = true
 	node4.Params.HandshakeTimeoutMicroSeconds = 0
 	node4 = startNode(t, node4)
-	defer node4.Stop()
 
-	blsPriv5, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv5)
+	blsSeedPhrase5, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	blsKeyStore5, err := lib.NewBLSKeystore(blsSeedPhrase5)
+	require.NoError(t, err)
+	node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsSeedPhrase5)
+	node5.Params.DisableNetworkManagerRoutines = true
 	node5 = startNode(t, node5)
-	defer node5.Stop()
 
 	cc = node4.Server.GetConnectionController()
-	require.NoError(cc.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsPriv5.PublicKey()))
+	require.NoError(t, cc.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsKeyStore5.GetSigner().GetPublicKey()))
 	waitForEmptyRemoteNodeIndexer(t, node4)
 	waitForEmptyRemoteNodeIndexer(t, node5)
 	t.Logf("Test #3 passed | Successfully disconnected validator node after handshake timeout")
 }
 
 func TestConnectionControllerValidatorDuplication(t *testing.T) {
-	require := require.New(t)
-
 	node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+	node1.Params.DisableNetworkManagerRoutines = true
 	node1 = startNode(t, node1)
-	defer node1.Stop()
 
 	// Create a validator Node2
-	blsPriv2, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2)
+	blsSeedPhrase2, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	blsKeyStore2, err := lib.NewBLSKeystore(blsSeedPhrase2)
+	require.NoError(t, err)
+	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsSeedPhrase2)
+	node2.Params.DisableNetworkManagerRoutines = true
 	node2 = startNode(t, node2)
 
 	// Create a duplicate validator Node3
-	node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv2)
+	node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsSeedPhrase2)
+	node3.Params.DisableNetworkManagerRoutines = true
 	node3 = startNode(t, node3)
 
 	// Create validator connection from Node1 to Node2 and from Node1 to Node3
 	cc := node1.Server.GetConnectionController()
-	require.NoError(cc.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsPriv2.PublicKey()))
+	require.NoError(t, cc.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsKeyStore2.GetSigner().GetPublicKey()))
 	// This should fail outright because Node3 has a duplicate public key.
-	require.Error(cc.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsPriv2.PublicKey()))
+	require.Error(t, cc.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsKeyStore2.GetSigner().GetPublicKey()))
 	waitForValidatorConnection(t, node1, node2)
 	waitForNonValidatorInboundConnection(t, node2, node1)
 
 	// Now create an outbound connection from Node3 to Node1, which should pass handshake, but then fail because
 	// Node1 already has a validator connection to Node2 with the same public key.
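Aside, not part of the patch: the guard the comment above refers to mirrors the duplicate-key check added to refreshValidatorIndex in lib/connection_controller.go later in this diff. The wrapper function and its name below are illustrative; only the index lookup and Disconnect call are taken from the patch.

// sketchRejectDuplicateValidatorKey mirrors the duplicate-key check in
// refreshValidatorIndex: the earlier connection holding a key wins.
func sketchRejectDuplicateValidatorKey(rnManager *lib.RemoteNodeManager, rn *lib.RemoteNode) bool {
	pk := rn.GetValidatorPublicKey()
	if pk == nil {
		return false // not a validator, nothing to deduplicate
	}
	if _, ok := rnManager.GetValidatorIndex().Get(pk.Serialize()); ok {
		rnManager.Disconnect(rn) // duplicate validator key: drop the newcomer
		return true
	}
	return false
}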
 	cc3 := node3.Server.GetConnectionController()
-	require.NoError(cc3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	require.NoError(t, cc3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node3)
 	waitForCountRemoteNodeIndexer(t, node1, 1, 1, 0, 0)
 	t.Logf("Test #1 passed | Successfully rejected duplicate validator connection with inbound/outbound validators")
@@ -290,56 +295,58 @@ func TestConnectionControllerValidatorDuplication(t *testing.T) {
 	waitForEmptyRemoteNodeIndexer(t, node1)
 
 	// Create two more validators Node4, Node5 with duplicate public keys
-	blsPriv4, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4)
+	blsSeedPhrase4, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsSeedPhrase4)
+	node4.Params.DisableNetworkManagerRoutines = true
 	node4 = startNode(t, node4)
-	defer node4.Stop()
 
-	node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsPriv4)
+	node5 := spawnValidatorNodeProtocol2(t, 18004, "node5", blsSeedPhrase4)
+	node5.Params.DisableNetworkManagerRoutines = true
 	node5 = startNode(t, node5)
-	defer node5.Stop()
 
 	// Create validator connections from Node4 to Node1 and from Node5 to Node1
 	cc4 := node4.Server.GetConnectionController()
-	require.NoError(cc4.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	require.NoError(t, cc4.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
 	waitForValidatorConnection(t, node1, node4)
 	waitForNonValidatorOutboundConnection(t, node4, node1)
 	cc5 := node5.Server.GetConnectionController()
-	require.NoError(cc5.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	require.NoError(t, cc5.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node5)
 	waitForCountRemoteNodeIndexer(t, node1, 1, 1, 0, 0)
 	t.Logf("Test #2 passed | Successfully rejected duplicate validator connection with multiple outbound validators")
 }
 
 func TestConnectionControllerProtocolDifference(t *testing.T) {
-	require := require.New(t)
-
 	// Create a ProtocolVersion1 Node1
 	node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+	node1.Params.DisableNetworkManagerRoutines = true
 	node1.Params.ProtocolVersion = lib.ProtocolVersion1
 	node1 = startNode(t, node1)
-	defer node1.Stop()
 
 	// Create a ProtocolVersion2 NonValidator Node2
 	node2 := spawnNonValidatorNodeProtocol2(t, 18001, "node2")
+	node2.Params.DisableNetworkManagerRoutines = true
 	node2 = startNode(t, node2)
 
 	// Create non-validator connection from Node1 to Node2
 	cc := node1.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String()))
+	require.NoError(t, cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String()))
 	waitForNonValidatorOutboundConnection(t, node1, node2)
 	waitForNonValidatorInboundConnection(t, node2, node1)
 	t.Logf("Test #1 passed | Successfully connected to a ProtocolVersion1 node with a ProtocolVersion2 non-validator")
 
 	// Create a ProtocolVersion2 Validator Node3
-	blsPriv3, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsPriv3)
+	blsSeedPhrase3, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	blsKeyStore3, err := lib.NewBLSKeystore(blsSeedPhrase3)
+	require.NoError(t, err)
+	node3 := spawnValidatorNodeProtocol2(t, 18002, "node3", blsSeedPhrase3)
+	node3.Params.DisableNetworkManagerRoutines = true
 	node3 = startNode(t, node3)
 
 	// Create validator connection from Node1 to Node3
-	require.NoError(cc.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsPriv3.PublicKey()))
+	require.NoError(t, cc.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsKeyStore3.GetSigner().GetPublicKey()))
 	waitForValidatorConnection(t, node1, node3)
 	waitForNonValidatorInboundConnection(t, node3, node1)
 	t.Logf("Test #2 passed | Successfully connected to a ProtocolVersion1 node with a ProtocolVersion2 validator")
@@ -349,55 +356,57 @@ func TestConnectionControllerProtocolDifference(t *testing.T) {
 	waitForEmptyRemoteNodeIndexer(t, node1)
 
 	// Create a ProtocolVersion2 validator Node4
-	blsPriv4, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4)
+	blsSeedPhrase4, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	blsKeyStore4, err := lib.NewBLSKeystore(blsSeedPhrase4)
+	require.NoError(t, err)
+	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsSeedPhrase4)
+	node4.Params.DisableNetworkManagerRoutines = true
 	node4 = startNode(t, node4)
-	defer node4.Stop()
 
 	// Attempt to create non-validator connection from Node4 to Node1
 	cc = node4.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	require.NoError(t, cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node4)
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	t.Logf("Test #3 passed | Successfully rejected outbound connection from ProtocolVersion2 node to ProtocolVersion1 node")
 
 	// Attempt to create validator connection from Node4 to Node1
-	require.NoError(cc.CreateValidatorConnection(node1.Listeners[0].Addr().String(), blsPriv4.PublicKey()))
+	require.NoError(t, cc.CreateValidatorConnection(node1.Listeners[0].Addr().String(), blsKeyStore4.GetSigner().GetPublicKey()))
 	waitForEmptyRemoteNodeIndexer(t, node4)
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	t.Logf("Test #4 passed | Successfully rejected validator connection from ProtocolVersion2 node to ProtocolVersion1 node")
 
 	// Create a ProtocolVersion2 non-validator Node5
 	node5 := spawnNonValidatorNodeProtocol2(t, 18004, "node5")
+	node5.Params.DisableNetworkManagerRoutines = true
 	node5 = startNode(t, node5)
-	defer node5.Stop()
 
 	// Attempt to create non-validator connection from Node5 to Node1
 	cc = node5.Server.GetConnectionController()
-	require.NoError(cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
+	require.NoError(t, cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String()))
 	waitForEmptyRemoteNodeIndexer(t, node5)
 	waitForEmptyRemoteNodeIndexer(t, node1)
 	t.Logf("Test #5 passed | Successfully rejected outbound connection from ProtocolVersion2 node to ProtocolVersion1 node")
 }
 
 func TestConnectionControllerPersistentConnection(t *testing.T) {
-	require := require.New(t)
-
 	// Create a NonValidator Node1
 	node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1")
+	node1.Params.DisableNetworkManagerRoutines = true
 	node1 = startNode(t, node1)
 
 	// Create a Validator Node2
-	blsPriv2, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsPriv2)
+	blsSeedPhrase2, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	node2 := spawnValidatorNodeProtocol2(t, 18001, "node2", blsSeedPhrase2)
+	node2.Params.DisableNetworkManagerRoutines = true
 	node2 = startNode(t, node2)
 
 	// Create a persistent connection from Node1 to Node2
 	cc := node1.Server.GetConnectionController()
 	_, err = cc.CreateNonValidatorPersistentOutboundConnection(node2.Listeners[0].Addr().String())
-	require.NoError(err)
+	require.NoError(t, err)
 	waitForValidatorConnection(t, node1, node2)
 	waitForNonValidatorInboundConnection(t, node2, node1)
 	node2.Stop()
@@ -406,11 +415,12 @@ func TestConnectionControllerPersistentConnection(t *testing.T) {
 
 	// Create a Non-validator Node3
 	node3 := spawnNonValidatorNodeProtocol2(t, 18002, "node3")
+	node3.Params.DisableNetworkManagerRoutines = true
 	node3 = startNode(t, node3)
 
 	// Create a persistent connection from Node1 to Node3
 	_, err = cc.CreateNonValidatorPersistentOutboundConnection(node3.Listeners[0].Addr().String())
-	require.NoError(err)
+	require.NoError(t, err)
 	waitForNonValidatorOutboundConnection(t, node1, node3)
 	waitForNonValidatorInboundConnection(t, node3, node1)
 	node3.Stop()
@@ -419,20 +429,21 @@ func TestConnectionControllerPersistentConnection(t *testing.T) {
 	t.Logf("Test #2 passed | Successfully created persistent connection from non-validator Node1 to non-validator Node3")
 
 	// Create a Validator Node4
-	blsPriv4, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsPriv4)
+	blsSeedPhrase4, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	node4 := spawnValidatorNodeProtocol2(t, 18003, "node4", blsSeedPhrase4)
+	node4.Params.DisableNetworkManagerRoutines = true
 	node4 = startNode(t, node4)
-	defer node4.Stop()
 
 	// Create a non-validator Node5
 	node5 := spawnNonValidatorNodeProtocol2(t, 18004, "node5")
+	node5.Params.DisableNetworkManagerRoutines = true
 	node5 = startNode(t, node5)
 
 	// Create a persistent connection from Node4 to Node5
 	cc = node4.Server.GetConnectionController()
 	_, err = cc.CreateNonValidatorPersistentOutboundConnection(node5.Listeners[0].Addr().String())
-	require.NoError(err)
+	require.NoError(t, err)
 	waitForNonValidatorOutboundConnection(t, node4, node5)
 	waitForValidatorConnection(t, node5, node4)
 	node5.Stop()
@@ -440,15 +451,15 @@ func TestConnectionControllerPersistentConnection(t *testing.T) {
 	t.Logf("Test #3 passed | Successfully created persistent connection from validator Node4 to non-validator Node5")
 
 	// Create a Validator Node6
-	blsPriv6, err := bls.NewPrivateKey()
-	require.NoError(err)
-	node6 := spawnValidatorNodeProtocol2(t, 18005, "node6", blsPriv6)
+	blsSeedPhrase6, err := bip39.NewMnemonic(lib.RandomBytes(32))
+	require.NoError(t, err)
+	node6 := spawnValidatorNodeProtocol2(t, 18005, "node6", blsSeedPhrase6)
+	node6.Params.DisableNetworkManagerRoutines = true
 	node6 = startNode(t, node6)
-	defer node6.Stop()
 
 	// Create a persistent connection from Node4 to Node6
 	_, err = cc.CreateNonValidatorPersistentOutboundConnection(node6.Listeners[0].Addr().String())
-	require.NoError(err)
+	require.NoError(t, err)
 	waitForValidatorConnection(t, node4, node6)
 	waitForValidatorConnection(t, node6, node4)
 	t.Logf("Test #4 passed | Successfully created persistent connection from validator Node4 to validator Node6")
diff --git a/integration_testing/connection_controller_utils_test.go b/integration_testing/connection_controller_utils_test.go
index 43cf418bc..94f4702c6 100644
--- a/integration_testing/connection_controller_utils_test.go
+++ b/integration_testing/connection_controller_utils_test.go
@@ -2,7 +2,6 @@ package integration_testing
 
 import (
"fmt" - "github.com/deso-protocol/core/bls" "github.com/deso-protocol/core/cmd" "github.com/deso-protocol/core/lib" "os" @@ -129,6 +128,18 @@ func waitForCountRemoteNodeIndexer(t *testing.T, node1 *cmd.Node, allCount int, waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to have appropriate RemoteNodes counts", userAgent), condition) } +func waitForCountRemoteNodeIndexerHandshakeCompleted(t *testing.T, node1 *cmd.Node, allCount, validatorCount int, + nonValidatorOutboundCount int, nonValidatorInboundCount int) { + + userAgent := node1.Params.UserAgent + rnManager := node1.Server.GetConnectionController().GetRemoteNodeManager() + condition := func() bool { + return checkRemoteNodeIndexerCountHandshakeCompleted(rnManager, allCount, validatorCount, + nonValidatorOutboundCount, nonValidatorInboundCount) + } + waitForCondition(t, fmt.Sprintf("Waiting for Node (%s) to have appropriate RemoteNodes counts", userAgent), condition) +} + func checkRemoteNodeIndexerUserAgent(manager *lib.RemoteNodeManager, userAgent string, validator bool, nonValidatorOutbound bool, nonValidatorInbound bool) bool { @@ -167,6 +178,42 @@ func checkRemoteNodeIndexerCount(manager *lib.RemoteNodeManager, allCount int, v return true } +func checkRemoteNodeIndexerCountHandshakeCompleted(manager *lib.RemoteNodeManager, allCount int, validatorCount int, + nonValidatorOutboundCount int, nonValidatorInboundCount int) bool { + + if allCount != manager.GetAllRemoteNodes().Count() { + return false + } + if validatorCount != manager.GetValidatorIndex().Count() { + return false + } + for _, rn := range manager.GetValidatorIndex().GetAll() { + if !rn.IsHandshakeCompleted() { + return false + } + } + + if nonValidatorOutboundCount != manager.GetNonValidatorOutboundIndex().Count() { + return false + } + for _, rn := range manager.GetNonValidatorOutboundIndex().GetAll() { + if !rn.IsHandshakeCompleted() { + return false + } + } + + if nonValidatorInboundCount != manager.GetNonValidatorInboundIndex().Count() { + return false + } + for _, rn := range manager.GetNonValidatorInboundIndex().GetAll() { + if !rn.IsHandshakeCompleted() { + return false + } + } + + return true +} + func checkRemoteNodeIndexerEmpty(manager *lib.RemoteNodeManager) bool { if manager.GetAllRemoteNodes().Count() != 0 { return false @@ -219,14 +266,14 @@ func spawnNonValidatorNodeProtocol2(t *testing.T, port uint32, id string) *cmd.N return node } -func spawnValidatorNodeProtocol2(t *testing.T, port uint32, id string, blsPriv *bls.PrivateKey) *cmd.Node { +func spawnValidatorNodeProtocol2(t *testing.T, port uint32, id string, blsSeedPhrase string) *cmd.Node { dbDir := getDirectory(t) t.Cleanup(func() { os.RemoveAll(dbDir) }) config := generateConfig(t, port, dbDir, 10) config.SyncType = lib.NodeSyncTypeBlockSync - config.PosValidatorSeed = blsPriv.ToString() + config.PosValidatorSeed = blsSeedPhrase node := cmd.NewNode(config) node.Params.UserAgent = id node.Params.ProtocolVersion = lib.ProtocolVersion2 diff --git a/integration_testing/tools.go b/integration_testing/tools.go index 4db913136..df9aad581 100644 --- a/integration_testing/tools.go +++ b/integration_testing/tools.go @@ -486,7 +486,7 @@ func waitForCondition(t *testing.T, id string, condition func() bool) { signalChan <- struct{}{} return } - time.Sleep(1 * time.Millisecond) + time.Sleep(100 * time.Millisecond) } }() diff --git a/lib/block_view_validator.go b/lib/block_view_validator.go index b6a662676..623a67ae2 100644 --- a/lib/block_view_validator.go +++ b/lib/block_view_validator.go @@ -93,6 +93,10 @@ 
func (validatorEntry *ValidatorEntry) GetStakeAmount() *uint256.Int { return validatorEntry.TotalStakeAmountNanos } +func (validatorEntry *ValidatorEntry) GetDomains() [][]byte { + return validatorEntry.Domains +} + func (validatorEntry *ValidatorEntry) Status() ValidatorStatus { // ValidatorEntry.Status() is a virtual/derived field that is not stored in // the database, but instead constructed from other ValidatorEntry fields. diff --git a/lib/connection_controller.go b/lib/connection_controller.go index 4bda85507..ac8467b09 100644 --- a/lib/connection_controller.go +++ b/lib/connection_controller.go @@ -6,6 +6,7 @@ import ( "github.com/btcsuite/btcd/wire" "github.com/deso-protocol/core/bls" "github.com/deso-protocol/core/collections" + "github.com/deso-protocol/core/consensus" "github.com/golang/glog" "github.com/pkg/errors" "net" @@ -14,14 +15,6 @@ import ( "time" ) -type GetActiveValidatorsFunc func() *collections.ConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry] - -var GetActiveValidatorImpl GetActiveValidatorsFunc = BasicGetActiveValidators - -func BasicGetActiveValidators() *collections.ConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry] { - return collections.NewConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry]() -} - // ConnectionController is a structure that oversees all connections to remote nodes. It is responsible for kicking off // the initial connections a node makes to the network. It is also responsible for creating RemoteNodes from all // successful outbound and inbound connections. The ConnectionController also ensures that the node is connected to @@ -48,7 +41,12 @@ type ConnectionController struct { // persistentIpToRemoteNodeIdsMap maps persistent IP addresses, like the --connect-ips, to the RemoteNodeIds of the // corresponding RemoteNodes. This is used to ensure that we don't connect to the same persistent IP address twice. // And that we can reconnect to the same persistent IP address if we disconnect from it. - persistentIpToRemoteNodeIdsMap map[string]RemoteNodeId + persistentIpToRemoteNodeIdsMap *collections.ConcurrentMap[string, RemoteNodeId] + + activeValidatorsMapLock sync.RWMutex + // activeValidatorsMap is a map of all currently active validators registered in consensus. It will be updated + // periodically by the owner of the ConnectionController. + activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator] // The target number of non-validator outbound remote nodes we want to have. We will disconnect remote nodes once // we've exceeded this number of outbound connections. 
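Aside, not part of the patch: the comment on activeValidatorsMap above says the owner updates it periodically. Below is a minimal sketch of such an owner-side loop, written as if it lived in package lib (which already imports collections, bls, consensus, and time). The function name and the fetchValidators callback are assumptions; only SetActiveValidatorsMap comes from this diff.

// runActiveValidatorsRefresher is a hypothetical owner-side loop: it
// periodically snapshots the consensus validator set and pushes it into the
// controller, which clones the map internally in SetActiveValidatorsMap.
func runActiveValidatorsRefresher(cc *ConnectionController, exitChan <-chan struct{},
	fetchValidators func() *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) {
	for {
		select {
		case <-exitChan:
			return
		case <-time.After(1 * time.Second):
			cc.SetActiveValidatorsMap(fetchValidators())
		}
	}
}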
@@ -78,7 +76,8 @@ func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, handsh rnManager: rnManager, AddrMgr: addrMgr, connectIps: connectIps, - persistentIpToRemoteNodeIdsMap: make(map[string]RemoteNodeId), + persistentIpToRemoteNodeIdsMap: collections.NewConcurrentMap[string, RemoteNodeId](), + activeValidatorsMap: collections.NewConcurrentMap[bls.SerializedPublicKey, consensus.Validator](), targetNonValidatorOutboundRemoteNodes: targetNonValidatorOutboundRemoteNodes, targetNonValidatorInboundRemoteNodes: targetNonValidatorInboundRemoteNodes, limitOneInboundRemoteNodePerIP: limitOneInboundConnectionPerIP, @@ -87,18 +86,26 @@ func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, handsh } func (cc *ConnectionController) Start() { - cc.startGroup.Add(3) + if cc.params.DisableNetworkManagerRoutines { + return + } + + cc.startGroup.Add(4) go cc.startPersistentConnector() go cc.startValidatorConnector() go cc.startNonValidatorConnector() + go cc.startRemoteNodeCleanup() cc.startGroup.Wait() - cc.exitGroup.Add(3) + cc.exitGroup.Add(4) } func (cc *ConnectionController) Stop() { - close(cc.exitChan) - cc.exitGroup.Wait() + if !cc.params.DisableNetworkManagerRoutines { + close(cc.exitChan) + cc.exitGroup.Wait() + } + cc.rnManager.DisconnectAll() } func (cc *ConnectionController) GetRemoteNodeManager() *RemoteNodeManager { @@ -131,7 +138,7 @@ func (cc *ConnectionController) startValidatorConnector() { cc.exitGroup.Done() return case <-time.After(1 * time.Second): - activeValidatorsMap := GetActiveValidatorImpl() + activeValidatorsMap := cc.getActiveValidatorsMap() cc.refreshValidatorIndex(activeValidatorsMap) cc.connectValidators(activeValidatorsMap) } @@ -158,6 +165,21 @@ func (cc *ConnectionController) startNonValidatorConnector() { } } +func (cc *ConnectionController) startRemoteNodeCleanup() { + cc.startGroup.Done() + + for { + select { + case <-cc.exitChan: + cc.exitGroup.Done() + return + case <-time.After(1 * time.Second): + cc.rnManager.Cleanup() + } + } + +} + // ########################### // ## Handlers (Peer, DeSoMessage) // ########################### @@ -167,11 +189,14 @@ func (cc *ConnectionController) _handleDonePeerMessage(origin *Peer, desoMsg DeS return } + glog.V(2).Infof("ConnectionController.handleDonePeerMessage: Handling disconnected peer message for "+ + "id=%v", origin.ID) cc.rnManager.DisconnectById(NewRemoteNodeId(origin.ID)) // Update the persistentIpToRemoteNodeIdsMap. 
- for ip, id := range cc.persistentIpToRemoteNodeIdsMap { + ipRemoteNodeIdMap := cc.persistentIpToRemoteNodeIdsMap.ToMap() + for ip, id := range ipRemoteNodeIdMap { if id.ToUint64() == origin.ID { - delete(cc.persistentIpToRemoteNodeIdsMap, ip) + cc.persistentIpToRemoteNodeIdsMap.Remove(ip) } } } @@ -228,6 +253,7 @@ func (cc *ConnectionController) _handleNewConnectionMessage(origin *Peer, desoMs } func (cc *ConnectionController) cleanupFailedInboundConnection(remoteNode *RemoteNode, connection Connection) { + glog.V(2).Infof("ConnectionController.cleanupFailedInboundConnection: Cleaning up failed inbound connection") if remoteNode != nil { cc.rnManager.Disconnect(remoteNode) } @@ -239,6 +265,7 @@ func (cc *ConnectionController) cleanupFailedOutboundConnection(connection Conne if !ok { return } + glog.V(2).Infof("ConnectionController.cleanupFailedOutboundConnection: Cleaning up failed outbound connection") id := NewRemoteNodeId(oc.attemptId) rn := cc.rnManager.GetRemoteNodeById(id) @@ -257,7 +284,7 @@ func (cc *ConnectionController) refreshConnectIps() { // Connect to addresses passed via the --connect-ips flag. These addresses are persistent in the sense that if we // disconnect from one, we will try to reconnect to the same one. for _, connectIp := range cc.connectIps { - if _, ok := cc.persistentIpToRemoteNodeIdsMap[connectIp]; ok { + if _, ok := cc.persistentIpToRemoteNodeIdsMap.Get(connectIp); ok { continue } @@ -269,7 +296,7 @@ func (cc *ConnectionController) refreshConnectIps() { continue } - cc.persistentIpToRemoteNodeIdsMap[connectIp] = id + cc.persistentIpToRemoteNodeIdsMap.Set(connectIp, id) } } @@ -277,13 +304,26 @@ func (cc *ConnectionController) refreshConnectIps() { // ## Validator Connections // ########################### +func (cc *ConnectionController) SetActiveValidatorsMap(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { + cc.activeValidatorsMapLock.Lock() + defer cc.activeValidatorsMapLock.Unlock() + cc.activeValidatorsMap = activeValidatorsMap.Clone() + +} + +func (cc *ConnectionController) getActiveValidatorsMap() *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator] { + cc.activeValidatorsMapLock.RLock() + defer cc.activeValidatorsMapLock.RUnlock() + return cc.activeValidatorsMap.Clone() +} + // refreshValidatorIndex re-indexes validators based on the activeValidatorsMap. It is called periodically by the // validator connector. -func (cc *ConnectionController) refreshValidatorIndex(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry]) { +func (cc *ConnectionController) refreshValidatorIndex(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { // De-index inactive validators. We skip any checks regarding RemoteNodes connection status, nor do we verify whether // de-indexing the validator would result in an excess number of outbound/inbound connections. Any excess connections // will be cleaned up by the peer connector. - validatorRemoteNodeMap := cc.rnManager.GetValidatorIndex().Copy() + validatorRemoteNodeMap := cc.rnManager.GetValidatorIndex().ToMap() for pk, rn := range validatorRemoteNodeMap { // If the validator is no longer active, de-index it. if _, ok := activeValidatorsMap.Get(pk); !ok { @@ -306,6 +346,8 @@ func (cc *ConnectionController) refreshValidatorIndex(activeValidatorsMap *colle // public key, which goes undetected during handshake. 
To prevent this from affecting the indexing of the validator
 	// set, we check that the non-validator's public key is not already present in the validator index.
 	if _, ok := cc.rnManager.GetValidatorIndex().Get(pk.Serialize()); ok {
+		glog.V(2).Infof("ConnectionController.refreshValidatorIndex: Disconnecting validator RemoteNode "+
+			"(%v): its public key (%v) is already present in the validator index", rn, pk)
 		cc.rnManager.Disconnect(rn)
 		continue
 	}
@@ -320,13 +362,13 @@
 // connectValidators attempts to connect to all active validators that are not already connected. It is called
 // periodically by the validator connector.
-func (cc *ConnectionController) connectValidators(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, *ValidatorEntry]) {
+func (cc *ConnectionController) connectValidators(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) {
 	// Look through the active validators and connect to any that we're not already connected to.
 	if cc.blsKeystore == nil {
 		return
 	}
 
-	validators := activeValidatorsMap.Copy()
+	validators := activeValidatorsMap.ToMap()
 	for pk, validator := range validators {
 		_, exists := cc.rnManager.GetValidatorIndex().Get(pk)
 		// If we're already connected to the validator, continue.
@@ -343,7 +385,10 @@
 		}
 
 		// For now, we only dial the first domain in the validator's domain list.
-		address := string(validator.Domains[0])
+		if len(validator.GetDomains()) == 0 {
+			continue
+		}
+		address := string(validator.GetDomains()[0])
 		if err := cc.CreateValidatorConnection(address, publicKey); err != nil {
 			glog.V(2).Infof("ConnectionController.connectValidators: Problem connecting to validator %v: %v", address, err)
 			continue
 		}
@@ -374,8 +419,8 @@
 	allOutboundRemoteNodes := cc.rnManager.GetNonValidatorOutboundIndex().GetAll()
 	var attemptedOutboundRemoteNodes, connectedOutboundRemoteNodes []*RemoteNode
 	for _, rn := range allOutboundRemoteNodes {
-		if rn.IsPersistent() {
-			// We do nothing for persistent remote nodes.
+		if rn.IsPersistent() || rn.IsExpectedValidator() {
+			// We do nothing for persistent remote nodes or expected validators.
 			continue
 		} else if rn.IsHandshakeCompleted() {
 			connectedOutboundRemoteNodes = append(connectedOutboundRemoteNodes, rn)
@@ -397,6 +442,8 @@
 		if excessiveOutboundRemoteNodes == 0 {
 			break
 		}
+		glog.V(2).Infof("ConnectionController.refreshNonValidatorOutboundIndex: Disconnecting attempted remote "+
+			"node (id=%v) due to excess outbound peers", rn.GetId())
 		cc.rnManager.Disconnect(rn)
 		excessiveOutboundRemoteNodes--
 	}
@@ -405,6 +452,8 @@
 		if excessiveOutboundRemoteNodes == 0 {
 			break
 		}
+		glog.V(2).Infof("ConnectionController.refreshNonValidatorOutboundIndex: Disconnecting connected remote "+
+			"node (id=%v) due to excess outbound peers", rn.GetId())
 		cc.rnManager.Disconnect(rn)
 		excessiveOutboundRemoteNodes--
 	}
@@ -415,16 +464,30 @@ func (cc *ConnectionController) refreshNonValidatorInboundIndex() {
 	// First let's check if we have an excess number of inbound remote nodes. If we do, we'll disconnect some of them.
numConnectedInboundRemoteNodes := uint32(cc.rnManager.GetNonValidatorInboundIndex().Count()) - excessiveInboundRemoteNodes := uint32(0) - if numConnectedInboundRemoteNodes > cc.targetNonValidatorInboundRemoteNodes { - excessiveInboundRemoteNodes = numConnectedInboundRemoteNodes - cc.targetNonValidatorInboundRemoteNodes + if numConnectedInboundRemoteNodes <= cc.targetNonValidatorInboundRemoteNodes { + return } + // Disconnect random inbound non-validators if we have too many of them. inboundRemoteNodes := cc.rnManager.GetNonValidatorInboundIndex().GetAll() + var connectedInboundRemoteNodes []*RemoteNode for _, rn := range inboundRemoteNodes { + // We only want to disconnect remote nodes that have completed handshake. + if rn.IsHandshakeCompleted() { + connectedInboundRemoteNodes = append(connectedInboundRemoteNodes, rn) + } + } + + excessiveInboundRemoteNodes := uint32(0) + if numConnectedInboundRemoteNodes > cc.targetNonValidatorInboundRemoteNodes { + excessiveInboundRemoteNodes = numConnectedInboundRemoteNodes - cc.targetNonValidatorInboundRemoteNodes + } + for _, rn := range connectedInboundRemoteNodes { if excessiveInboundRemoteNodes == 0 { break } + glog.V(2).Infof("ConnectionController.refreshNonValidatorInboundIndex: Disconnecting inbound remote "+ + "node (id=%v) due to excess inbound peers", rn.GetId()) cc.rnManager.Disconnect(rn) excessiveInboundRemoteNodes-- } @@ -444,7 +507,8 @@ func (cc *ConnectionController) connectNonValidators() { } cc.AddrMgr.Attempt(addr) if err := cc.rnManager.CreateNonValidatorOutboundConnection(addr); err != nil { - glog.V(2).Infof("ConnectionController.connectNonValidators: Problem connecting to addr %v: %v", addr, err) + glog.V(2).Infof("ConnectionController.connectNonValidators: Problem creating non-validator outbound "+ + "connection to addr: %v; err: %v", addr, err) } } } @@ -504,14 +568,6 @@ func (cc *ConnectionController) SetTargetOutboundPeers(numPeers uint32) { cc.targetNonValidatorOutboundRemoteNodes = numPeers } -func (cc *ConnectionController) enoughNonValidatorInboundConnections() bool { - return uint32(cc.rnManager.GetNonValidatorInboundIndex().Count()) >= cc.targetNonValidatorInboundRemoteNodes -} - -func (cc *ConnectionController) enoughNonValidatorOutboundConnections() bool { - return uint32(cc.rnManager.GetNonValidatorOutboundIndex().Count()) >= cc.targetNonValidatorOutboundRemoteNodes -} - // processInboundConnection is called when a new inbound connection is established. At this point, the connection is not validated, // nor is it assigned to a RemoteNode. This function is responsible for validating the connection and creating a RemoteNode from it. // Once the RemoteNode is created, we will initiate handshake. @@ -522,12 +578,6 @@ func (cc *ConnectionController) processInboundConnection(conn Connection) (*Remo return nil, fmt.Errorf("ConnectionController.handleInboundConnection: Connection is not an inboundConnection") } - // Reject the peer if we have too many inbound connections already. - if cc.enoughNonValidatorInboundConnections() { - return nil, fmt.Errorf("ConnectionController.handleInboundConnection: Rejecting INBOUND peer (%s) due to max "+ - "inbound peers (%d) hit", ic.connection.RemoteAddr().String(), cc.targetNonValidatorInboundRemoteNodes) - } - // If we want to limit inbound connections to one per IP address, check to make sure this address isn't already connected. 
if cc.limitOneInboundRemoteNodePerIP && cc.isDuplicateInboundIPAddress(ic.connection.RemoteAddr()) { @@ -572,12 +622,6 @@ func (cc *ConnectionController) processOutboundConnection(conn Connection) (*Rem cc.AddrMgr.Good(oc.address) } - // if this is a non-persistent outbound peer, and we already have enough outbound peers, then don't bother adding this one. - if !oc.isPersistent && cc.enoughNonValidatorOutboundConnections() { - return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Connected to maximum number of outbound "+ - "peers (%d)", cc.targetNonValidatorOutboundRemoteNodes) - } - // If this is a non-persistent outbound peer and the group key overlaps with another peer we're already connected to then // abort mission. We only connect to one peer per IP group in order to prevent Sybil attacks. if !oc.isPersistent && cc.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { @@ -599,19 +643,13 @@ func (cc *ConnectionController) processOutboundConnection(conn Connection) (*Rem } // If this is a persistent remote node or a validator, we don't need to do any extra connection validation. - if remoteNode.IsPersistent() || remoteNode.GetValidatorPublicKey() != nil { + if remoteNode.IsPersistent() || remoteNode.IsExpectedValidator() { return remoteNode, nil } // If we get here, it means we're dealing with a non-persistent or non-validator remote node. We perform additional // connection validation. - // If we already have enough outbound peers, then don't bother adding this one. - if cc.enoughNonValidatorOutboundConnections() { - return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Connected to maximum number of outbound "+ - "peers (%d)", cc.targetNonValidatorOutboundRemoteNodes) - } - // If the group key overlaps with another peer we're already connected to then abort mission. We only connect to // one peer per IP group in order to prevent Sybil attacks. if cc.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { diff --git a/lib/connection_manager.go b/lib/connection_manager.go index 1ba4bf8f1..761fb048e 100644 --- a/lib/connection_manager.go +++ b/lib/connection_manager.go @@ -615,9 +615,11 @@ func (cmgr *ConnectionManager) Start() { "(id= %v)", oc.attemptId) } else { glog.V(2).Infof("ConnectionManager.Start: Successfully established an outbound connection with "+ - "(addr= %v)", oc.connection.RemoteAddr()) + "(addr= %v) (id= %v)", oc.connection.RemoteAddr(), oc.attemptId) } + cmgr.mtxConnectionAttempts.Lock() delete(cmgr.outboundConnectionAttempts, oc.attemptId) + cmgr.mtxConnectionAttempts.Unlock() cmgr.serverMessageQueue <- &ServerMessage{ Peer: nil, Msg: &MsgDeSoNewConnection{ @@ -639,7 +641,7 @@ func (cmgr *ConnectionManager) Start() { // has already been called, since that is what's responsible for adding the peer // to this queue in the first place. - glog.V(1).Infof("Done with peer (%v).", pp) + glog.V(1).Infof("Done with peer (id=%v).", pp.ID) // Remove the peer from our data structures. cmgr.removePeer(pp) diff --git a/lib/constants.go b/lib/constants.go index f92e05f1e..42645283e 100644 --- a/lib/constants.go +++ b/lib/constants.go @@ -745,6 +745,9 @@ type DeSoParams struct { // HandshakeTimeoutMicroSeconds is the timeout for the peer handshake certificate. The default value is 15 minutes. HandshakeTimeoutMicroSeconds uint64 + // DisableNetworkManagerRoutines is a testing flag that disables the network manager routines. 
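// The mtxConnectionAttempts change above exists because the
// outboundConnectionAttempts map is evidently touched from more than one
// goroutine, so the delete after a finished dial must hold the same lock as
// the insert. A hedged, stand-alone sketch of that guard; the names below are
// stand-ins for the ConnectionManager fields, not the real ones.
package main

import "sync"

type attemptTracker struct {
	mtx      sync.Mutex
	attempts map[uint64]struct{}
}

// finish removes a completed dial attempt; taking the mutex makes the delete
// safe against concurrent inserts from dialer goroutines.
func (t *attemptTracker) finish(id uint64) {
	t.mtx.Lock()
	defer t.mtx.Unlock()
	delete(t.attempts, id)
}

func main() {
	t := &attemptTracker{attempts: map[uint64]struct{}{7: {}}}
	t.finish(7)
}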
+ DisableNetworkManagerRoutines bool + ForkHeights ForkHeights EncoderMigrationHeights *EncoderMigrationHeights @@ -1164,6 +1167,9 @@ var DeSoMainnetParams = DeSoParams{ // The peer handshake certificate timeout. HandshakeTimeoutMicroSeconds: uint64(900000000), + // DisableNetworkManagerRoutines is a testing flag that disables the network manager routines. + DisableNetworkManagerRoutines: false, + ForkHeights: MainnetForkHeights, EncoderMigrationHeights: GetEncoderMigrationHeights(&MainnetForkHeights), EncoderMigrationHeightsList: GetEncoderMigrationHeightsList(&MainnetForkHeights), @@ -1437,6 +1443,9 @@ var DeSoTestnetParams = DeSoParams{ // The peer handshake certificate timeout. HandshakeTimeoutMicroSeconds: uint64(900000000), + // DisableNetworkManagerRoutines is a testing flag that disables the network manager routines. + DisableNetworkManagerRoutines: false, + ForkHeights: TestnetForkHeights, EncoderMigrationHeights: GetEncoderMigrationHeights(&TestnetForkHeights), EncoderMigrationHeightsList: GetEncoderMigrationHeightsList(&TestnetForkHeights), diff --git a/lib/handshake_controller.go b/lib/handshake_controller.go index f355bad93..385e36275 100644 --- a/lib/handshake_controller.go +++ b/lib/handshake_controller.go @@ -38,18 +38,13 @@ func (hc *HandshakeController) InitiateHandshake(rn *RemoteNode) { hc.usedNonces.Add(nonce) } -// _handleHandshakeCompleteMessage handles HandshakeComplete control messages, sent by RemoteNodes. -func (hc *HandshakeController) _handleHandshakeCompleteMessage(origin *Peer, desoMsg DeSoMessage) { +// handleHandshakeComplete handles HandshakeComplete control messages, sent by RemoteNodes. +func (hc *HandshakeController) handleHandshakeComplete(remoteNode *RemoteNode) { // Prevent race conditions while handling handshake complete messages. hc.mtxHandshakeComplete.Lock() defer hc.mtxHandshakeComplete.Unlock() - if desoMsg.GetMsgType() != MsgTypePeerHandshakeComplete { - return - } - // Get the handshake information of this peer. - remoteNode := hc.rnManager.GetRemoteNodeFromPeer(origin) if remoteNode == nil { return } @@ -60,7 +55,8 @@ func (hc *HandshakeController) _handleHandshakeCompleteMessage(origin *Peer, des } if err := hc.handleHandshakeCompletePoSMessage(remoteNode); err != nil { - glog.Errorf("HandshakeController._handleHandshakeCompleteMessage: Error handling PoS handshake peer message: %v", err) + glog.Errorf("HandshakeController.handleHandshakeComplete: Error handling PoS handshake peer message: %v, "+ + "remoteNodePk (%s)", err, remoteNode.GetValidatorPublicKey().Serialize()) hc.rnManager.Disconnect(remoteNode) return } @@ -165,6 +161,8 @@ func (hc *HandshakeController) _handleVerackMessage(origin *Peer, desoMsg DeSoMe glog.Errorf("HandshakeController._handleVerackMessage: Requesting PeerDisconnect for id: (%v) "+ "error handling verack message: %v", origin.ID, err) hc.rnManager.Disconnect(rn) + return } - return + + hc.handleHandshakeComplete(rn) } diff --git a/lib/network.go b/lib/network.go index 23bc86765..120348c6c 100644 --- a/lib/network.go +++ b/lib/network.go @@ -854,23 +854,6 @@ func (msg *MsgDeSoDisconnectedPeer) FromBytes(data []byte) error { return fmt.Errorf("MsgDeSoDisconnectedPeer.FromBytes not implemented") } -// MsgDeSoPeerHandshakeComplete is a control message that is used to internally signal when a peer has -// connected and completed the Version + Verack handshake and authentication process. 
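// With the MsgDeSoPeerHandshakeComplete control message removed below, verack
// handling now calls the completion handler synchronously instead of routing
// a message through the server queue. A self-contained sketch of the new
// control flow; the function parameters stand in for the HandshakeController
// and RemoteNodeManager methods.
package main

import "fmt"

type RemoteNode struct{ id uint64 }

func onVerack(rn *RemoteNode,
	handleVerack func(*RemoteNode) error,
	handshakeComplete func(*RemoteNode),
	disconnect func(*RemoteNode)) {
	if err := handleVerack(rn); err != nil {
		disconnect(rn) // invalid verack: drop the peer and stop
		return
	}
	handshakeComplete(rn) // runs inline; no control-message round trip
}

func main() {
	onVerack(&RemoteNode{id: 1},
		func(*RemoteNode) error { return nil },
		func(rn *RemoteNode) { fmt.Println("handshake complete for", rn.id) },
		func(rn *RemoteNode) { fmt.Println("disconnect", rn.id) })
}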
-type MsgDeSoPeerHandshakeComplete struct { -} - -func (msg *MsgDeSoPeerHandshakeComplete) GetMsgType() MsgType { - return MsgTypePeerHandshakeComplete -} - -func (msg *MsgDeSoPeerHandshakeComplete) ToBytes(preSignature bool) ([]byte, error) { - return nil, fmt.Errorf("MsgDeSoPeerHandshakeComplete.ToBytes: Not implemented") -} - -func (msg *MsgDeSoPeerHandshakeComplete) FromBytes(data []byte) error { - return fmt.Errorf("MsgDeSoPeerHandshakeComplete.FromBytes not implemented") -} - type ConnectionType uint8 const ( diff --git a/lib/network_connection.go b/lib/network_connection.go index ffb0bb1f1..4d50d22a8 100644 --- a/lib/network_connection.go +++ b/lib/network_connection.go @@ -91,8 +91,9 @@ type OutboundConnectionAttempt struct { // connectionChan is used to send the result of the connection attempt to the caller thread. connectionChan chan *outboundConnection - exitChan chan bool - status outboundConnectionAttemptStatus + startGroup sync.WaitGroup + exitChan chan bool + status outboundConnectionAttemptStatus } type outboundConnectionAttemptStatus int @@ -126,11 +127,14 @@ func (oca *OutboundConnectionAttempt) Start() { return } + oca.startGroup.Add(1) go oca.start() + oca.startGroup.Wait() oca.status = outboundConnectionAttemptRunning } func (oca *OutboundConnectionAttempt) start() { + oca.startGroup.Done() oca.retryCount = 0 out: @@ -198,7 +202,7 @@ func (oca *OutboundConnectionAttempt) SetTimeoutUnit(timeoutUnit time.Duration) // Otherwise, it will return nil. func (oca *OutboundConnectionAttempt) attemptOutboundConnection() net.Conn { // If the peer is not persistent, update the addrmgr. - glog.V(1).Infof("Attempting to connect to addr: %v", oca.netAddr.IP.String()) + glog.V(1).Infof("Attempting to connect to addr: %v:%v", oca.netAddr.IP.String(), oca.netAddr.Port) var err error tcpAddr := net.TCPAddr{ diff --git a/lib/peer.go b/lib/peer.go index 0af9aa0b7..e0aae7e77 100644 --- a/lib/peer.go +++ b/lib/peer.go @@ -5,6 +5,7 @@ import ( "github.com/decred/dcrd/lru" "net" "sort" + "sync" "sync/atomic" "time" @@ -140,6 +141,9 @@ type Peer struct { // SyncType indicates whether blocksync should not be requested for this peer. If set to true // then we'll only hypersync from this peer. syncType NodeSyncType + + // startGroup ensures that all the Peer's go routines are started when we call Start(). + startGroup sync.WaitGroup } func (pp *Peer) GetId() uint64 { @@ -550,6 +554,7 @@ func (pp *Peer) cleanupMessageProcessor() { } func (pp *Peer) StartDeSoMessageProcessor() { + pp.startGroup.Done() glog.Infof("StartDeSoMessageProcessor: Starting for peer %v", pp) for { if pp.disconnected != 0 { @@ -739,6 +744,7 @@ func (pp *Peer) HandlePongMsg(msg *MsgDeSoPong) { } func (pp *Peer) PingHandler() { + pp.startGroup.Done() glog.V(1).Infof("Peer.PingHandler: Starting ping handler for Peer %v", pp) pingTicker := time.NewTicker(pingInterval) defer pingTicker.Stop() @@ -908,6 +914,7 @@ func (pp *Peer) _setKnownAddressesMap(key string, val bool) { } func (pp *Peer) outHandler() { + pp.startGroup.Done() glog.V(1).Infof("Peer.outHandler: Starting outHandler for Peer %v", pp) stallTicker := time.NewTicker(time.Second) out: @@ -1087,6 +1094,7 @@ func (pp *Peer) _handleInExpectedResponse(rmsg DeSoMessage) error { // inHandler handles all incoming messages for the peer. It must be run as a // goroutine. 
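// The startGroup additions to peer.go (PingHandler, outHandler, inHandler,
// StartDeSoMessageProcessor) and network_connection.go all follow one
// pattern: each goroutine calls Done() as its first statement, and Start()
// blocks on Wait(), so callers know every loop is actually running before
// Start() returns. A self-contained sketch with a stub worker type:
package main

import (
	"fmt"
	"sync"
)

type worker struct{ startGroup sync.WaitGroup }

func (w *worker) pingLoop() { w.startGroup.Done(); fmt.Println("pingLoop started") }
func (w *worker) outLoop()  { w.startGroup.Done(); fmt.Println("outLoop started") }

func (w *worker) Start() {
	w.startGroup.Add(2) // one per goroutine, added before launching any of them
	go w.pingLoop()
	go w.outLoop()
	w.startGroup.Wait() // returns only once both goroutines have begun executing
}

func main() { (&worker{}).Start() }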
func (pp *Peer) inHandler() { + pp.startGroup.Done() glog.V(1).Infof("Peer.inHandler: Starting inHandler for Peer %v", pp) // The timer is stopped when a new message is received and reset after it @@ -1184,10 +1192,12 @@ func (pp *Peer) Start() { glog.Infof("Peer.Start: Starting peer %v", pp) // The protocol has been negotiated successfully so start processing input // and output messages. + pp.startGroup.Add(4) go pp.PingHandler() go pp.outHandler() go pp.inHandler() go pp.StartDeSoMessageProcessor() + pp.startGroup.Wait() // If the address manager needs more addresses, then send a GetAddr message // to the peer. This is best-effort. @@ -1290,7 +1300,7 @@ func (pp *Peer) Disconnect() { } atomic.AddInt32(&pp.disconnected, 1) - glog.V(1).Infof("Peer.Disconnect: Running Disconnect for the first time for Peer %v", pp) + glog.V(2).Infof("Peer.Disconnect: Running Disconnect for the first time for Peer %v", pp) // Close the connection object. pp.Conn.Close() diff --git a/lib/remote_node.go b/lib/remote_node.go index 5ba651f3f..b74ba9e23 100644 --- a/lib/remote_node.go +++ b/lib/remote_node.go @@ -63,6 +63,12 @@ type RemoteNode struct { // the RemoteNode is instantiated. And for inbound validator nodes, the validatorPublicKey will be set when the // handshake is completed. validatorPublicKey *bls.PublicKey + // isPersistent identifies whether the RemoteNode is persistent or not. Persistent RemoteNodes is a sub-category of + // outbound RemoteNodes. They are different from non-persistent RemoteNodes from the very moment they are created. + // Initially, an outbound RemoteNode is in an "attempted" state, meaning we dial the connection to the peer. The + // non-persistent RemoteNode is terminated after the first failed dial, while a persistent RemoteNode will keep + // trying to dial the peer indefinitely until the connection is established, or the node stops. 
+ isPersistent bool connectionStatus RemoteNodeStatus @@ -128,11 +134,13 @@ func NewHandshakeMetadata() *HandshakeMetadata { return &HandshakeMetadata{} } -func NewRemoteNode(id RemoteNodeId, validatorPublicKey *bls.PublicKey, srv *Server, cmgr *ConnectionManager, keystore *BLSKeystore, - params *DeSoParams, minTxFeeRateNanosPerKB uint64, latestBlockHeight uint64, nodeServices ServiceFlag) *RemoteNode { +func NewRemoteNode(id RemoteNodeId, validatorPublicKey *bls.PublicKey, isPersistent bool, srv *Server, + cmgr *ConnectionManager, keystore *BLSKeystore, params *DeSoParams, minTxFeeRateNanosPerKB uint64, + latestBlockHeight uint64, nodeServices ServiceFlag) *RemoteNode { return &RemoteNode{ id: id, validatorPublicKey: validatorPublicKey, + isPersistent: isPersistent, connectionStatus: RemoteNodeStatus_NotConnected, handshakeMetadata: NewHandshakeMetadata(), srv: srv, @@ -208,7 +216,7 @@ func (rn *RemoteNode) IsOutbound() bool { } func (rn *RemoteNode) IsPersistent() bool { - return rn.peer != nil && rn.peer.IsPersistent() + return rn.isPersistent } func (rn *RemoteNode) IsNotConnected() bool { @@ -219,6 +227,14 @@ func (rn *RemoteNode) IsConnected() bool { return rn.connectionStatus == RemoteNodeStatus_Connected } +func (rn *RemoteNode) IsVersionSent() bool { + return rn.connectionStatus == RemoteNodeStatus_VersionSent +} + +func (rn *RemoteNode) IsVerackSent() bool { + return rn.connectionStatus == RemoteNodeStatus_VerackSent +} + func (rn *RemoteNode) IsHandshakeCompleted() bool { return rn.connectionStatus == RemoteNodeStatus_HandshakeCompleted } @@ -234,6 +250,10 @@ func (rn *RemoteNode) IsValidator() bool { return rn.hasValidatorServiceFlag() } +func (rn *RemoteNode) IsExpectedValidator() bool { + return rn.GetValidatorPublicKey() != nil +} + func (rn *RemoteNode) hasValidatorServiceFlag() bool { return rn.GetServiceFlag().HasService(SFPosValidator) } @@ -304,6 +324,8 @@ func (rn *RemoteNode) Disconnect() { if rn.connectionStatus == RemoteNodeStatus_Terminated { return } + glog.V(2).Infof("RemoteNode.Disconnect: Disconnecting from peer (id= %d, status= %v)", + rn.id, rn.connectionStatus) id := rn.GetId().ToUint64() switch rn.connectionStatus { @@ -344,9 +366,9 @@ func (rn *RemoteNode) InitiateHandshake(nonce uint64) error { return fmt.Errorf("InitiateHandshake: Remote node is not connected") } + versionTimeExpected := time.Now().Add(rn.params.VersionNegotiationTimeout) + rn.versionTimeExpected = &versionTimeExpected if rn.GetPeer().IsOutbound() { - versionTimeExpected := time.Now().Add(rn.params.VersionNegotiationTimeout) - rn.versionTimeExpected = &versionTimeExpected if err := rn.sendVersionMessage(nonce); err != nil { return fmt.Errorf("InitiateHandshake: Problem sending version message to peer (id= %d): %v", rn.id, err) } @@ -397,6 +419,19 @@ func (rn *RemoteNode) newVersionMessage(nonce uint64) *MsgDeSoVersion { return ver } +func (rn *RemoteNode) IsTimedOut() bool { + if rn.IsTerminated() { + return true + } + if rn.IsConnected() || rn.IsVersionSent() { + return rn.versionTimeExpected.Before(time.Now()) + } + if rn.IsVerackSent() { + return rn.verackTimeExpected.Before(time.Now()) + } + return false +} + // HandleVersionMessage is called upon receiving a version message from the RemoteNode's peer. The peer may be the one // initiating the handshake, in which case, we should respond with our own version message. To do this, we pass the // responseNonce to this function, which we will use in our response version message. 
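// IsTimedOut above keys the deadline on connection status: the version
// deadline applies while Connected or VersionSent, the verack deadline while
// VerackSent, and a Terminated node is always considered timed out. A
// stand-alone sketch of that decision, with a local enum standing in for
// RemoteNodeStatus and value deadlines instead of the struct's pointers:
package main

import (
	"fmt"
	"time"
)

type status int

const (
	statusConnected status = iota
	statusVersionSent
	statusVerackSent
	statusTerminated
)

func isTimedOut(s status, versionDeadline, verackDeadline time.Time) bool {
	now := time.Now()
	switch s {
	case statusTerminated:
		return true // always eligible for cleanup
	case statusConnected, statusVersionSent:
		return versionDeadline.Before(now)
	case statusVerackSent:
		return verackDeadline.Before(now)
	}
	return false
}

func main() {
	past := time.Now().Add(-time.Minute)
	fmt.Println(isTimedOut(statusVersionSent, past, past)) // true
}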
@@ -404,7 +439,7 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce rn.mtx.Lock() defer rn.mtx.Unlock() - if rn.connectionStatus != RemoteNodeStatus_Connected && rn.connectionStatus != RemoteNodeStatus_VersionSent { + if !rn.IsConnected() && !rn.IsVersionSent() { return fmt.Errorf("HandleVersionMessage: RemoteNode is not connected or version exchange has already "+ "been completed, connectionStatus: %v", rn.connectionStatus) } @@ -416,7 +451,7 @@ func (rn *RemoteNode) HandleVersionMessage(verMsg *MsgDeSoVersion, responseNonce } // Verify that the peer's version message is sent within the version negotiation timeout. - if rn.versionTimeExpected != nil && rn.versionTimeExpected.Before(time.Now()) { + if rn.versionTimeExpected.Before(time.Now()) { return fmt.Errorf("RemoteNode.HandleVersionMessage: Requesting disconnect for id: (%v) "+ "version timeout. Time expected: %v, now: %v", rn.id, rn.versionTimeExpected.UnixMicro(), time.Now().UnixMicro()) } @@ -580,7 +615,6 @@ func (rn *RemoteNode) HandleVerackMessage(vrkMsg *MsgDeSoVerack) error { vMeta.versionNegotiated = true rn._logVersionSuccess() rn.setStatusHandshakeCompleted() - rn.srv.NotifyHandshakePeerMessage(rn.peer) return nil } diff --git a/lib/remote_node_manager.go b/lib/remote_node_manager.go index 02bed8e3e..2457af58f 100644 --- a/lib/remote_node_manager.go +++ b/lib/remote_node_manager.go @@ -51,12 +51,19 @@ func NewRemoteNodeManager(srv *Server, bc *Blockchain, cmgr *ConnectionManager, } } -func (manager *RemoteNodeManager) newRemoteNode(validatorPublicKey *bls.PublicKey) *RemoteNode { +func (manager *RemoteNodeManager) DisconnectAll() { + for _, rn := range manager.GetAllRemoteNodes().GetAll() { + glog.V(2).Infof("RemoteNodeManager.DisconnectAll: Disconnecting from remote node (id=%v)", rn.GetId()) + manager.Disconnect(rn) + } +} + +func (manager *RemoteNodeManager) newRemoteNode(validatorPublicKey *bls.PublicKey, isPersistent bool) *RemoteNode { id := atomic.AddUint64(&manager.remoteNodeIndex, 1) remoteNodeId := NewRemoteNodeId(id) latestBlockHeight := uint64(manager.bc.BlockTip().Height) - return NewRemoteNode(remoteNodeId, validatorPublicKey, manager.srv, manager.cmgr, manager.keystore, manager.params, - manager.minTxFeeRateNanosPerKB, latestBlockHeight, manager.nodeServices) + return NewRemoteNode(remoteNodeId, validatorPublicKey, isPersistent, manager.srv, manager.cmgr, manager.keystore, + manager.params, manager.minTxFeeRateNanosPerKB, latestBlockHeight, manager.nodeServices) } func (manager *RemoteNodeManager) ProcessCompletedHandshake(remoteNode *RemoteNode) { @@ -78,7 +85,7 @@ func (manager *RemoteNodeManager) Disconnect(rn *RemoteNode) { if rn == nil { return } - glog.V(2).Infof("RemoteNodeManager.Disconnect: Disconnecting from remote node %v", rn.GetId()) + glog.V(2).Infof("RemoteNodeManager.Disconnect: Disconnecting from remote node id=%v", rn.GetId()) rn.Disconnect() manager.removeRemoteNodeFromIndexer(rn) } @@ -126,6 +133,18 @@ func (manager *RemoteNodeManager) SendMessage(rn *RemoteNode, desoMessage DeSoMe return rn.SendMessage(desoMessage) } +func (manager *RemoteNodeManager) Cleanup() { + manager.mtx.Lock() + defer manager.mtx.Unlock() + + for _, rn := range manager.GetAllRemoteNodes().GetAll() { + if rn.IsTimedOut() { + glog.V(2).Infof("RemoteNodeManager.Cleanup: Disconnecting from remote node (id=%v)", rn.GetId()) + manager.Disconnect(rn) + } + } +} + // ########################### // ## Create RemoteNode // ########################### @@ -139,7 +158,7 @@ func (manager 
*RemoteNodeManager) CreateValidatorConnection(netAddr *wire.NetAdd return fmt.Errorf("RemoteNodeManager.CreateValidatorConnection: RemoteNode already exists for public key: %v", publicKey) } - remoteNode := manager.newRemoteNode(publicKey) + remoteNode := manager.newRemoteNode(publicKey, false) if err := remoteNode.DialOutboundConnection(netAddr); err != nil { return errors.Wrapf(err, "RemoteNodeManager.CreateValidatorConnection: Problem calling DialPersistentOutboundConnection "+ "for addr: (%s:%v)", netAddr.IP.String(), netAddr.Port) @@ -154,7 +173,7 @@ func (manager *RemoteNodeManager) CreateNonValidatorPersistentOutboundConnection return 0, fmt.Errorf("RemoteNodeManager.CreateNonValidatorPersistentOutboundConnection: netAddr is nil") } - remoteNode := manager.newRemoteNode(nil) + remoteNode := manager.newRemoteNode(nil, true) if err := remoteNode.DialPersistentOutboundConnection(netAddr); err != nil { return 0, errors.Wrapf(err, "RemoteNodeManager.CreateNonValidatorPersistentOutboundConnection: Problem calling DialPersistentOutboundConnection "+ "for addr: (%s:%v)", netAddr.IP.String(), netAddr.Port) @@ -169,7 +188,7 @@ func (manager *RemoteNodeManager) CreateNonValidatorOutboundConnection(netAddr * return fmt.Errorf("RemoteNodeManager.CreateNonValidatorOutboundConnection: netAddr is nil") } - remoteNode := manager.newRemoteNode(nil) + remoteNode := manager.newRemoteNode(nil, false) if err := remoteNode.DialOutboundConnection(netAddr); err != nil { return errors.Wrapf(err, "RemoteNodeManager.CreateNonValidatorOutboundConnection: Problem calling DialOutboundConnection "+ "for addr: (%s:%v)", netAddr.IP.String(), netAddr.Port) @@ -182,7 +201,7 @@ func (manager *RemoteNodeManager) CreateNonValidatorOutboundConnection(netAddr * func (manager *RemoteNodeManager) AttachInboundConnection(conn net.Conn, na *wire.NetAddress) (*RemoteNode, error) { - remoteNode := manager.newRemoteNode(nil) + remoteNode := manager.newRemoteNode(nil, false) if err := remoteNode.AttachInboundConnection(conn, na); err != nil { return remoteNode, errors.Wrapf(err, "RemoteNodeManager.AttachInboundConnection: Problem calling AttachInboundConnection "+ "for addr: (%s)", conn.RemoteAddr().String()) @@ -205,7 +224,7 @@ func (manager *RemoteNodeManager) AttachOutboundConnection(conn net.Conn, na *wi if err := remoteNode.AttachOutboundConnection(conn, na, isPersistent); err != nil { manager.Disconnect(remoteNode) return nil, errors.Wrapf(err, "RemoteNodeManager.AttachOutboundConnection: Problem calling AttachOutboundConnection "+ - "for addr: (%s)", conn.RemoteAddr().String()) + "for addr: (%s). Disconnecting remote node (id=%v)", conn.RemoteAddr().String(), remoteNode.GetId()) } return remoteNode, nil diff --git a/lib/server.go b/lib/server.go index 1641770cc..0ef8afae5 100644 --- a/lib/server.go +++ b/lib/server.go @@ -450,7 +450,7 @@ func NewServer( timesource := chainlib.NewMedianTime() // Create a new connection manager but note that it won't be initialized until Start(). 
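// RemoteNodeManager.Cleanup above disconnects every remote node whose
// IsTimedOut() reports true. A driver for it would tick periodically and
// honor an exit channel, in the style of the other network-manager routines
// gated by DisableNetworkManagerRoutines; the one-second interval below is
// illustrative, not a value taken from the diff.
package main

import "time"

func startCleanupRoutine(cleanup func(), exit <-chan struct{}) {
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			cleanup() // drop timed-out remote nodes
		case <-exit:
			return
		}
	}
}

func main() {
	exit := make(chan struct{})
	go startCleanupRoutine(func() {}, exit)
	time.Sleep(10 * time.Millisecond)
	close(exit)
}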
- _incomingMessages := make(chan *ServerMessage, (_targetOutboundPeers+_maxInboundPeers)*3) + _incomingMessages := make(chan *ServerMessage, 100+(_targetOutboundPeers+_maxInboundPeers)*3) _cmgr := NewConnectionManager( _params, _listeners, _connectIps, timesource, _hyperSync, _syncType, _stallTimeoutSeconds, _minFeeRateNanosPerKB, @@ -739,13 +739,6 @@ func (srv *Server) GetSnapshot(pp *Peer) { "with Prefix (%v) and SnapshotStartEntry (%v)", pp, prefix, lastReceivedKey) } -func (srv *Server) NotifyHandshakePeerMessage(peer *Peer) { - srv.incomingMessages <- &ServerMessage{ - Peer: peer, - Msg: &MsgDeSoPeerHandshakeComplete{}, - } -} - // GetBlocksToStore is part of the archival mode, which makes the node download all historical blocks after completing // hypersync. We will go through all blocks corresponding to the snapshot and download the blocks. func (srv *Server) GetBlocksToStore(pp *Peer) { @@ -2264,8 +2257,6 @@ func (srv *Server) _handleGetAddrMessage(pp *Peer, msg *MsgDeSoGetAddr) { func (srv *Server) _handleControlMessages(serverMessage *ServerMessage) (_shouldQuit bool) { switch serverMessage.Msg.(type) { // Control messages used internally to signal to the server. - case *MsgDeSoPeerHandshakeComplete: - srv.handshakeController._handleHandshakeCompleteMessage(serverMessage.Peer, serverMessage.Msg) case *MsgDeSoDisconnectedPeer: srv._handleDonePeer(serverMessage.Peer) srv.connectionController._handleDonePeerMessage(serverMessage.Peer, serverMessage.Msg) From f60ec17b3029d59d39664969cd2afee4cfadb0b6 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Mon, 29 Jan 2024 21:13:07 -0800 Subject: [PATCH 13/37] PoS NetworkManager Address (#957) * Revert "Another split" This reverts commit eaeec5875a84621b4888fc2a6104e9904e7ef53d. * Revert routine stops * gofmt * Add addrMgr to Server * Review --- integration_testing/connection_bridge.go | 6 +- lib/connection_controller.go | 18 +-- lib/connection_manager.go | 12 +- lib/peer.go | 13 +- lib/remote_node.go | 7 ++ lib/server.go | 154 ++++++++++++++--------- 6 files changed, 114 insertions(+), 96 deletions(-) diff --git a/integration_testing/connection_bridge.go b/integration_testing/connection_bridge.go index f6a9897ed..139c7cafb 100644 --- a/integration_testing/connection_bridge.go +++ b/integration_testing/connection_bridge.go @@ -113,13 +113,12 @@ func (bridge *ConnectionBridge) createInboundConnection(node *cmd.Node) *lib.Pee // This channel is redundant in our setting. messagesFromPeer := make(chan *lib.ServerMessage, 100) - newPeerChan := make(chan *lib.Peer, 100) donePeerChan := make(chan *lib.Peer, 100) // Because it is an inbound Peer of the node, it is simultaneously a "fake" outbound Peer of the bridge. // Hence, we will mark the _isOutbound parameter as "true" in NewPeer. 
peer := lib.NewPeer(uint64(lib.RandInt64(math.MaxInt64)), conn, true, netAddress, true, 10000, 0, &lib.DeSoMainnetParams, - messagesFromPeer, nil, nil, lib.NodeSyncTypeAny, newPeerChan, donePeerChan) + messagesFromPeer, nil, nil, lib.NodeSyncTypeAny, donePeerChan) return peer } @@ -144,11 +143,10 @@ func (bridge *ConnectionBridge) createOutboundConnection(node *cmd.Node, otherNo addrMgr := addrmgr.New("", net.LookupIP) na, err := lib.IPToNetAddr(conn.RemoteAddr().String(), addrMgr, otherNode.Params) messagesFromPeer := make(chan *lib.ServerMessage, 100) - newPeerChan := make(chan *lib.Peer, 100) donePeerChan := make(chan *lib.Peer, 100) peer := lib.NewPeer(uint64(lib.RandInt64(math.MaxInt64)), conn, false, na, false, 10000, 0, bridge.nodeB.Params, - messagesFromPeer, nil, nil, lib.NodeSyncTypeAny, newPeerChan, donePeerChan) + messagesFromPeer, nil, nil, lib.NodeSyncTypeAny, donePeerChan) bridge.newPeerChan <- peer //} }(ll) diff --git a/lib/connection_controller.go b/lib/connection_controller.go index ac8467b09..ef021e100 100644 --- a/lib/connection_controller.go +++ b/lib/connection_controller.go @@ -97,11 +97,11 @@ func (cc *ConnectionController) Start() { go cc.startRemoteNodeCleanup() cc.startGroup.Wait() - cc.exitGroup.Add(4) } func (cc *ConnectionController) Stop() { if !cc.params.DisableNetworkManagerRoutines { + cc.exitGroup.Add(4) close(cc.exitChan) cc.exitGroup.Wait() } @@ -201,22 +201,6 @@ func (cc *ConnectionController) _handleDonePeerMessage(origin *Peer, desoMsg DeS } } -func (cc *ConnectionController) _handleAddrMessage(origin *Peer, desoMsg DeSoMessage) { - if desoMsg.GetMsgType() != MsgTypeAddr { - return - } - - // TODO -} - -func (cc *ConnectionController) _handleGetAddrMessage(origin *Peer, desoMsg DeSoMessage) { - if desoMsg.GetMsgType() != MsgTypeGetAddr { - return - } - - // TODO -} - // _handleNewConnectionMessage is called when a new outbound or inbound connection is established. It is responsible // for creating a RemoteNode from the connection and initiating the handshake. The incoming DeSoMessage is a control message. func (cc *ConnectionController) _handleNewConnectionMessage(origin *Peer, desoMsg DeSoMessage) { diff --git a/lib/connection_manager.go b/lib/connection_manager.go index 761fb048e..ffc56aa25 100644 --- a/lib/connection_manager.go +++ b/lib/connection_manager.go @@ -105,9 +105,8 @@ type ConnectionManager struct { // peers' time. timeSource chainlib.MedianTimeSource - // Events that can happen to a peer. - newPeerChan chan *Peer - donePeerChan chan *Peer + // peerDisconnectedChan is notified whenever a peer exits. + peerDisconnectedChan chan *Peer // stallTimeoutSeconds is how long we wait to receive responses from Peers // for certain types of messages. @@ -152,8 +151,7 @@ func NewConnectionManager( attemptedOutboundAddrs: make(map[string]bool), // Initialize the channels. - newPeerChan: make(chan *Peer, 100), - donePeerChan: make(chan *Peer, 100), + peerDisconnectedChan: make(chan *Peer, 100), outboundConnectionChan: make(chan *outboundConnection, 100), inboundConnectionChan: make(chan *inboundConnection, 100), @@ -301,7 +299,7 @@ func (cmgr *ConnectionManager) ConnectPeer(id uint64, conn net.Conn, na *wire.Ne cmgr.minFeeRateNanosPerKB, cmgr.params, cmgr.srv.incomingMessages, cmgr, cmgr.srv, cmgr.SyncType, - cmgr.newPeerChan, cmgr.donePeerChan) + cmgr.peerDisconnectedChan) // Now we can add the peer to our data structures. 
peer._logAddPeer() @@ -635,7 +633,7 @@ func (cmgr *ConnectionManager) Start() { Connection: ic, }, } - case pp := <-cmgr.donePeerChan: + case pp := <-cmgr.peerDisconnectedChan: { // By the time we get here, it can be assumed that the Peer's Disconnect function // has already been called, since that is what's responsible for adding the peer diff --git a/lib/peer.go b/lib/peer.go index e0aae7e77..c59d77921 100644 --- a/lib/peer.go +++ b/lib/peer.go @@ -100,9 +100,8 @@ type Peer struct { knownAddressesMap map[string]bool // Output queue for messages that need to be sent to the peer. - outputQueueChan chan DeSoMessage - newPeerChan chan *Peer - donePeerChan chan *Peer + outputQueueChan chan DeSoMessage + peerDisconnectedChan chan *Peer // Set to zero until Disconnect has been called on the Peer. Used to make it // so that the logic in Disconnect will only be executed once. @@ -625,8 +624,7 @@ func NewPeer(_id uint64, _conn net.Conn, _isOutbound bool, _netAddr *wire.NetAdd messageChan chan *ServerMessage, _cmgr *ConnectionManager, _srv *Server, _syncType NodeSyncType, - newPeerChan chan *Peer, - donePeerChan chan *Peer) *Peer { + peerDisconnectedChan chan *Peer) *Peer { pp := Peer{ ID: _id, @@ -638,8 +636,7 @@ func NewPeer(_id uint64, _conn net.Conn, _isOutbound bool, _netAddr *wire.NetAdd isOutbound: _isOutbound, isPersistent: _isPersistent, outputQueueChan: make(chan DeSoMessage), - newPeerChan: newPeerChan, - donePeerChan: donePeerChan, + peerDisconnectedChan: peerDisconnectedChan, quit: make(chan interface{}), knownInventory: lru.NewCache(maxKnownInventory), blocksToSend: make(map[BlockHash]bool), @@ -1310,7 +1307,7 @@ func (pp *Peer) Disconnect() { // Add the Peer to donePeers so that the ConnectionManager and Server can do any // cleanup they need to do. - pp.donePeerChan <- pp + pp.peerDisconnectedChan <- pp } func (pp *Peer) _logVersionSuccess() { diff --git a/lib/remote_node.go b/lib/remote_node.go index b74ba9e23..42fe21521 100644 --- a/lib/remote_node.go +++ b/lib/remote_node.go @@ -207,6 +207,13 @@ func (rn *RemoteNode) GetUserAgent() string { return rn.handshakeMetadata.userAgent } +func (rn *RemoteNode) GetNetAddress() *wire.NetAddress { + if !rn.IsHandshakeCompleted() || rn.GetPeer() == nil { + return nil + } + return rn.GetPeer().NetAddress() +} + func (rn *RemoteNode) IsInbound() bool { return rn.peer != nil && !rn.peer.IsOutbound() } diff --git a/lib/server.go b/lib/server.go index 0ef8afae5..987431af8 100644 --- a/lib/server.go +++ b/lib/server.go @@ -68,6 +68,8 @@ type Server struct { connectionController *ConnectionController // posMempool *PosMemPool TODO: Add the mempool later + params *DeSoParams + // All messages received from peers get sent from the ConnectionManager to the // Server through this channel. // @@ -130,7 +132,9 @@ type Server struct { // It is organized in this way so that we can limit the number of addresses we // are distributing for a single peer to avoid a DOS attack. addrsToBroadcastLock deadlock.RWMutex - addrsToBroadcastt map[string][]*SingleAddr + addrsToBroadcast map[string][]*SingleAddr + + AddrMgr *addrmgr.AddrManager // When set to true, we disable the ConnectionManager DisableNetworking bool @@ -438,6 +442,7 @@ func NewServer( snapshot: _snapshot, nodeMessageChannel: _nodeMessageChan, forceChecksum: _forceChecksum, + AddrMgr: _desoAddrMgr, } if stateChangeSyncer != nil { @@ -590,7 +595,7 @@ func NewServer( } // Initialize the addrs to broadcast map. 
- srv.addrsToBroadcastt = make(map[string][]*SingleAddr) + srv.addrsToBroadcast = make(map[string][]*SingleAddr) // This will initialize the request queues. srv.ResetRequestQueues() @@ -2171,20 +2176,33 @@ func (srv *Server) StartStatsdReporter() { }() } -func (srv *Server) _handleAddrMessage(pp *Peer, msg *MsgDeSoAddr) { +func (srv *Server) _handleAddrMessage(pp *Peer, desoMsg DeSoMessage) { + if desoMsg.GetMsgType() != MsgTypeAddr { + return + } + + id := NewRemoteNodeId(pp.ID) + var msg *MsgDeSoAddr + var ok bool + if msg, ok = desoMsg.(*MsgDeSoAddr); !ok { + glog.Errorf("Server._handleAddrMessage: Problem decoding MsgDeSoAddr: %v", spew.Sdump(desoMsg)) + srv.connectionController.rnManager.DisconnectById(id) + return + } + srv.addrsToBroadcastLock.Lock() defer srv.addrsToBroadcastLock.Unlock() - glog.V(1).Infof("Server._handleAddrMessage: Received Addr from peer %v with addrs %v", pp, spew.Sdump(msg.AddrList)) + glog.V(1).Infof("Server._handleAddrMessage: Received Addr from peer id=%v with addrs %v", pp.ID, spew.Sdump(msg.AddrList)) // If this addr message contains more than the maximum allowed number of addresses // then disconnect this peer. if len(msg.AddrList) > MaxAddrsPerAddrMsg { glog.Errorf(fmt.Sprintf("Server._handleAddrMessage: Disconnecting "+ - "Peer %v for sending us an addr message with %d transactions, which exceeds "+ + "Peer id=%v for sending us an addr message with %d transactions, which exceeds "+ "the max allowed %d", - pp, len(msg.AddrList), MaxAddrsPerAddrMsg)) - pp.Disconnect() + pp.ID, len(msg.AddrList), MaxAddrsPerAddrMsg)) + srv.connectionController.rnManager.DisconnectById(id) return } @@ -2193,19 +2211,16 @@ func (srv *Server) _handleAddrMessage(pp *Peer, msg *MsgDeSoAddr) { for _, addr := range msg.AddrList { addrAsNetAddr := wire.NewNetAddressIPPort(addr.IP, addr.Port, (wire.ServiceFlag)(addr.Services)) if !addrmgr.IsRoutable(addrAsNetAddr) { - glog.V(1).Infof("Dropping address %v from peer %v because it is not routable", addr, pp) + glog.V(1).Infof("Server._handleAddrMessage: Dropping address %v from peer %v because it is not routable", addr, pp) continue } netAddrsReceived = append( netAddrsReceived, addrAsNetAddr) } - // TODO: temporary - addressMgr := addrmgr.New("", net.LookupIP) - addressMgr.AddAddresses(netAddrsReceived, pp.netAddr) + srv.AddrMgr.AddAddresses(netAddrsReceived, pp.netAddr) - // If the message had <= 10 addrs in it, then queue all the addresses for relaying - // on the next cycle. + // If the message had <= 10 addrs in it, then queue all the addresses for relaying on the next cycle. if len(msg.AddrList) <= 10 { glog.V(1).Infof("Server._handleAddrMessage: Queueing %d addrs for forwarding from "+ "peer %v", len(msg.AddrList), pp) @@ -2215,7 +2230,7 @@ func (srv *Server) _handleAddrMessage(pp *Peer, msg *MsgDeSoAddr) { Port: pp.netAddr.Port, Services: pp.serviceFlags, } - listToAddTo, hasSeenSource := srv.addrsToBroadcastt[sourceAddr.StringWithPort(false /*includePort*/)] + listToAddTo, hasSeenSource := srv.addrsToBroadcast[sourceAddr.StringWithPort(false /*includePort*/)] if !hasSeenSource { listToAddTo = []*SingleAddr{} } @@ -2225,17 +2240,27 @@ func (srv *Server) _handleAddrMessage(pp *Peer, msg *MsgDeSoAddr) { listToAddTo = listToAddTo[:MaxAddrsPerAddrMsg/2] } listToAddTo = append(listToAddTo, msg.AddrList...) 
-		srv.addrsToBroadcastt[sourceAddr.StringWithPort(false /*includePort*/)] = listToAddTo
+		srv.addrsToBroadcast[sourceAddr.StringWithPort(false /*includePort*/)] = listToAddTo
 	}
 }
 
-func (srv *Server) _handleGetAddrMessage(pp *Peer, msg *MsgDeSoGetAddr) {
+func (srv *Server) _handleGetAddrMessage(pp *Peer, desoMsg DeSoMessage) {
+	if desoMsg.GetMsgType() != MsgTypeGetAddr {
+		return
+	}
+
+	id := NewRemoteNodeId(pp.ID)
+	if _, ok := desoMsg.(*MsgDeSoGetAddr); !ok {
+		glog.Errorf("Server._handleGetAddrMessage: Problem decoding "+
+			"MsgDeSoGetAddr: %v", spew.Sdump(desoMsg))
+		srv.connectionController.rnManager.DisconnectById(id)
+		return
+	}
+
 	glog.V(1).Infof("Server._handleGetAddrMessage: Received GetAddr from peer %v", pp)
 	// When we get a GetAddr message, choose MaxAddrsPerMsg from the AddrMgr
 	// and send them back to the peer.
-	// TODO: temporary
-	addressMgr := addrmgr.New("", net.LookupIP)
-	netAddrsFound := addressMgr.AddressCache()
+	netAddrsFound := srv.AddrMgr.AddressCache()
 	if len(netAddrsFound) > MaxAddrsPerAddrMsg {
 		netAddrsFound = netAddrsFound[:MaxAddrsPerAddrMsg]
 	}
@@ -2251,7 +2276,12 @@
 		}
 		res.AddrList = append(res.AddrList, singleAddr)
 	}
-	pp.AddDeSoMessage(res, false)
+	rn := srv.connectionController.rnManager.GetRemoteNodeById(id)
+	if err := srv.connectionController.rnManager.SendMessage(rn, res); err != nil {
+		glog.Errorf("Server._handleGetAddrMessage: Problem sending addr message to peer %v: %v", pp, err)
+		srv.connectionController.rnManager.DisconnectById(id)
+		return
+	}
 }
 
 func (srv *Server) _handleControlMessages(serverMessage *ServerMessage) (_shouldQuit bool) {
@@ -2274,9 +2304,9 @@
 	switch msg := serverMessage.Msg.(type) {
 	// Messages sent among peers.
 	case *MsgDeSoAddr:
-		srv.connectionController._handleAddrMessage(serverMessage.Peer, serverMessage.Msg)
+		srv._handleAddrMessage(serverMessage.Peer, serverMessage.Msg)
 	case *MsgDeSoGetAddr:
-		srv.connectionController._handleGetAddrMessage(serverMessage.Peer, serverMessage.Msg)
+		srv._handleGetAddrMessage(serverMessage.Peer, serverMessage.Msg)
 	case *MsgDeSoGetHeaders:
 		srv._handleGetHeaders(serverMessage.Peer, msg)
 	case *MsgDeSoHeaderBundle:
@@ -2399,20 +2429,6 @@
 		glog.V(2).Infof("Server._startConsensus: Handling message of type %v from Peer %v",
 			serverMessage.Msg.GetMsgType(), serverMessage.Peer)
-
-		// If the message is an addr message we handle it independent of whether or
-		// not the BitcoinManager is synced.
-		if serverMessage.Msg.GetMsgType() == MsgTypeAddr {
-			srv._handleAddrMessage(serverMessage.Peer, serverMessage.Msg.(*MsgDeSoAddr))
-			continue
-		}
-		// If the message is a GetAddr message we handle it independent of whether or
-		// not the BitcoinManager is synced.
-		if serverMessage.Msg.GetMsgType() == MsgTypeGetAddr {
-			srv._handleGetAddrMessage(serverMessage.Peer, serverMessage.Msg.(*MsgDeSoGetAddr))
-			continue
-		}
-
 		srv._handlePeerMessages(serverMessage)
 
 		// Always check for and handle control messages regardless of whether the
@@ -2433,35 +2449,36 @@
 	glog.V(2).Info("Server.Start: Server done")
 }
 
-func (srv *Server) _getAddrsToBroadcast() []*SingleAddr {
+func (srv *Server) getAddrsToBroadcast() []*SingleAddr {
 	srv.addrsToBroadcastLock.Lock()
 	defer srv.addrsToBroadcastLock.Unlock()
 
 	// If there's nothing in the map, return.
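// The rewritten _handleAddrMessage and _handleGetAddrMessage above share a
// guard: check the message type tag, then the concrete type assertion, and
// disconnect the sender when either fails or a size limit is exceeded. A
// self-contained sketch with stub types, not the lib interfaces:
package main

import "fmt"

type msgType int

const msgTypeAddr msgType = 1

type message interface{ GetMsgType() msgType }

type addrMsg struct{ addrs []string }

func (m *addrMsg) GetMsgType() msgType { return msgTypeAddr }

const maxAddrsPerMsg = 2 // illustrative stand-in for MaxAddrsPerAddrMsg

func handleAddr(msg message, disconnect func()) {
	if msg.GetMsgType() != msgTypeAddr {
		return // not for this handler
	}
	m, ok := msg.(*addrMsg)
	if !ok {
		disconnect() // type tag and concrete type disagree: drop the peer
		return
	}
	if len(m.addrs) > maxAddrsPerMsg {
		disconnect() // oversized addr message: drop the peer
		return
	}
	fmt.Println("accepted", len(m.addrs), "addrs")
}

func main() {
	handleAddr(&addrMsg{addrs: []string{"a", "b", "c"}},
		func() { fmt.Println("disconnect") })
}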
- if len(srv.addrsToBroadcastt) == 0 { + if len(srv.addrsToBroadcast) == 0 { return []*SingleAddr{} } // If we get here then we have some addresses to broadcast. addrsToBroadcast := []*SingleAddr{} - for len(addrsToBroadcast) < 10 && len(srv.addrsToBroadcastt) > 0 { + for uint32(len(addrsToBroadcast)) < srv.params.MaxAddressesToBroadcast && + len(srv.addrsToBroadcast) > 0 { // Choose a key at random. This works because map iteration is random in golang. bucket := "" - for kk := range srv.addrsToBroadcastt { + for kk := range srv.addrsToBroadcast { bucket = kk break } // Remove the last element from the slice for the given bucket. - currentAddrList := srv.addrsToBroadcastt[bucket] + currentAddrList := srv.addrsToBroadcast[bucket] if len(currentAddrList) > 0 { lastIndex := len(currentAddrList) - 1 currentAddr := currentAddrList[lastIndex] currentAddrList = currentAddrList[:lastIndex] if len(currentAddrList) == 0 { - delete(srv.addrsToBroadcastt, bucket) + delete(srv.addrsToBroadcast, bucket) } else { - srv.addrsToBroadcastt[bucket] = currentAddrList + srv.addrsToBroadcast[bucket] = currentAddrList } addrsToBroadcast = append(addrsToBroadcast, currentAddr) @@ -2478,18 +2495,24 @@ func (srv *Server) _startAddressRelayer() { if atomic.LoadInt32(&srv.shutdown) >= 1 { break } - // For the first ten minutes after the server starts, relay our address to all + // For the first ten minutes after the connection controller starts, relay our address to all // peers. After the first ten minutes, do it once every 24 hours. - // TODO: temporary - addressMgr := addrmgr.New("", net.LookupIP) - glog.V(1).Infof("Server.Start._startAddressRelayer: Relaying our own addr to peers") + glog.V(1).Infof("Server.startAddressRelayer: Relaying our own addr to peers") + remoteNodes := srv.connectionController.rnManager.GetAllRemoteNodes().GetAll() if numMinutesPassed < 10 || numMinutesPassed%(RebroadcastNodeAddrIntervalMinutes) == 0 { - for _, pp := range srv.cmgr.GetAllPeers() { - bestAddress := addressMgr.GetBestLocalAddress(pp.netAddr) + for _, rn := range remoteNodes { + if !rn.IsHandshakeCompleted() { + continue + } + netAddr := rn.GetNetAddress() + if netAddr == nil { + continue + } + bestAddress := srv.AddrMgr.GetBestLocalAddress(netAddr) if bestAddress != nil { - glog.V(2).Infof("Server.Start._startAddressRelayer: Relaying address %v to "+ - "peer %v", bestAddress.IP.String(), pp) - pp.AddDeSoMessage(&MsgDeSoAddr{ + glog.V(2).Infof("Server.startAddressRelayer: Relaying address %v to "+ + "RemoteNode (id= %v)", bestAddress.IP.String(), rn.GetId()) + addrMsg := &MsgDeSoAddr{ AddrList: []*SingleAddr{ { Timestamp: time.Now(), @@ -2498,27 +2521,38 @@ func (srv *Server) _startAddressRelayer() { Services: (ServiceFlag)(bestAddress.Services), }, }, - }, false) + } + if err := rn.SendMessage(addrMsg); err != nil { + glog.Errorf("Server.startAddressRelayer: Problem sending "+ + "MsgDeSoAddr to RemoteNode (id= %v): %v", rn.GetId(), err) + } } } } - glog.V(2).Infof("Server.Start._startAddressRelayer: Seeing if there are addrs to relay...") + glog.V(2).Infof("Server.startAddressRelayer: Seeing if there are addrs to relay...") // Broadcast the addrs we have to all of our peers. 
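// getAddrsToBroadcast above drains up to MaxAddressesToBroadcast addresses by
// repeatedly picking a random source bucket (Go randomizes map iteration
// order), popping that bucket's last address, and deleting the bucket once it
// empties. A self-contained sketch of the same drain:
package main

import "fmt"

func drainBuckets(buckets map[string][]string, max int) []string {
	var out []string
	for len(out) < max && len(buckets) > 0 {
		var key string
		for k := range buckets { // effectively a random bucket
			key = k
			break
		}
		list := buckets[key]
		last := len(list) - 1
		out = append(out, list[last])
		if last == 0 {
			delete(buckets, key) // bucket exhausted
		} else {
			buckets[key] = list[:last]
		}
	}
	return out
}

func main() {
	buckets := map[string][]string{"1.2.3.4": {"a", "b"}, "5.6.7.8": {"c"}}
	fmt.Println(drainBuckets(buckets, 10))
}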
- addrsToBroadcast := srv._getAddrsToBroadcast() + addrsToBroadcast := srv.getAddrsToBroadcast() if len(addrsToBroadcast) == 0 { - glog.V(2).Infof("Server.Start._startAddressRelayer: No addrs to relay.") + glog.V(2).Infof("Server.startAddressRelayer: No addrs to relay.") time.Sleep(AddrRelayIntervalSeconds * time.Second) continue } - glog.V(2).Infof("Server.Start._startAddressRelayer: Found %d addrs to "+ + glog.V(2).Infof("Server.startAddressRelayer: Found %d addrs to "+ "relay: %v", len(addrsToBroadcast), spew.Sdump(addrsToBroadcast)) // Iterate over all our peers and broadcast the addrs to all of them. - for _, pp := range srv.cmgr.GetAllPeers() { - pp.AddDeSoMessage(&MsgDeSoAddr{ + for _, rn := range remoteNodes { + if !rn.IsHandshakeCompleted() { + continue + } + addrMsg := &MsgDeSoAddr{ AddrList: addrsToBroadcast, - }, false) + } + if err := rn.SendMessage(addrMsg); err != nil { + glog.Errorf("Server.startAddressRelayer: Problem sending "+ + "MsgDeSoAddr to RemoteNode (id= %v): %v", rn.GetId(), err) + } } time.Sleep(AddrRelayIntervalSeconds * time.Second) continue From baa91ae3140abb76e0667c95e6affba0fda934d9 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Mon, 29 Jan 2024 21:16:46 -0800 Subject: [PATCH 14/37] PoS NetworkManager Rename and Nits (#959) * Renames * nits * More renames * Review --- ...st.go => network_manager_routines_test.go} | 58 +-- ...roller_test.go => network_manager_test.go} | 106 +++--- ..._test.go => network_manager_utils_test.go} | 14 +- lib/handshake_controller.go | 78 ++-- ...ction_controller.go => network_manager.go} | 360 +++++++++--------- lib/peer.go | 11 - lib/remote_node.go | 6 +- lib/remote_node_manager.go | 1 + lib/server.go | 60 +-- 9 files changed, 355 insertions(+), 339 deletions(-) rename integration_testing/{connection_controller_routines_test.go => network_manager_routines_test.go} (93%) rename integration_testing/{connection_controller_test.go => network_manager_test.go} (86%) rename integration_testing/{connection_controller_utils_test.go => network_manager_utils_test.go} (94%) rename lib/{connection_controller.go => network_manager.go} (60%) diff --git a/integration_testing/connection_controller_routines_test.go b/integration_testing/network_manager_routines_test.go similarity index 93% rename from integration_testing/connection_controller_routines_test.go rename to integration_testing/network_manager_routines_test.go index 1f30e22b5..141e36026 100644 --- a/integration_testing/connection_controller_routines_test.go +++ b/integration_testing/network_manager_routines_test.go @@ -323,8 +323,8 @@ func TestConnectionControllerValidatorInboundDeduplication(t *testing.T) { node2 = startNode(t, node2) node3 = startNode(t, node3) - cc2 := node2.Server.GetConnectionController() - require.NoError(t, cc2.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm2 := node2.Server.GetNetworkManager() + require.NoError(t, nm2.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) // First wait for node2 to be indexed as a validator by node1. waitForValidatorConnection(t, node1, node2) // Now wait for node2 to be re-indexed as a non-validator. @@ -332,8 +332,8 @@ func TestConnectionControllerValidatorInboundDeduplication(t *testing.T) { waitForNonValidatorOutboundConnection(t, node2, node1) // Now connect node3 to node1. 
- cc3 := node3.Server.GetConnectionController() - require.NoError(t, cc3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm3 := node3.Server.GetNetworkManager() + require.NoError(t, nm3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) // First wait for node3 to be indexed as a validator by node1. waitForValidatorConnection(t, node1, node3) // Now wait for node3 to be re-indexed as a non-validator. @@ -373,9 +373,9 @@ func TestConnectionControllerNonValidatorConnectorOutbound(t *testing.T) { } node1 = startNode(t, node1) - cc := node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node5.Listeners[0].Addr().String())) - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node6.Listeners[0].Addr().String())) + nm := node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node5.Listeners[0].Addr().String())) + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node6.Listeners[0].Addr().String())) waitForCountRemoteNodeIndexerHandshakeCompleted(t, node1, 3, 0, 3, 0) waitForNonValidatorOutboundConnection(t, node1, node2) @@ -435,24 +435,24 @@ func TestConnectionControllerNonValidatorConnectorInbound(t *testing.T) { node10 = startNode(t, node10) // Connect node1 to node2, node3, node7, and node8. - cc1 := node1.Server.GetConnectionController() - require.NoError(t, cc1.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) - require.NoError(t, cc1.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) - require.NoError(t, cc1.CreateNonValidatorOutboundConnection(node7.Listeners[0].Addr().String())) - require.NoError(t, cc1.CreateNonValidatorOutboundConnection(node8.Listeners[0].Addr().String())) + nm1 := node1.Server.GetNetworkManager() + require.NoError(t, nm1.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) + require.NoError(t, nm1.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) + require.NoError(t, nm1.CreateNonValidatorOutboundConnection(node7.Listeners[0].Addr().String())) + require.NoError(t, nm1.CreateNonValidatorOutboundConnection(node8.Listeners[0].Addr().String())) // Connect node4, node5, node6 to node1. - cc4 := node4.Server.GetConnectionController() - require.NoError(t, cc4.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) - cc5 := node5.Server.GetConnectionController() - require.NoError(t, cc5.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) - cc6 := node6.Server.GetConnectionController() - require.NoError(t, cc6.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm4 := node4.Server.GetNetworkManager() + require.NoError(t, nm4.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm5 := node5.Server.GetNetworkManager() + require.NoError(t, nm5.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm6 := node6.Server.GetNetworkManager() + require.NoError(t, nm6.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) // Connect node9, node10 to node1. 
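// The waitFor* helpers these integration tests lean on poll a condition until
// it holds or a deadline passes. An illustrative stand-alone version; the
// timeout and poll interval are arbitrary choices, not values from the test
// suite:
package main

import (
	"fmt"
	"time"
)

func waitForCondition(condition func() bool, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if condition() {
			return nil
		}
		time.Sleep(50 * time.Millisecond)
	}
	return fmt.Errorf("condition not met within %v", timeout)
}

func main() {
	start := time.Now()
	err := waitForCondition(func() bool {
		return time.Since(start) > 100*time.Millisecond
	}, time.Second)
	fmt.Println(err) // <nil>
}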
- cc9 := node9.Server.GetConnectionController() - require.NoError(t, cc9.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) - cc10 := node10.Server.GetConnectionController() - require.NoError(t, cc10.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm9 := node9.Server.GetNetworkManager() + require.NoError(t, nm9.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm10 := node10.Server.GetNetworkManager() + require.NoError(t, nm10.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) activeValidatorsMap := getActiveValidatorsMapWithValidatorNodes(t, node1, node2, node3, node4, node5, node6) setActiveValidators(activeValidatorsMap, node1, node2, node3, node4, node5, node6, node7, node8, node9, node10) @@ -473,12 +473,12 @@ func TestConnectionControllerNonValidatorConnectorAddressMgr(t *testing.T) { node1.Config.MaxInboundPeers = 0 node1 = startNode(t, node1) - cc := node1.Server.GetConnectionController() - na1, err := cc.ConvertIPStringToNetAddress("deso-seed-2.io:17000") - na2, err := cc.ConvertIPStringToNetAddress("deso-seed-3.io:17000") + nm := node1.Server.GetNetworkManager() + na1, err := nm.ConvertIPStringToNetAddress("deso-seed-2.io:17000") + na2, err := nm.ConvertIPStringToNetAddress("deso-seed-3.io:17000") require.NoError(t, err) - cc.AddrMgr.AddAddress(na1, na1) - cc.AddrMgr.AddAddress(na2, na2) + nm.AddrMgr.AddAddress(na1, na1) + nm.AddrMgr.AddAddress(na2, na2) waitForCountRemoteNodeIndexer(t, node1, 2, 0, 2, 0) } @@ -498,7 +498,7 @@ func getActiveValidatorsMapWithValidatorNodes(t *testing.T, validators ...*cmd.N func setActiveValidators(validatorMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator], nodes ...*cmd.Node) { for _, node := range nodes { - node.Server.GetConnectionController().SetActiveValidatorsMap(validatorMap) + node.Server.GetNetworkManager().SetActiveValidatorsMap(validatorMap) } } @@ -539,7 +539,7 @@ func waitForMinNonValidatorCountRemoteNodeIndexer(t *testing.T, node *cmd.Node, minNonValidatorOutboundCount int, minNonValidatorInboundCount int) { userAgent := node.Params.UserAgent - rnManager := node.Server.GetConnectionController().GetRemoteNodeManager() + rnManager := node.Server.GetNetworkManager().GetRemoteNodeManager() condition := func() bool { return checkRemoteNodeIndexerMinNonValidatorCount(rnManager, allCount, validatorCount, minNonValidatorOutboundCount, minNonValidatorInboundCount) diff --git a/integration_testing/connection_controller_test.go b/integration_testing/network_manager_test.go similarity index 86% rename from integration_testing/connection_controller_test.go rename to integration_testing/network_manager_test.go index 63976fbd4..8c883b973 100644 --- a/integration_testing/connection_controller_test.go +++ b/integration_testing/network_manager_test.go @@ -18,8 +18,8 @@ func TestConnectionControllerNonValidator(t *testing.T) { node2.Params.DisableNetworkManagerRoutines = true node2 = startNode(t, node2) - cc := node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) + nm := node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) waitForNonValidatorOutboundConnection(t, node1, node2) waitForNonValidatorInboundConnection(t, node2, node1) @@ -34,8 +34,8 @@ func TestConnectionControllerNonValidator(t *testing.T) { node3.Params.DisableNetworkManagerRoutines = true node3 = 
startNode(t, node3) - cc = node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) + nm = node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) waitForValidatorConnection(t, node1, node3) waitForNonValidatorInboundConnection(t, node3, node1) @@ -50,8 +50,8 @@ func TestConnectionControllerNonValidator(t *testing.T) { node4.Params.DisableNetworkManagerRoutines = true node4 = startNode(t, node4) - cc = node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) + nm = node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) waitForValidatorConnection(t, node1, node4) waitForNonValidatorInboundConnection(t, node4, node1) t.Logf("Test #3 passed | Successfully created outbound connection from NonValidator Node1 to Validator Node4") @@ -74,8 +74,8 @@ func TestConnectionControllerValidator(t *testing.T) { node2.Params.DisableNetworkManagerRoutines = true node2 = startNode(t, node2) - cc := node1.Server.GetConnectionController() - require.NoError(t, cc.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsPub2)) + nm := node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsPub2)) waitForValidatorConnection(t, node1, node2) waitForValidatorConnection(t, node2, node1) @@ -88,8 +88,8 @@ func TestConnectionControllerValidator(t *testing.T) { node3.Params.DisableNetworkManagerRoutines = true node3 = startNode(t, node3) - cc = node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) + nm = node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) waitForNonValidatorOutboundConnection(t, node1, node3) waitForValidatorConnection(t, node3, node1) @@ -104,8 +104,8 @@ func TestConnectionControllerValidator(t *testing.T) { node4.Params.DisableNetworkManagerRoutines = true node4 = startNode(t, node4) - cc = node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) + nm = node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) waitForValidatorConnection(t, node1, node4) waitForValidatorConnection(t, node4, node1) t.Logf("Test #3 passed | Successfully created non-validator outbound connection from Validator Node1 to Validator Node4") @@ -127,8 +127,8 @@ func TestConnectionControllerHandshakeDataErrors(t *testing.T) { node1 = startNode(t, node1) node2 = startNode(t, node2) - cc := node2.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm := node2.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node1) waitForEmptyRemoteNodeIndexer(t, node2) t.Logf("Test #1 passed | Successfully disconnected node with SFValidator flag and ProtocolVersion1 mismatch") @@ -141,8 +141,8 @@ func TestConnectionControllerHandshakeDataErrors(t *testing.T) { node3.Params.ProtocolVersion = lib.ProtocolVersionType(3) node3 = startNode(t, node3) - cc =
node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) + nm = node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node3.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node1) waitForEmptyRemoteNodeIndexer(t, node3) t.Logf("Test #2 passed | Successfully disconnected node with ProtocolVersion3") @@ -153,8 +153,8 @@ func TestConnectionControllerHandshakeDataErrors(t *testing.T) { node4.Params.ProtocolVersion = lib.ProtocolVersion0 node4 = startNode(t, node4) - cc = node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) + nm = node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node4.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node1) waitForEmptyRemoteNodeIndexer(t, node4) t.Logf("Test #3 passed | Successfully disconnected node with ProtocolVersion0") @@ -170,8 +170,8 @@ func TestConnectionControllerHandshakeDataErrors(t *testing.T) { node5.Params.DisableNetworkManagerRoutines = true node5 = startNode(t, node5) - cc = node1.Server.GetConnectionController() - require.NoError(t, cc.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsKeyStore5Wrong.GetSigner().GetPublicKey())) + nm = node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsKeyStore5Wrong.GetSigner().GetPublicKey())) waitForEmptyRemoteNodeIndexer(t, node1) waitForEmptyRemoteNodeIndexer(t, node5) t.Logf("Test #4 passed | Successfully disconnected node with public key mismatch") @@ -183,8 +183,8 @@ func TestConnectionControllerHandshakeDataErrors(t *testing.T) { node6.Params.DisableNetworkManagerRoutines = true node6 = startNode(t, node6) - cc = node1.Server.GetConnectionController() - require.NoError(t, cc.CreateValidatorConnection(node6.Listeners[0].Addr().String(), blsPriv6.PublicKey())) + nm = node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateValidatorConnection(node6.Listeners[0].Addr().String(), blsPriv6.PublicKey())) waitForEmptyRemoteNodeIndexer(t, node1) waitForEmptyRemoteNodeIndexer(t, node6) t.Logf("Test #5 passed | Successfully disconnected supposed validator node with missing SFPosValidator flag") @@ -195,8 +195,8 @@ func TestConnectionControllerHandshakeDataErrors(t *testing.T) { node7.Params.ProtocolVersion = lib.ProtocolVersion1 node7 = startNode(t, node7) - cc = node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node7.Listeners[0].Addr().String())) + nm = node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node7.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node1) waitForEmptyRemoteNodeIndexer(t, node7) t.Logf("Test #6 passed | Successfully disconnected outbound non-validator node with ProtocolVersion1") @@ -213,8 +213,8 @@ func TestConnectionControllerHandshakeTimeouts(t *testing.T) { node2.Params.DisableNetworkManagerRoutines = true node2 = startNode(t, node2) - cc := node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) + nm := node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node1) waitForEmptyRemoteNodeIndexer(t, node2) t.Logf("Test #1 passed | 
Successfully disconnected node after version negotiation timeout") @@ -226,8 +226,8 @@ func TestConnectionControllerHandshakeTimeouts(t *testing.T) { node3.Params.VerackNegotiationTimeout = 0 node3 = startNode(t, node3) - cc = node3.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm = node3.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node1) waitForEmptyRemoteNodeIndexer(t, node3) t.Logf("Test #2 passed | Successfully disconnected node after verack exchange timeout") @@ -248,8 +248,8 @@ func TestConnectionControllerHandshakeTimeouts(t *testing.T) { node5.Params.DisableNetworkManagerRoutines = true node5 = startNode(t, node5) - cc = node4.Server.GetConnectionController() - require.NoError(t, cc.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsKeyStore5.GetSigner().GetPublicKey())) + nm = node4.Server.GetNetworkManager() + require.NoError(t, nm.CreateValidatorConnection(node5.Listeners[0].Addr().String(), blsKeyStore5.GetSigner().GetPublicKey())) waitForEmptyRemoteNodeIndexer(t, node4) waitForEmptyRemoteNodeIndexer(t, node5) t.Logf("Test #3 passed | Successfully disconnected validator node after handshake timeout") @@ -275,17 +275,17 @@ func TestConnectionControllerValidatorDuplication(t *testing.T) { node3 = startNode(t, node3) // Create validator connection from Node1 to Node2 and from Node1 to Node3 - cc := node1.Server.GetConnectionController() - require.NoError(t, cc.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsKeyStore2.GetSigner().GetPublicKey())) + nm := node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateValidatorConnection(node2.Listeners[0].Addr().String(), blsKeyStore2.GetSigner().GetPublicKey())) // This should fail outright because Node3 has a duplicate public key. - require.Error(t, cc.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsKeyStore2.GetSigner().GetPublicKey())) + require.Error(t, nm.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsKeyStore2.GetSigner().GetPublicKey())) waitForValidatorConnection(t, node1, node2) waitForNonValidatorInboundConnection(t, node2, node1) // Now create an outbound connection from Node3 to Node1, which should pass handshake, but then fail because // Node1 already has a validator connection to Node2 with the same public key. 
- cc3 := node3.Server.GetConnectionController() - require.NoError(t, cc3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm3 := node3.Server.GetNetworkManager() + require.NoError(t, nm3.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node3) waitForCountRemoteNodeIndexer(t, node1, 1, 1, 0, 0) t.Logf("Test #1 passed | Successfully rejected duplicate validator connection with inbound/outbound validators") @@ -306,12 +306,12 @@ func TestConnectionControllerValidatorDuplication(t *testing.T) { node5 = startNode(t, node5) // Create validator connections from Node4 to Node1 and from Node5 to Node1 - cc4 := node4.Server.GetConnectionController() - require.NoError(t, cc4.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm4 := node4.Server.GetNetworkManager() + require.NoError(t, nm4.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) waitForValidatorConnection(t, node1, node4) waitForNonValidatorOutboundConnection(t, node4, node1) - cc5 := node5.Server.GetConnectionController() - require.NoError(t, cc5.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm5 := node5.Server.GetNetworkManager() + require.NoError(t, nm5.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node5) waitForCountRemoteNodeIndexer(t, node1, 1, 1, 0, 0) t.Logf("Test #2 passed | Successfully rejected duplicate validator connection with multiple outbound validators") @@ -330,8 +330,8 @@ func TestConnectionControllerProtocolDifference(t *testing.T) { node2 = startNode(t, node2) // Create non-validator connection from Node1 to Node2 - cc := node1.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) + nm := node1.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node2.Listeners[0].Addr().String())) waitForNonValidatorOutboundConnection(t, node1, node2) waitForNonValidatorInboundConnection(t, node2, node1) t.Logf("Test #1 passed | Successfully connected to a ProtocolVersion1 node with a ProtocolVersion2 non-validator") @@ -346,7 +346,7 @@ func TestConnectionControllerProtocolDifference(t *testing.T) { node3 = startNode(t, node3) // Create validator connection from Node1 to Node3 - require.NoError(t, cc.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsKeyStore3.GetSigner().GetPublicKey())) + require.NoError(t, nm.CreateValidatorConnection(node3.Listeners[0].Addr().String(), blsKeyStore3.GetSigner().GetPublicKey())) waitForValidatorConnection(t, node1, node3) waitForNonValidatorInboundConnection(t, node3, node1) t.Logf("Test #2 passed | Successfully connected to a ProtocolVersion1 node with a ProtocolVersion2 validator") @@ -365,14 +365,14 @@ func TestConnectionControllerProtocolDifference(t *testing.T) { node4 = startNode(t, node4) // Attempt to create non-validator connection from Node4 to Node1 - cc = node4.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm = node4.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node4) waitForEmptyRemoteNodeIndexer(t, node1) t.Logf("Test #3 passed | Successfully rejected outbound connection from ProtocolVersion2 node to ProtocolVersion1 node") // Attempt to create 
validator connection from Node4 to Node1 - require.NoError(t, cc.CreateValidatorConnection(node1.Listeners[0].Addr().String(), blsKeyStore4.GetSigner().GetPublicKey())) + require.NoError(t, nm.CreateValidatorConnection(node1.Listeners[0].Addr().String(), blsKeyStore4.GetSigner().GetPublicKey())) waitForEmptyRemoteNodeIndexer(t, node4) waitForEmptyRemoteNodeIndexer(t, node1) t.Logf("Test #4 passed | Successfully rejected validator connection from ProtocolVersion2 node to ProtocolVersion1 node") @@ -383,8 +383,8 @@ func TestConnectionControllerProtocolDifference(t *testing.T) { node5 = startNode(t, node5) // Attempt to create non-validator connection from Node5 to Node1 - cc = node5.Server.GetConnectionController() - require.NoError(t, cc.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) + nm = node5.Server.GetNetworkManager() + require.NoError(t, nm.CreateNonValidatorOutboundConnection(node1.Listeners[0].Addr().String())) waitForEmptyRemoteNodeIndexer(t, node5) waitForEmptyRemoteNodeIndexer(t, node1) t.Logf("Test #5 passed | Successfully rejected outbound connection from ProtocolVersion2 node to ProtocolVersion1 node") @@ -404,8 +404,8 @@ func TestConnectionControllerPersistentConnection(t *testing.T) { node2 = startNode(t, node2) // Create a persistent connection from Node1 to Node2 - cc := node1.Server.GetConnectionController() - _, err = cc.CreateNonValidatorPersistentOutboundConnection(node2.Listeners[0].Addr().String()) + nm := node1.Server.GetNetworkManager() + _, err = nm.CreateNonValidatorPersistentOutboundConnection(node2.Listeners[0].Addr().String()) require.NoError(t, err) waitForValidatorConnection(t, node1, node2) waitForNonValidatorInboundConnection(t, node2, node1) @@ -419,7 +419,7 @@ func TestConnectionControllerPersistentConnection(t *testing.T) { node3 = startNode(t, node3) // Create a persistent connection from Node1 to Node3 - _, err = cc.CreateNonValidatorPersistentOutboundConnection(node3.Listeners[0].Addr().String()) + _, err = nm.CreateNonValidatorPersistentOutboundConnection(node3.Listeners[0].Addr().String()) require.NoError(t, err) waitForNonValidatorOutboundConnection(t, node1, node3) waitForNonValidatorInboundConnection(t, node3, node1) @@ -441,8 +441,8 @@ func TestConnectionControllerPersistentConnection(t *testing.T) { node5 = startNode(t, node5) // Create a persistent connection from Node4 to Node5 - cc = node4.Server.GetConnectionController() - _, err = cc.CreateNonValidatorPersistentOutboundConnection(node5.Listeners[0].Addr().String()) + nm = node4.Server.GetNetworkManager() + _, err = nm.CreateNonValidatorPersistentOutboundConnection(node5.Listeners[0].Addr().String()) require.NoError(t, err) waitForNonValidatorOutboundConnection(t, node4, node5) waitForValidatorConnection(t, node5, node4) @@ -458,7 +458,7 @@ func TestConnectionControllerPersistentConnection(t *testing.T) { node6 = startNode(t, node6) // Create a persistent connection from Node4 to Node6 - _, err = cc.CreateNonValidatorPersistentOutboundConnection(node6.Listeners[0].Addr().String()) + _, err = nm.CreateNonValidatorPersistentOutboundConnection(node6.Listeners[0].Addr().String()) require.NoError(t, err) waitForValidatorConnection(t, node4, node6) waitForValidatorConnection(t, node6, node4) diff --git a/integration_testing/connection_controller_utils_test.go b/integration_testing/network_manager_utils_test.go similarity index 94% rename from integration_testing/connection_controller_utils_test.go rename to integration_testing/network_manager_utils_test.go index 
94f4702c6..f14cb39d9 100644 --- a/integration_testing/connection_controller_utils_test.go +++ b/integration_testing/network_manager_utils_test.go @@ -11,7 +11,7 @@ import ( func waitForValidatorConnection(t *testing.T, node1 *cmd.Node, node2 *cmd.Node) { userAgentN1 := node1.Params.UserAgent userAgentN2 := node2.Params.UserAgent - rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager() + rnManagerN1 := node1.Server.GetNetworkManager().GetRemoteNodeManager() n1ValidatedN2 := func() bool { if true != checkRemoteNodeIndexerUserAgent(rnManagerN1, userAgentN2, true, false, false) { return false @@ -41,7 +41,7 @@ func conditionNonValidatorOutboundConnection(t *testing.T, node1 *cmd.Node, node func conditionNonValidatorOutboundConnectionDynamic(t *testing.T, node1 *cmd.Node, node2 *cmd.Node, inactiveValidator bool) func() bool { userAgentN2 := node2.Params.UserAgent - rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager() + rnManagerN1 := node1.Server.GetNetworkManager().GetRemoteNodeManager() return func() bool { if true != checkRemoteNodeIndexerUserAgent(rnManagerN1, userAgentN2, false, true, false) { return false @@ -82,7 +82,7 @@ func conditionNonValidatorInboundConnection(t *testing.T, node1 *cmd.Node, node2 func conditionNonValidatorInboundConnectionDynamic(t *testing.T, node1 *cmd.Node, node2 *cmd.Node, inactiveValidator bool) func() bool { userAgentN2 := node2.Params.UserAgent - rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager() + rnManagerN1 := node1.Server.GetNetworkManager().GetRemoteNodeManager() return func() bool { if true != checkRemoteNodeIndexerUserAgent(rnManagerN1, userAgentN2, false, false, true) { return false @@ -104,7 +104,7 @@ func conditionNonValidatorInboundConnectionDynamic(t *testing.T, node1 *cmd.Node func waitForEmptyRemoteNodeIndexer(t *testing.T, node1 *cmd.Node) { userAgentN1 := node1.Params.UserAgent - rnManagerN1 := node1.Server.GetConnectionController().GetRemoteNodeManager() + rnManagerN1 := node1.Server.GetNetworkManager().GetRemoteNodeManager() n1ValidatedN2 := func() bool { if true != checkRemoteNodeIndexerEmpty(rnManagerN1) { return false @@ -118,7 +118,7 @@ func waitForCountRemoteNodeIndexer(t *testing.T, node1 *cmd.Node, allCount int, nonValidatorOutboundCount int, nonValidatorInboundCount int) { userAgent := node1.Params.UserAgent - rnManager := node1.Server.GetConnectionController().GetRemoteNodeManager() + rnManager := node1.Server.GetNetworkManager().GetRemoteNodeManager() condition := func() bool { if true != checkRemoteNodeIndexerCount(rnManager, allCount, validatorCount, nonValidatorOutboundCount, nonValidatorInboundCount) { return false @@ -132,7 +132,7 @@ func waitForCountRemoteNodeIndexerHandshakeCompleted(t *testing.T, node1 *cmd.No nonValidatorOutboundCount int, nonValidatorInboundCount int) { userAgent := node1.Params.UserAgent - rnManager := node1.Server.GetConnectionController().GetRemoteNodeManager() + rnManager := node1.Server.GetNetworkManager().GetRemoteNodeManager() condition := func() bool { return checkRemoteNodeIndexerCountHandshakeCompleted(rnManager, allCount, validatorCount, nonValidatorOutboundCount, nonValidatorInboundCount) @@ -243,7 +243,7 @@ func checkUserAgentInRemoteNodeList(userAgent string, rnList []*lib.RemoteNode) } func getRemoteNodeWithUserAgent(node *cmd.Node, userAgent string) *lib.RemoteNode { - rnManager := node.Server.GetConnectionController().GetRemoteNodeManager() + rnManager := node.Server.GetNetworkManager().GetRemoteNodeManager() rnList 
:= rnManager.GetAllRemoteNodes().GetAll() for _, rn := range rnList { if rn.GetUserAgent() == userAgent { diff --git a/lib/handshake_controller.go b/lib/handshake_controller.go index 385e36275..2d558317f 100644 --- a/lib/handshake_controller.go +++ b/lib/handshake_controller.go @@ -8,19 +8,19 @@ import ( "sync" ) -// HandshakeController is a structure that handles the handshake process with remote nodes. It is the entry point for +// HandshakeManager is a structure that handles the handshake process with remote nodes. It is the entry point for // initiating a handshake with a remote node. It is also responsible for handling version/verack messages from remote // nodes. And for handling the handshake complete control message. -type HandshakeController struct { +type HandshakeManager struct { mtxHandshakeComplete sync.Mutex rnManager *RemoteNodeManager usedNonces lru.Cache } -func NewHandshakeController(rnManager *RemoteNodeManager) *HandshakeController { +func NewHandshakeController(rnManager *RemoteNodeManager) *HandshakeManager { - vm := &HandshakeController{ + vm := &HandshakeManager{ rnManager: rnManager, usedNonces: lru.NewCache(1000), } @@ -29,20 +29,20 @@ func NewHandshakeController(rnManager *RemoteNodeManager) *HandshakeController { } // InitiateHandshake kicks off handshake with a remote node. -func (hc *HandshakeController) InitiateHandshake(rn *RemoteNode) { +func (hm *HandshakeManager) InitiateHandshake(rn *RemoteNode) { nonce := uint64(RandInt64(math.MaxInt64)) if err := rn.InitiateHandshake(nonce); err != nil { glog.Errorf("RemoteNode.InitiateHandshake: Error initiating handshake: %v", err) - hc.rnManager.Disconnect(rn) + hm.rnManager.Disconnect(rn) } - hc.usedNonces.Add(nonce) + hm.usedNonces.Add(nonce) } // handleHandshakeComplete handles HandshakeComplete control messages, sent by RemoteNodes. -func (hc *HandshakeController) handleHandshakeComplete(remoteNode *RemoteNode) { +func (hm *HandshakeManager) handleHandshakeComplete(remoteNode *RemoteNode) { // Prevent race conditions while handling handshake complete messages. - hc.mtxHandshakeComplete.Lock() - defer hc.mtxHandshakeComplete.Unlock() + hm.mtxHandshakeComplete.Lock() + defer hm.mtxHandshakeComplete.Unlock() // Get the handshake information of this peer. if remoteNode == nil { @@ -50,20 +50,20 @@ func (hc *HandshakeController) handleHandshakeComplete(remoteNode *RemoteNode) { } if remoteNode.GetNegotiatedProtocolVersion().Before(ProtocolVersion2) { - hc.rnManager.ProcessCompletedHandshake(remoteNode) + hm.rnManager.ProcessCompletedHandshake(remoteNode) return } - if err := hc.handleHandshakeCompletePoSMessage(remoteNode); err != nil { - glog.Errorf("HandshakeController.handleHandshakeComplete: Error handling PoS handshake peer message: %v, "+ + if err := hm.handleHandshakeCompletePoSMessage(remoteNode); err != nil { + glog.Errorf("HandshakeManager.handleHandshakeComplete: Error handling PoS handshake peer message: %v, "+ "remoteNodePk (%s)", err, remoteNode.GetValidatorPublicKey().Serialize()) - hc.rnManager.Disconnect(remoteNode) + hm.rnManager.Disconnect(remoteNode) return } - hc.rnManager.ProcessCompletedHandshake(remoteNode) + hm.rnManager.ProcessCompletedHandshake(remoteNode) } -func (hc *HandshakeController) handleHandshakeCompletePoSMessage(remoteNode *RemoteNode) error { +func (hm *HandshakeManager) handleHandshakeCompletePoSMessage(remoteNode *RemoteNode) error { validatorPk := remoteNode.GetValidatorPublicKey() // If the remote node is not a potential validator, we don't need to do anything. 
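The handleHandshakeComplete flow above only runs the PoS checks for peers that negotiated ProtocolVersion2 or higher; older peers complete immediately. Below is a minimal sketch of that gating in isolation, using hypothetical simplified types (an integer version, a stub check function) rather than the actual RemoteNode API:

package main

import (
	"errors"
	"fmt"
)

const protocolVersion2 = 2

type remoteNode struct {
	negotiatedVersion int
	validatorPk       string // empty if the peer is not a potential validator
}

// completeHandshake mirrors the gating above: pre-PoS peers are accepted
// immediately, while ProtocolVersion2+ peers must pass the PoS checks first.
func completeHandshake(rn *remoteNode, posChecks func(*remoteNode) error) error {
	if rn.negotiatedVersion < protocolVersion2 {
		return nil // nothing more to verify for older peers
	}
	if err := posChecks(rn); err != nil {
		return fmt.Errorf("disconnecting peer: %w", err)
	}
	return nil
}

func main() {
	noDuplicates := func(rn *remoteNode) error {
		if rn.validatorPk == "already-indexed-pk" {
			return errors.New("duplicate validator public key")
		}
		return nil
	}
	fmt.Println(completeHandshake(&remoteNode{negotiatedVersion: 1}, noDuplicates))
	fmt.Println(completeHandshake(&remoteNode{negotiatedVersion: 2, validatorPk: "already-indexed-pk"}, noDuplicates))
}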
@@ -72,11 +72,11 @@ func (hc *HandshakeController) handleHandshakeCompletePoSMessage(remoteNode *Rem } // Lookup the validator in the ValidatorIndex with the same public key. - existingValidator, ok := hc.rnManager.GetValidatorIndex().Get(validatorPk.Serialize()) + existingValidator, ok := hm.rnManager.GetValidatorIndex().Get(validatorPk.Serialize()) // For inbound RemoteNodes, we should ensure that there isn't an existing validator connected with the same public key. // Inbound nodes are not initiated by us, so we shouldn't have added the RemoteNode to the ValidatorIndex yet. if remoteNode.IsInbound() && ok { - return fmt.Errorf("HandshakeController.handleHandshakeCompletePoSMessage: Inbound RemoteNode with duplicate validator public key") + return fmt.Errorf("HandshakeManager.handleHandshakeCompletePoSMessage: Inbound RemoteNode with duplicate validator public key") } // For outbound RemoteNodes, we have two possible scenarios. Either the RemoteNode has been initiated as a validator, // in which case it should already be in the ValidatorIndex. Or the RemoteNode has been initiated as a regular node, @@ -85,20 +85,20 @@ func (hc *HandshakeController) handleHandshakeCompletePoSMessage(remoteNode *Rem // with the RemoteNode's public key. If there is one, we want to ensure that these two RemoteNodes have identical ids. if remoteNode.IsOutbound() && ok { if remoteNode.GetId() != existingValidator.GetId() { - return fmt.Errorf("HandshakeController.handleHandshakeCompletePoSMessage: Outbound RemoteNode with duplicate validator public key. "+ + return fmt.Errorf("HandshakeManager.handleHandshakeCompletePoSMessage: Outbound RemoteNode with duplicate validator public key. "+ "Existing validator id: %v, new validator id: %v", existingValidator.GetId().ToUint64(), remoteNode.GetId().ToUint64()) } } return nil } -// _handleVersionMessage handles version messages, sent by RemoteNodes. -func (hc *HandshakeController) _handleVersionMessage(origin *Peer, desoMsg DeSoMessage) { +// handleVersionMessage handles version messages, sent by RemoteNodes. +func (hm *HandshakeManager) handleVersionMessage(origin *Peer, desoMsg DeSoMessage) { if desoMsg.GetMsgType() != MsgTypeVersion { return } - rn := hc.rnManager.GetRemoteNodeFromPeer(origin) + rn := hm.rnManager.GetRemoteNodeFromPeer(origin) if rn == nil { // This should never happen. return @@ -107,41 +107,41 @@ func (hc *HandshakeController) _handleVersionMessage(origin *Peer, desoMsg DeSoM var verMsg *MsgDeSoVersion var ok bool if verMsg, ok = desoMsg.(*MsgDeSoVersion); !ok { - glog.Errorf("HandshakeController._handleVersionMessage: Disconnecting RemoteNode with id: (%v) "+ + glog.Errorf("HandshakeManager.handleVersionMessage: Disconnecting RemoteNode with id: (%v) "+ "error casting version message", origin.ID) - hc.rnManager.Disconnect(rn) + hm.rnManager.Disconnect(rn) return } // If we've seen this nonce before then return an error since this is a connection from ourselves. msgNonce := verMsg.Nonce - if hc.usedNonces.Contains(msgNonce) { - hc.usedNonces.Delete(msgNonce) - glog.Errorf("HandshakeController._handleVersionMessage: Disconnecting RemoteNode with id: (%v) "+ + if hm.usedNonces.Contains(msgNonce) { + hm.usedNonces.Delete(msgNonce) + glog.Errorf("HandshakeManager.handleVersionMessage: Disconnecting RemoteNode with id: (%v) "+ "nonce collision, nonce (%v)", origin.ID, msgNonce) - hc.rnManager.Disconnect(rn) + hm.rnManager.Disconnect(rn) return } // Call HandleVersionMessage on the RemoteNode. 
responseNonce := uint64(RandInt64(math.MaxInt64)) if err := rn.HandleVersionMessage(verMsg, responseNonce); err != nil { - glog.Errorf("HandshakeController._handleVersionMessage: Requesting PeerDisconnect for id: (%v) "+ + glog.Errorf("HandshakeManager.handleVersionMessage: Requesting PeerDisconnect for id: (%v) "+ "error handling version message: %v", origin.ID, err) - hc.rnManager.Disconnect(rn) + hm.rnManager.Disconnect(rn) return } - hc.usedNonces.Add(responseNonce) + hm.usedNonces.Add(responseNonce) } -// _handleVerackMessage handles verack messages, sent by RemoteNodes. -func (hc *HandshakeController) _handleVerackMessage(origin *Peer, desoMsg DeSoMessage) { +// handleVerackMessage handles verack messages, sent by RemoteNodes. +func (hm *HandshakeManager) handleVerackMessage(origin *Peer, desoMsg DeSoMessage) { if desoMsg.GetMsgType() != MsgTypeVerack { return } - rn := hc.rnManager.GetRemoteNodeFromPeer(origin) + rn := hm.rnManager.GetRemoteNodeFromPeer(origin) if rn == nil { // This should never happen. return @@ -150,19 +150,19 @@ func (hc *HandshakeController) _handleVerackMessage(origin *Peer, desoMsg DeSoMe var vrkMsg *MsgDeSoVerack var ok bool if vrkMsg, ok = desoMsg.(*MsgDeSoVerack); !ok { - glog.Errorf("HandshakeController._handleVerackMessage: Disconnecting RemoteNode with id: (%v) "+ + glog.Errorf("HandshakeManager.handleVerackMessage: Disconnecting RemoteNode with id: (%v) "+ "error casting verack message", origin.ID) - hc.rnManager.Disconnect(rn) + hm.rnManager.Disconnect(rn) return } // Call HandleVerackMessage on the RemoteNode. if err := rn.HandleVerackMessage(vrkMsg); err != nil { - glog.Errorf("HandshakeController._handleVerackMessage: Requesting PeerDisconnect for id: (%v) "+ + glog.Errorf("HandshakeManager.handleVerackMessage: Requesting PeerDisconnect for id: (%v) "+ "error handling verack message: %v", origin.ID, err) - hc.rnManager.Disconnect(rn) + hm.rnManager.Disconnect(rn) return } - hc.handleHandshakeComplete(rn) + hm.handleHandshakeComplete(rn) } diff --git a/lib/connection_controller.go b/lib/network_manager.go similarity index 60% rename from lib/connection_controller.go rename to lib/network_manager.go index ef021e100..0227a6ac9 100644 --- a/lib/connection_controller.go +++ b/lib/network_manager.go @@ -15,19 +15,19 @@ import ( "time" ) -// ConnectionController is a structure that oversees all connections to remote nodes. It is responsible for kicking off +// NetworkManager is a structure that oversees all connections to remote nodes. It is responsible for kicking off // the initial connections a node makes to the network. It is also responsible for creating RemoteNodes from all -// successful outbound and inbound connections. The ConnectionController also ensures that the node is connected to +// successful outbound and inbound connections. The NetworkManager also ensures that the node is connected to // the active validators, once the node reaches Proof of Stake. // TODO: Document more in later PRs -type ConnectionController struct { +type NetworkManager struct { // The parameters we are initialized with. params *DeSoParams cmgr *ConnectionManager blsKeystore *BLSKeystore - handshake *HandshakeController + handshake *HandshakeManager rnManager *RemoteNodeManager @@ -45,7 +45,7 @@ type ConnectionController struct { activeValidatorsMapLock sync.RWMutex // activeValidatorsMap is a map of all currently active validators registered in consensus. It will be updated - // periodically by the owner of the ConnectionController. 
+ // periodically by the owner of the NetworkManager. activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator] // The target number of non-validator outbound remote nodes we want to have. We will disconnect remote nodes once @@ -63,16 +63,16 @@ type ConnectionController struct { exitGroup sync.WaitGroup } -func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, handshakeController *HandshakeController, - rnManager *RemoteNodeManager, blsKeystore *BLSKeystore, addrMgr *addrmgr.AddrManager, connectIps []string, +func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, rnManager *RemoteNodeManager, + blsKeystore *BLSKeystore, addrMgr *addrmgr.AddrManager, connectIps []string, targetNonValidatorOutboundRemoteNodes uint32, targetNonValidatorInboundRemoteNodes uint32, - limitOneInboundConnectionPerIP bool) *ConnectionController { + limitOneInboundConnectionPerIP bool) *NetworkManager { - return &ConnectionController{ + return &NetworkManager{ params: params, cmgr: cmgr, blsKeystore: blsKeystore, - handshake: handshakeController, + handshake: NewHandshakeController(rnManager), rnManager: rnManager, AddrMgr: addrMgr, connectIps: connectIps, @@ -85,42 +85,42 @@ func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, handsh } } -func (cc *ConnectionController) Start() { - if cc.params.DisableNetworkManagerRoutines { +func (nm *NetworkManager) Start() { + if nm.params.DisableNetworkManagerRoutines { return } - cc.startGroup.Add(4) - go cc.startPersistentConnector() - go cc.startValidatorConnector() - go cc.startNonValidatorConnector() - go cc.startRemoteNodeCleanup() + nm.startGroup.Add(4) + go nm.startPersistentConnector() + go nm.startValidatorConnector() + go nm.startNonValidatorConnector() + go nm.startRemoteNodeCleanup() - cc.startGroup.Wait() + nm.startGroup.Wait() } -func (cc *ConnectionController) Stop() { - if !cc.params.DisableNetworkManagerRoutines { - cc.exitGroup.Add(4) - close(cc.exitChan) - cc.exitGroup.Wait() +func (nm *NetworkManager) Stop() { + if !nm.params.DisableNetworkManagerRoutines { + nm.exitGroup.Add(4) + close(nm.exitChan) + nm.exitGroup.Wait() } - cc.rnManager.DisconnectAll() + nm.rnManager.DisconnectAll() } -func (cc *ConnectionController) GetRemoteNodeManager() *RemoteNodeManager { - return cc.rnManager +func (nm *NetworkManager) GetRemoteNodeManager() *RemoteNodeManager { + return nm.rnManager } -func (cc *ConnectionController) startPersistentConnector() { - cc.startGroup.Done() +func (nm *NetworkManager) startPersistentConnector() { + nm.startGroup.Done() for { select { - case <-cc.exitChan: - cc.exitGroup.Done() + case <-nm.exitChan: + nm.exitGroup.Done() return case <-time.After(1 * time.Second): - cc.refreshConnectIps() + nm.refreshConnectIps() } } } @@ -130,17 +130,17 @@ func (cc *ConnectionController) startPersistentConnector() { // are validators. If they are, it adds them to the validator index. It also checks if any of the existing validators // are no longer active and removes them from the validator index. Second, it checks if any of the active validators // are missing from the validator index. If they are, it attempts to connect to them. 
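Each connector above (persistent, validator, non-validator, cleanup) follows the same goroutine lifecycle: signal readiness through a start WaitGroup, then loop on a select between the exit channel and a one-second timer. A minimal, runnable sketch of that pattern, with a hypothetical doWork callback standing in for the refresh/connect steps:

package main

import (
	"fmt"
	"sync"
	"time"
)

type connector struct {
	startGroup sync.WaitGroup
	exitGroup  sync.WaitGroup
	exitChan   chan struct{}
}

// runPeriodically mirrors the connector loop: announce start, then either
// exit when exitChan closes or do one unit of work per tick.
func (c *connector) runPeriodically(doWork func()) {
	c.startGroup.Done()
	for {
		select {
		case <-c.exitChan:
			c.exitGroup.Done()
			return
		case <-time.After(1 * time.Second):
			doWork()
		}
	}
}

func main() {
	c := &connector{exitChan: make(chan struct{})}
	c.startGroup.Add(1)
	go c.runPeriodically(func() { fmt.Println("tick: reconcile connections") })
	c.startGroup.Wait() // returns once the goroutine is running

	time.Sleep(2500 * time.Millisecond) // let a couple of ticks fire
	c.exitGroup.Add(1)                  // same ordering as Stop(): Add, then close
	close(c.exitChan)
	c.exitGroup.Wait() // returns once the goroutine has drained
}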
-func (cc *ConnectionController) startValidatorConnector() { - cc.startGroup.Done() +func (nm *NetworkManager) startValidatorConnector() { + nm.startGroup.Done() for { select { - case <-cc.exitChan: - cc.exitGroup.Done() + case <-nm.exitChan: + nm.exitGroup.Done() return case <-time.After(1 * time.Second): - activeValidatorsMap := cc.getActiveValidatorsMap() - cc.refreshValidatorIndex(activeValidatorsMap) - cc.connectValidators(activeValidatorsMap) + activeValidatorsMap := nm.getActiveValidatorsMap() + nm.refreshValidatorIndex(activeValidatorsMap) + nm.connectValidators(activeValidatorsMap) } } } @@ -149,32 +149,32 @@ func (cc *ConnectionController) startValidatorConnector() { // and inbound remote nodes. To do this, it periodically checks the number of outbound and inbound remote nodes, and // if the number is above the target number, it disconnects the excess remote nodes. If the number is below the target // number, it attempts to connect to new remote nodes. -func (cc *ConnectionController) startNonValidatorConnector() { - cc.startGroup.Done() +func (nm *NetworkManager) startNonValidatorConnector() { + nm.startGroup.Done() for { select { - case <-cc.exitChan: - cc.exitGroup.Done() + case <-nm.exitChan: + nm.exitGroup.Done() return case <-time.After(1 * time.Second): - cc.refreshNonValidatorOutboundIndex() - cc.refreshNonValidatorInboundIndex() - cc.connectNonValidators() + nm.refreshNonValidatorOutboundIndex() + nm.refreshNonValidatorInboundIndex() + nm.connectNonValidators() } } } -func (cc *ConnectionController) startRemoteNodeCleanup() { - cc.startGroup.Done() +func (nm *NetworkManager) startRemoteNodeCleanup() { + nm.startGroup.Done() for { select { - case <-cc.exitChan: - cc.exitGroup.Done() + case <-nm.exitChan: + nm.exitGroup.Done() return case <-time.After(1 * time.Second): - cc.rnManager.Cleanup() + nm.rnManager.Cleanup() } } @@ -184,26 +184,34 @@ func (cc *ConnectionController) startRemoteNodeCleanup() { // ## Handlers (Peer, DeSoMessage) // ########################### -func (cc *ConnectionController) _handleDonePeerMessage(origin *Peer, desoMsg DeSoMessage) { +func (nm *NetworkManager) _handleVersionMessage(origin *Peer, desoMsg DeSoMessage) { + nm.handshake.handleVersionMessage(origin, desoMsg) +} + +func (nm *NetworkManager) _handleVerackMessage(origin *Peer, desoMsg DeSoMessage) { + nm.handshake.handleVerackMessage(origin, desoMsg) +} + +func (nm *NetworkManager) _handleDonePeerMessage(origin *Peer, desoMsg DeSoMessage) { if desoMsg.GetMsgType() != MsgTypeDisconnectedPeer { return } - glog.V(2).Infof("ConnectionController.handleDonePeerMessage: Handling disconnected peer message for "+ + glog.V(2).Infof("NetworkManager.handleDonePeerMessage: Handling disconnected peer message for "+ "id=%v", origin.ID) - cc.rnManager.DisconnectById(NewRemoteNodeId(origin.ID)) + nm.rnManager.DisconnectById(NewRemoteNodeId(origin.ID)) // Update the persistentIpToRemoteNodeIdsMap. - ipRemoteNodeIdMap := cc.persistentIpToRemoteNodeIdsMap.ToMap() + ipRemoteNodeIdMap := nm.persistentIpToRemoteNodeIdsMap.ToMap() for ip, id := range ipRemoteNodeIdMap { if id.ToUint64() == origin.ID { - cc.persistentIpToRemoteNodeIdsMap.Remove(ip) + nm.persistentIpToRemoteNodeIdsMap.Remove(ip) } } } // _handleNewConnectionMessage is called when a new outbound or inbound connection is established. It is responsible // for creating a RemoteNode from the connection and initiating the handshake. The incoming DeSoMessage is a control message. 
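_handleNewConnectionMessage below branches on the connection type, cleans up failed connections, and only initiates the handshake once a RemoteNode exists. A compact sketch of that control flow, with hypothetical process/cleanup stubs in place of the real ones:

package main

import (
	"errors"
	"fmt"
)

type connType int

const (
	inbound connType = iota
	outbound
)

type conn struct {
	typ    connType
	failed bool
}

// handleNewConnection validates the connection, cleans up on error, and
// only proceeds to handshake with a successfully created remote node.
func handleNewConnection(c *conn) error {
	var err error
	switch c.typ {
	case inbound:
		err = processInbound(c)
	case outbound:
		err = processOutbound(c)
	}
	if err != nil {
		cleanup(c)
		return err
	}
	return initiateHandshake(c)
}

func processInbound(c *conn) error { return nil }

func processOutbound(c *conn) error {
	if c.failed {
		return errors.New("failed to connect to peer")
	}
	return nil
}

func cleanup(c *conn) { fmt.Println("cleaning up failed connection") }

func initiateHandshake(c *conn) error {
	fmt.Println("initiating handshake")
	return nil
}

func main() {
	fmt.Println(handleNewConnection(&conn{typ: outbound, failed: true}))
	fmt.Println(handleNewConnection(&conn{typ: inbound}))
}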
-func (cc *ConnectionController) _handleNewConnectionMessage(origin *Peer, desoMsg DeSoMessage) { +func (nm *NetworkManager) _handleNewConnectionMessage(origin *Peer, desoMsg DeSoMessage) { if desoMsg.GetMsgType() != MsgTypeNewConnection { return } @@ -217,70 +225,70 @@ func (cc *ConnectionController) _handleNewConnectionMessage(origin *Peer, desoMs var err error switch msg.Connection.GetConnectionType() { case ConnectionTypeInbound: - remoteNode, err = cc.processInboundConnection(msg.Connection) + remoteNode, err = nm.processInboundConnection(msg.Connection) if err != nil { - glog.Errorf("ConnectionController.handleNewConnectionMessage: Problem handling inbound connection: %v", err) - cc.cleanupFailedInboundConnection(remoteNode, msg.Connection) + glog.Errorf("NetworkManager.handleNewConnectionMessage: Problem handling inbound connection: %v", err) + nm.cleanupFailedInboundConnection(remoteNode, msg.Connection) return } case ConnectionTypeOutbound: - remoteNode, err = cc.processOutboundConnection(msg.Connection) + remoteNode, err = nm.processOutboundConnection(msg.Connection) if err != nil { - glog.Errorf("ConnectionController.handleNewConnectionMessage: Problem handling outbound connection: %v", err) - cc.cleanupFailedOutboundConnection(msg.Connection) + glog.Errorf("NetworkManager.handleNewConnectionMessage: Problem handling outbound connection: %v", err) + nm.cleanupFailedOutboundConnection(msg.Connection) return } } // If we made it here, we have a valid remote node. We will now initiate the handshake. - cc.handshake.InitiateHandshake(remoteNode) + nm.handshake.InitiateHandshake(remoteNode) } -func (cc *ConnectionController) cleanupFailedInboundConnection(remoteNode *RemoteNode, connection Connection) { - glog.V(2).Infof("ConnectionController.cleanupFailedInboundConnection: Cleaning up failed inbound connection") +func (nm *NetworkManager) cleanupFailedInboundConnection(remoteNode *RemoteNode, connection Connection) { + glog.V(2).Infof("NetworkManager.cleanupFailedInboundConnection: Cleaning up failed inbound connection") if remoteNode != nil { - cc.rnManager.Disconnect(remoteNode) + nm.rnManager.Disconnect(remoteNode) } connection.Close() } -func (cc *ConnectionController) cleanupFailedOutboundConnection(connection Connection) { +func (nm *NetworkManager) cleanupFailedOutboundConnection(connection Connection) { oc, ok := connection.(*outboundConnection) if !ok { return } - glog.V(2).Infof("ConnectionController.cleanupFailedOutboundConnection: Cleaning up failed outbound connection") + glog.V(2).Infof("NetworkManager.cleanupFailedOutboundConnection: Cleaning up failed outbound connection") id := NewRemoteNodeId(oc.attemptId) - rn := cc.rnManager.GetRemoteNodeById(id) + rn := nm.rnManager.GetRemoteNodeById(id) if rn != nil { - cc.rnManager.Disconnect(rn) + nm.rnManager.Disconnect(rn) } oc.Close() - cc.cmgr.RemoveAttemptedOutboundAddrs(oc.address) + nm.cmgr.RemoveAttemptedOutboundAddrs(oc.address) } // ########################### // ## Persistent Connections // ########################### -func (cc *ConnectionController) refreshConnectIps() { +func (nm *NetworkManager) refreshConnectIps() { // Connect to addresses passed via the --connect-ips flag. These addresses are persistent in the sense that if we // disconnect from one, we will try to reconnect to the same one. 
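refreshConnectIps below is what makes --connect-ips addresses persistent: every tick it re-dials any configured address that has no tracked connection id. A minimal sketch, assuming a hypothetical dial function in place of CreateNonValidatorPersistentOutboundConnection:

package main

import "fmt"

// refreshConnectIps re-dials any configured address that has no live
// connection id recorded, mirroring the persistent-connection loop below.
func refreshConnectIps(connectIps []string, ipToId map[string]uint64, dial func(string) (uint64, error)) {
	for _, ip := range connectIps {
		if _, ok := ipToId[ip]; ok {
			continue // already tracked; nothing to do
		}
		id, err := dial(ip)
		if err != nil {
			fmt.Printf("problem connecting to %v: %v\n", ip, err)
			continue // retry on the next tick
		}
		ipToId[ip] = id
	}
}

func main() {
	nextId := uint64(0)
	dial := func(ip string) (uint64, error) { nextId++; return nextId, nil }

	tracked := map[string]uint64{}
	refreshConnectIps([]string{"1.2.3.4:17000", "5.6.7.8:17000"}, tracked, dial)
	fmt.Println(tracked) // both addresses now tracked

	// Simulate a disconnect: the entry is removed, so the next pass re-dials it.
	delete(tracked, "1.2.3.4:17000")
	refreshConnectIps([]string{"1.2.3.4:17000", "5.6.7.8:17000"}, tracked, dial)
	fmt.Println(tracked)
}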
- for _, connectIp := range cc.connectIps { - if _, ok := cc.persistentIpToRemoteNodeIdsMap.Get(connectIp); ok { + for _, connectIp := range nm.connectIps { + if _, ok := nm.persistentIpToRemoteNodeIdsMap.Get(connectIp); ok { continue } - glog.Infof("ConnectionController.initiatePersistentConnections: Connecting to connectIp: %v", connectIp) - id, err := cc.CreateNonValidatorPersistentOutboundConnection(connectIp) + glog.Infof("NetworkManager.initiatePersistentConnections: Connecting to connectIp: %v", connectIp) + id, err := nm.CreateNonValidatorPersistentOutboundConnection(connectIp) if err != nil { - glog.Errorf("ConnectionController.initiatePersistentConnections: Problem connecting "+ + glog.Errorf("NetworkManager.initiatePersistentConnections: Problem connecting "+ "to connectIp %v: %v", connectIp, err) continue } - cc.persistentIpToRemoteNodeIdsMap.Set(connectIp, id) + nm.persistentIpToRemoteNodeIdsMap.Set(connectIp, id) } } @@ -288,36 +296,36 @@ func (cc *ConnectionController) refreshConnectIps() { // ## Validator Connections // ########################### -func (cc *ConnectionController) SetActiveValidatorsMap(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { - cc.activeValidatorsMapLock.Lock() - defer cc.activeValidatorsMapLock.Unlock() - cc.activeValidatorsMap = activeValidatorsMap.Clone() +func (nm *NetworkManager) SetActiveValidatorsMap(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { + nm.activeValidatorsMapLock.Lock() + defer nm.activeValidatorsMapLock.Unlock() + nm.activeValidatorsMap = activeValidatorsMap.Clone() } -func (cc *ConnectionController) getActiveValidatorsMap() *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator] { - cc.activeValidatorsMapLock.RLock() - defer cc.activeValidatorsMapLock.RUnlock() - return cc.activeValidatorsMap.Clone() +func (nm *NetworkManager) getActiveValidatorsMap() *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator] { + nm.activeValidatorsMapLock.RLock() + defer nm.activeValidatorsMapLock.RUnlock() + return nm.activeValidatorsMap.Clone() } // refreshValidatorIndex re-indexes validators based on the activeValidatorsMap. It is called periodically by the // validator connector. -func (cc *ConnectionController) refreshValidatorIndex(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { +func (nm *NetworkManager) refreshValidatorIndex(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { // De-index inactive validators. We skip any checks regarding RemoteNodes connection status, nor do we verify whether // de-indexing the validator would result in an excess number of outbound/inbound connections. Any excess connections // will be cleaned up by the peer connector. - validatorRemoteNodeMap := cc.rnManager.GetValidatorIndex().ToMap() + validatorRemoteNodeMap := nm.rnManager.GetValidatorIndex().ToMap() for pk, rn := range validatorRemoteNodeMap { // If the validator is no longer active, de-index it. if _, ok := activeValidatorsMap.Get(pk); !ok { - cc.rnManager.SetNonValidator(rn) - cc.rnManager.UnsetValidator(rn) + nm.rnManager.SetNonValidator(rn) + nm.rnManager.UnsetValidator(rn) } } // Look for validators in our existing outbound / inbound connections. 
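The de-index/promote logic in refreshValidatorIndex amounts to reconciling the validator and non-validator indices against the active validator set. A simplified sketch over plain maps (the real indices are concurrent maps keyed by serialized BLS public keys):

package main

import "fmt"

// reconcileValidators demotes indexed validators that left the active set
// and promotes non-validators whose public keys are now active.
func reconcileValidators(active, validators, nonValidators map[string]bool) {
	// De-index validators that are no longer active.
	for pk := range validators {
		if !active[pk] {
			delete(validators, pk)
			nonValidators[pk] = true
		}
	}
	// Promote non-validators that turn out to be in the active set.
	for pk := range nonValidators {
		if active[pk] {
			delete(nonValidators, pk)
			validators[pk] = true
		}
	}
}

func main() {
	active := map[string]bool{"pkA": true, "pkB": true}
	validators := map[string]bool{"pkA": true, "pkC": true}    // pkC is stale
	nonValidators := map[string]bool{"pkB": true, "pkD": true} // pkB should be promoted

	reconcileValidators(active, validators, nonValidators)
	fmt.Println(validators)    // map[pkA:true pkB:true]
	fmt.Println(nonValidators) // map[pkC:true pkD:true]
}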
- allNonValidators := cc.rnManager.GetAllNonValidators() + allNonValidators := nm.rnManager.GetAllNonValidators() for _, rn := range allNonValidators { // It is possible for a RemoteNode to be in the non-validator indices, and still have a public key. This can happen // if the RemoteNode advertised support for the SFValidator service flag during handshake, and provided us @@ -329,37 +337,37 @@ func (cc *ConnectionController) refreshValidatorIndex(activeValidatorsMap *colle // It is possible that through unlikely concurrence, and malevolence, two non-validators happen to have the same // public key, which goes undetected during handshake. To prevent this from affecting the indexing of the validator // set, we check that the non-validator's public key is not already present in the validator index. - if _, ok := cc.rnManager.GetValidatorIndex().Get(pk.Serialize()); ok { - glog.V(2).Infof("ConnectionController.refreshValidatorIndex: Disconnecting Validator RemoteNode "+ + if _, ok := nm.rnManager.GetValidatorIndex().Get(pk.Serialize()); ok { + glog.V(2).Infof("NetworkManager.refreshValidatorIndex: Disconnecting Validator RemoteNode "+ "(%v) has validator public key (%v) that is already present in validator index", rn, pk) - cc.rnManager.Disconnect(rn) + nm.rnManager.Disconnect(rn) continue } // If the RemoteNode turns out to be in the validator set, index it. if _, ok := activeValidatorsMap.Get(pk.Serialize()); ok { - cc.rnManager.SetValidator(rn) - cc.rnManager.UnsetNonValidator(rn) + nm.rnManager.SetValidator(rn) + nm.rnManager.UnsetNonValidator(rn) } } } // connectValidators attempts to connect to all active validators that are not already connected. It is called // periodically by the validator connector. -func (cc *ConnectionController) connectValidators(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { +func (nm *NetworkManager) connectValidators(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { // Look through the active validators and connect to any that we're not already connected to. - if cc.blsKeystore == nil { + if nm.blsKeystore == nil { return } validators := activeValidatorsMap.ToMap() for pk, validator := range validators { - _, exists := cc.rnManager.GetValidatorIndex().Get(pk) + _, exists := nm.rnManager.GetValidatorIndex().Get(pk) // If we're already connected to the validator, continue. if exists { continue } - if cc.blsKeystore.GetSigner().GetPublicKey().Serialize() == pk { + if nm.blsKeystore.GetSigner().GetPublicKey().Serialize() == pk { continue } @@ -373,8 +381,8 @@ func (cc *ConnectionController) connectValidators(activeValidatorsMap *collectio continue } address := string(validator.GetDomains()[0]) - if err := cc.CreateValidatorConnection(address, publicKey); err != nil { - glog.V(2).Infof("ConnectionController.connectValidators: Problem connecting to validator %v: %v", address, err) + if err := nm.CreateValidatorConnection(address, publicKey); err != nil { + glog.V(2).Infof("NetworkManager.connectValidators: Problem connecting to validator %v: %v", address, err) continue } } @@ -386,21 +394,21 @@ func (cc *ConnectionController) connectValidators(activeValidatorsMap *collectio // refreshNonValidatorOutboundIndex is called periodically by the peer connector. It is responsible for disconnecting excess // outbound remote nodes. 
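The trimming policy described above spares persistent and expected-validator nodes, and prefers dropping attempted connections before established ones. A sketch of that selection order with a hypothetical remoteNode struct:

package main

import "fmt"

type remoteNode struct {
	id         int
	persistent bool
	connected  bool // false means the dial is still only attempted
}

// pickExcessDisconnects returns up to the excess number of nodes to drop,
// preferring attempted connections over established ones and never
// touching persistent nodes.
func pickExcessDisconnects(nodes []*remoteNode, target int) []*remoteNode {
	var attempted, connected []*remoteNode
	for _, rn := range nodes {
		if rn.persistent {
			continue
		}
		if rn.connected {
			connected = append(connected, rn)
		} else {
			attempted = append(attempted, rn)
		}
	}
	excess := len(attempted) + len(connected) - target
	var toDrop []*remoteNode
	for _, rn := range append(attempted, connected...) {
		if excess <= 0 {
			break
		}
		toDrop = append(toDrop, rn)
		excess--
	}
	return toDrop
}

func main() {
	nodes := []*remoteNode{
		{id: 1, persistent: true, connected: true},
		{id: 2, connected: false},
		{id: 3, connected: true},
		{id: 4, connected: true},
	}
	for _, rn := range pickExcessDisconnects(nodes, 2) {
		fmt.Println("disconnecting", rn.id) // drops the attempted node 2 first
	}
}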
-func (cc *ConnectionController) refreshNonValidatorOutboundIndex() { +func (nm *NetworkManager) refreshNonValidatorOutboundIndex() { // There are three categories of outbound remote nodes: attempted, connected, and persistent. All of these // remote nodes are stored in the same non-validator outbound index. We want to disconnect excess remote nodes that // are not persistent, starting with the attempted nodes first. // First let's run a quick check to see if the number of our non-validator remote nodes exceeds our target. Note that // this number will include the persistent nodes. - numOutboundRemoteNodes := uint32(cc.rnManager.GetNonValidatorOutboundIndex().Count()) - if numOutboundRemoteNodes <= cc.targetNonValidatorOutboundRemoteNodes { + numOutboundRemoteNodes := uint32(nm.rnManager.GetNonValidatorOutboundIndex().Count()) + if numOutboundRemoteNodes <= nm.targetNonValidatorOutboundRemoteNodes { return } // If we get here, it means that we should potentially disconnect some remote nodes. Let's first separate the // attempted and connected remote nodes, ignoring the persistent ones. - allOutboundRemoteNodes := cc.rnManager.GetNonValidatorOutboundIndex().GetAll() + allOutboundRemoteNodes := nm.rnManager.GetNonValidatorOutboundIndex().GetAll() var attemptedOutboundRemoteNodes, connectedOutboundRemoteNodes []*RemoteNode for _, rn := range allOutboundRemoteNodes { if rn.IsPersistent() || rn.IsExpectedValidator() { @@ -417,8 +425,8 @@ func (cc *ConnectionController) refreshNonValidatorOutboundIndex() { // connected remote nodes. We can then find out how many remote nodes we need to disconnect. numOutboundRemoteNodes = uint32(len(attemptedOutboundRemoteNodes) + len(connectedOutboundRemoteNodes)) excessiveOutboundRemoteNodes := uint32(0) - if numOutboundRemoteNodes > cc.targetNonValidatorOutboundRemoteNodes { - excessiveOutboundRemoteNodes = numOutboundRemoteNodes - cc.targetNonValidatorOutboundRemoteNodes + if numOutboundRemoteNodes > nm.targetNonValidatorOutboundRemoteNodes { + excessiveOutboundRemoteNodes = numOutboundRemoteNodes - nm.targetNonValidatorOutboundRemoteNodes } // First disconnect the attempted remote nodes. @@ -426,9 +434,9 @@ func (cc *ConnectionController) refreshNonValidatorOutboundIndex() { if excessiveOutboundRemoteNodes == 0 { break } - glog.V(2).Infof("ConnectionController.refreshNonValidatorOutboundIndex: Disconnecting attempted remote "+ + glog.V(2).Infof("NetworkManager.refreshNonValidatorOutboundIndex: Disconnecting attempted remote "+ "node (id=%v) due to excess outbound peers", rn.GetId()) - cc.rnManager.Disconnect(rn) + nm.rnManager.Disconnect(rn) excessiveOutboundRemoteNodes-- } // Now disconnect the connected remote nodes, if we still have too many remote nodes. @@ -436,24 +444,24 @@ func (cc *ConnectionController) refreshNonValidatorOutboundIndex() { if excessiveOutboundRemoteNodes == 0 { break } - glog.V(2).Infof("ConnectionController.refreshNonValidatorOutboundIndex: Disconnecting connected remote "+ + glog.V(2).Infof("NetworkManager.refreshNonValidatorOutboundIndex: Disconnecting connected remote "+ "node (id=%v) due to excess outbound peers", rn.GetId()) - cc.rnManager.Disconnect(rn) + nm.rnManager.Disconnect(rn) excessiveOutboundRemoteNodes-- } } // refreshNonValidatorInboundIndex is called periodically by the non-validator connector. It is responsible for // disconnecting excess inbound remote nodes. 
-func (cc *ConnectionController) refreshNonValidatorInboundIndex() { +func (nm *NetworkManager) refreshNonValidatorInboundIndex() { // First let's check if we have an excess number of inbound remote nodes. If we do, we'll disconnect some of them. - numConnectedInboundRemoteNodes := uint32(cc.rnManager.GetNonValidatorInboundIndex().Count()) - if numConnectedInboundRemoteNodes <= cc.targetNonValidatorInboundRemoteNodes { + numConnectedInboundRemoteNodes := uint32(nm.rnManager.GetNonValidatorInboundIndex().Count()) + if numConnectedInboundRemoteNodes <= nm.targetNonValidatorInboundRemoteNodes { return } // Disconnect random inbound non-validators if we have too many of them. - inboundRemoteNodes := cc.rnManager.GetNonValidatorInboundIndex().GetAll() + inboundRemoteNodes := nm.rnManager.GetNonValidatorInboundIndex().GetAll() var connectedInboundRemoteNodes []*RemoteNode for _, rn := range inboundRemoteNodes { // We only want to disconnect remote nodes that have completed handshake. @@ -463,58 +471,58 @@ func (cc *ConnectionController) refreshNonValidatorInboundIndex() { } excessiveInboundRemoteNodes := uint32(0) - if numConnectedInboundRemoteNodes > cc.targetNonValidatorInboundRemoteNodes { - excessiveInboundRemoteNodes = numConnectedInboundRemoteNodes - cc.targetNonValidatorInboundRemoteNodes + if numConnectedInboundRemoteNodes > nm.targetNonValidatorInboundRemoteNodes { + excessiveInboundRemoteNodes = numConnectedInboundRemoteNodes - nm.targetNonValidatorInboundRemoteNodes } for _, rn := range connectedInboundRemoteNodes { if excessiveInboundRemoteNodes == 0 { break } - glog.V(2).Infof("ConnectionController.refreshNonValidatorInboundIndex: Disconnecting inbound remote "+ + glog.V(2).Infof("NetworkManager.refreshNonValidatorInboundIndex: Disconnecting inbound remote "+ "node (id=%v) due to excess inbound peers", rn.GetId()) - cc.rnManager.Disconnect(rn) + nm.rnManager.Disconnect(rn) excessiveInboundRemoteNodes-- } } -func (cc *ConnectionController) connectNonValidators() { - numOutboundPeers := uint32(cc.rnManager.GetNonValidatorOutboundIndex().Count()) +func (nm *NetworkManager) connectNonValidators() { + numOutboundPeers := uint32(nm.rnManager.GetNonValidatorOutboundIndex().Count()) remainingOutboundPeers := uint32(0) - if numOutboundPeers < cc.targetNonValidatorOutboundRemoteNodes { - remainingOutboundPeers = cc.targetNonValidatorOutboundRemoteNodes - numOutboundPeers + if numOutboundPeers < nm.targetNonValidatorOutboundRemoteNodes { + remainingOutboundPeers = nm.targetNonValidatorOutboundRemoteNodes - numOutboundPeers } for ii := uint32(0); ii < remainingOutboundPeers; ii++ { - addr := cc.getRandomUnconnectedAddress() + addr := nm.getRandomUnconnectedAddress() if addr == nil { break } - cc.AddrMgr.Attempt(addr) - if err := cc.rnManager.CreateNonValidatorOutboundConnection(addr); err != nil { - glog.V(2).Infof("ConnectionController.connectNonValidators: Problem creating non-validator outbound "+ + nm.AddrMgr.Attempt(addr) + if err := nm.rnManager.CreateNonValidatorOutboundConnection(addr); err != nil { + glog.V(2).Infof("NetworkManager.connectNonValidators: Problem creating non-validator outbound "+ "connection to addr: %v; err: %v", addr, err) } } } -func (cc *ConnectionController) getRandomUnconnectedAddress() *wire.NetAddress { +func (nm *NetworkManager) getRandomUnconnectedAddress() *wire.NetAddress { for tries := 0; tries < 100; tries++ { - addr := cc.AddrMgr.GetAddress() + addr := nm.AddrMgr.GetAddress() if addr == nil { break } - if 
cc.cmgr.IsConnectedOutboundIpAddress(addr.NetAddress()) { + if nm.cmgr.IsConnectedOutboundIpAddress(addr.NetAddress()) { continue } - if cc.cmgr.IsAttemptedOutboundIpAddress(addr.NetAddress()) { + if nm.cmgr.IsAttemptedOutboundIpAddress(addr.NetAddress()) { continue } // We can only have one outbound address per /16. This is similar to // Bitcoin and we do it to prevent Sybil attacks. - if cc.cmgr.IsFromRedundantOutboundIPAddress(addr.NetAddress()) { + if nm.cmgr.IsFromRedundantOutboundIPAddress(addr.NetAddress()) { continue } @@ -524,62 +532,62 @@ func (cc *ConnectionController) getRandomUnconnectedAddress() *wire.NetAddress { return nil } -func (cc *ConnectionController) CreateValidatorConnection(ipStr string, publicKey *bls.PublicKey) error { - netAddr, err := cc.ConvertIPStringToNetAddress(ipStr) +func (nm *NetworkManager) CreateValidatorConnection(ipStr string, publicKey *bls.PublicKey) error { + netAddr, err := nm.ConvertIPStringToNetAddress(ipStr) if err != nil { return err } - return cc.rnManager.CreateValidatorConnection(netAddr, publicKey) + return nm.rnManager.CreateValidatorConnection(netAddr, publicKey) } -func (cc *ConnectionController) CreateNonValidatorPersistentOutboundConnection(ipStr string) (RemoteNodeId, error) { - netAddr, err := cc.ConvertIPStringToNetAddress(ipStr) +func (nm *NetworkManager) CreateNonValidatorPersistentOutboundConnection(ipStr string) (RemoteNodeId, error) { + netAddr, err := nm.ConvertIPStringToNetAddress(ipStr) if err != nil { return 0, err } - return cc.rnManager.CreateNonValidatorPersistentOutboundConnection(netAddr) + return nm.rnManager.CreateNonValidatorPersistentOutboundConnection(netAddr) } -func (cc *ConnectionController) CreateNonValidatorOutboundConnection(ipStr string) error { - netAddr, err := cc.ConvertIPStringToNetAddress(ipStr) +func (nm *NetworkManager) CreateNonValidatorOutboundConnection(ipStr string) error { + netAddr, err := nm.ConvertIPStringToNetAddress(ipStr) if err != nil { return err } - return cc.rnManager.CreateNonValidatorOutboundConnection(netAddr) + return nm.rnManager.CreateNonValidatorOutboundConnection(netAddr) } -func (cc *ConnectionController) SetTargetOutboundPeers(numPeers uint32) { - cc.targetNonValidatorOutboundRemoteNodes = numPeers +func (nm *NetworkManager) SetTargetOutboundPeers(numPeers uint32) { + nm.targetNonValidatorOutboundRemoteNodes = numPeers } // processInboundConnection is called when a new inbound connection is established. At this point, the connection is not validated, // nor is it assigned to a RemoteNode. This function is responsible for validating the connection and creating a RemoteNode from it. // Once the RemoteNode is created, we will initiate handshake. -func (cc *ConnectionController) processInboundConnection(conn Connection) (*RemoteNode, error) { +func (nm *NetworkManager) processInboundConnection(conn Connection) (*RemoteNode, error) { var ic *inboundConnection var ok bool if ic, ok = conn.(*inboundConnection); !ok { - return nil, fmt.Errorf("ConnectionController.handleInboundConnection: Connection is not an inboundConnection") + return nil, fmt.Errorf("NetworkManager.handleInboundConnection: Connection is not an inboundConnection") } // If we want to limit inbound connections to one per IP address, check to make sure this address isn't already connected. 
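isDuplicateInboundIPAddress, used just below, fails closed: an address that cannot be parsed is treated as a duplicate so the inbound connection is rejected. A minimal sketch of that shape, assuming a hypothetical set of already-connected inbound hosts:

package main

import (
	"fmt"
	"net"
)

// isDuplicateInboundIP fails closed: anything we cannot parse is treated
// as a duplicate so the inbound connection gets rejected.
func isDuplicateInboundIP(addr string, connected map[string]bool) bool {
	host, _, err := net.SplitHostPort(addr)
	if err != nil {
		return true // unparsable: reject rather than risk a bad entry
	}
	return connected[host]
}

func main() {
	connected := map[string]bool{"10.0.0.5": true}
	fmt.Println(isDuplicateInboundIP("10.0.0.5:43210", connected)) // true
	fmt.Println(isDuplicateInboundIP("10.0.0.6:43210", connected)) // false
	fmt.Println(isDuplicateInboundIP("not-an-address", connected)) // true
}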
- if cc.limitOneInboundRemoteNodePerIP && - cc.isDuplicateInboundIPAddress(ic.connection.RemoteAddr()) { + if nm.limitOneInboundRemoteNodePerIP && + nm.isDuplicateInboundIPAddress(ic.connection.RemoteAddr()) { - return nil, fmt.Errorf("ConnectionController.handleInboundConnection: Rejecting INBOUND peer (%s) due to "+ + return nil, fmt.Errorf("NetworkManager.handleInboundConnection: Rejecting INBOUND peer (%s) due to "+ "already having an inbound connection from the same IP with limit_one_inbound_connection_per_ip set", ic.connection.RemoteAddr().String()) } - na, err := cc.ConvertIPStringToNetAddress(ic.connection.RemoteAddr().String()) + na, err := nm.ConvertIPStringToNetAddress(ic.connection.RemoteAddr().String()) if err != nil { - return nil, errors.Wrapf(err, "ConnectionController.handleInboundConnection: Problem calling "+ + return nil, errors.Wrapf(err, "NetworkManager.handleInboundConnection: Problem calling "+ "ConvertIPStringToNetAddress for addr: (%s)", ic.connection.RemoteAddr().String()) } - remoteNode, err := cc.rnManager.AttachInboundConnection(ic.connection, na) + remoteNode, err := nm.rnManager.AttachInboundConnection(ic.connection, na) if remoteNode == nil || err != nil { - return nil, errors.Wrapf(err, "ConnectionController.handleInboundConnection: Problem calling "+ + return nil, errors.Wrapf(err, "NetworkManager.handleInboundConnection: Problem calling "+ "AttachInboundConnection for addr: (%s)", ic.connection.RemoteAddr().String()) } @@ -589,40 +597,40 @@ func (cc *ConnectionController) processInboundConnection(conn Connection) (*Remo // processOutboundConnection is called when a new outbound connection is established. At this point, the connection is not validated, // nor is it assigned to a RemoteNode. This function is responsible for validating the connection and creating a RemoteNode from it. // Once the RemoteNode is created, we will initiate handshake. -func (cc *ConnectionController) processOutboundConnection(conn Connection) (*RemoteNode, error) { +func (nm *NetworkManager) processOutboundConnection(conn Connection) (*RemoteNode, error) { var oc *outboundConnection var ok bool if oc, ok = conn.(*outboundConnection); !ok { - return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Connection is not an outboundConnection") + return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Connection is not an outboundConnection") } if oc.failed { - return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Failed to connect to peer (%s:%v)", + return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Failed to connect to peer (%s:%v)", oc.address.IP.String(), oc.address.Port) } if !oc.isPersistent { - cc.AddrMgr.Connected(oc.address) - cc.AddrMgr.Good(oc.address) + nm.AddrMgr.Connected(oc.address) + nm.AddrMgr.Good(oc.address) } // If this is a non-persistent outbound peer and the group key overlaps with another peer we're already connected to then // abort mission. We only connect to one peer per IP group in order to prevent Sybil attacks. 
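The IP-group rule above needs a canonical key per address; the code uses btcd's addrmgr.GroupKey, which for plain IPv4 reduces to the /16 prefix. A simplified sketch of that reduction with only the standard library (the real GroupKey also special-cases IPv6 and local ranges):

package main

import (
	"fmt"
	"net"
)

// groupKey collapses an IPv4 address to its /16 prefix so that at most one
// outbound connection per group can be enforced. IPv6 and unparsable
// addresses fall back to the raw string here for simplicity.
func groupKey(ipStr string) string {
	ip := net.ParseIP(ipStr)
	if v4 := ip.To4(); v4 != nil {
		return fmt.Sprintf("%d.%d.0.0/16", v4[0], v4[1])
	}
	return ipStr
}

func main() {
	seen := map[string]bool{}
	for _, ip := range []string{"34.12.7.9", "34.12.200.1", "52.9.3.3"} {
		key := groupKey(ip)
		if seen[key] {
			fmt.Println("rejecting", ip, "- redundant group", key)
			continue
		}
		seen[key] = true
		fmt.Println("accepting", ip, "- group", key)
	}
}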
- if !oc.isPersistent && cc.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { - return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+ + if !oc.isPersistent && nm.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { + return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+ "connection with redundant group key (%s).", addrmgr.GroupKey(oc.address)) } - na, err := cc.ConvertIPStringToNetAddress(oc.connection.RemoteAddr().String()) + na, err := nm.ConvertIPStringToNetAddress(oc.connection.RemoteAddr().String()) if err != nil { - return nil, errors.Wrapf(err, "ConnectionController.handleOutboundConnection: Problem calling ipToNetAddr "+ + return nil, errors.Wrapf(err, "NetworkManager.handleOutboundConnection: Problem calling ipToNetAddr "+ "for addr: (%s)", oc.connection.RemoteAddr().String()) } // Attach the connection before additional validation steps because it is already established. - remoteNode, err := cc.rnManager.AttachOutboundConnection(oc.connection, na, oc.attemptId, oc.isPersistent) + remoteNode, err := nm.rnManager.AttachOutboundConnection(oc.connection, na, oc.attemptId, oc.isPersistent) if remoteNode == nil || err != nil { - return nil, errors.Wrapf(err, "ConnectionController.handleOutboundConnection: Problem calling rnManager.AttachOutboundConnection "+ + return nil, errors.Wrapf(err, "NetworkManager.handleOutboundConnection: Problem calling rnManager.AttachOutboundConnection "+ "for addr: (%s)", oc.connection.RemoteAddr().String()) } @@ -636,24 +644,24 @@ func (cc *ConnectionController) processOutboundConnection(conn Connection) (*Rem // If the group key overlaps with another peer we're already connected to then abort mission. We only connect to // one peer per IP group in order to prevent Sybil attacks. 
- if cc.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { - return nil, fmt.Errorf("ConnectionController.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+ + if nm.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { + return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+ "connection with redundant group key (%s).", addrmgr.GroupKey(oc.address)) } - cc.cmgr.AddToGroupKey(na) + nm.cmgr.AddToGroupKey(na) return remoteNode, nil } -func (cc *ConnectionController) ConvertIPStringToNetAddress(ipStr string) (*wire.NetAddress, error) { - netAddr, err := IPToNetAddr(ipStr, cc.AddrMgr, cc.params) +func (nm *NetworkManager) ConvertIPStringToNetAddress(ipStr string) (*wire.NetAddress, error) { + netAddr, err := IPToNetAddr(ipStr, nm.AddrMgr, nm.params) if err != nil { return nil, errors.Wrapf(err, - "ConnectionController.ConvertIPStringToNetAddress: Problem parsing "+ + "NetworkManager.ConvertIPStringToNetAddress: Problem parsing "+ "ipString to wire.NetAddress") } if netAddr == nil { - return nil, fmt.Errorf("ConnectionController.ConvertIPStringToNetAddress: " + + return nil, fmt.Errorf("NetworkManager.ConvertIPStringToNetAddress: " + "address was nil after parsing") } return netAddr, nil @@ -680,24 +688,24 @@ func IPToNetAddr(ipStr string, addrMgr *addrmgr.AddrManager, params *DeSoParams) return netAddr, nil } -func (cc *ConnectionController) isDuplicateInboundIPAddress(addr net.Addr) bool { - netAddr, err := IPToNetAddr(addr.String(), cc.AddrMgr, cc.params) +func (nm *NetworkManager) isDuplicateInboundIPAddress(addr net.Addr) bool { + netAddr, err := IPToNetAddr(addr.String(), nm.AddrMgr, nm.params) if err != nil { // Return true in case we have an error. We do this because it // will result in the peer connection not being accepted, which // is desired in this case. glog.Warningf(errors.Wrapf(err, - "ConnectionController.isDuplicateInboundIPAddress: Problem parsing "+ + "NetworkManager.isDuplicateInboundIPAddress: Problem parsing "+ "net.Addr to wire.NetAddress so marking as redundant and not "+ "making connection").Error()) return true } if netAddr == nil { - glog.Warningf("ConnectionController.isDuplicateInboundIPAddress: " + + glog.Warningf("NetworkManager.isDuplicateInboundIPAddress: " + "address was nil after parsing so marking as redundant and not " + "making connection") return true } - return cc.cmgr.IsDuplicateInboundIPAddress(netAddr) + return nm.cmgr.IsDuplicateInboundIPAddress(netAddr) } diff --git a/lib/peer.go b/lib/peer.go index c59d77921..996d2632d 100644 --- a/lib/peer.go +++ b/lib/peer.go @@ -1196,17 +1196,6 @@ func (pp *Peer) Start() { go pp.StartDeSoMessageProcessor() pp.startGroup.Wait() - // If the address manager needs more addresses, then send a GetAddr message - // to the peer. This is best-effort. - if pp.cmgr != nil { - // TODO: Move this to ConnectionController. - /*if pp.cmgr.AddrMgr.NeedMoreAddresses() { - go func() { - pp.QueueMessage(&MsgDeSoGetAddr{}) - }() - }*/ - } - // Send our verack message now that the IO processing machinery has started. 
} diff --git a/lib/remote_node.go b/lib/remote_node.go index 42fe21521..b8f378d07 100644 --- a/lib/remote_node.go +++ b/lib/remote_node.go @@ -304,6 +304,8 @@ func (rn *RemoteNode) AttachInboundConnection(conn net.Conn, na *wire.NetAddress id := rn.GetId().ToUint64() rn.peer = rn.cmgr.ConnectPeer(id, conn, na, false, false) + versionTimeExpected := time.Now().Add(rn.params.VersionNegotiationTimeout) + rn.versionTimeExpected = &versionTimeExpected rn.setStatusConnected() return nil } @@ -319,6 +321,8 @@ func (rn *RemoteNode) AttachOutboundConnection(conn net.Conn, na *wire.NetAddres id := rn.GetId().ToUint64() rn.peer = rn.cmgr.ConnectPeer(id, conn, na, true, isPersistent) + versionTimeExpected := time.Now().Add(rn.params.VersionNegotiationTimeout) + rn.versionTimeExpected = &versionTimeExpected rn.setStatusConnected() return nil } @@ -373,8 +377,6 @@ func (rn *RemoteNode) InitiateHandshake(nonce uint64) error { return fmt.Errorf("InitiateHandshake: Remote node is not connected") } - versionTimeExpected := time.Now().Add(rn.params.VersionNegotiationTimeout) - rn.versionTimeExpected = &versionTimeExpected if rn.GetPeer().IsOutbound() { if err := rn.sendVersionMessage(nonce); err != nil { return fmt.Errorf("InitiateHandshake: Problem sending version message to peer (id= %d): %v", rn.id, err) diff --git a/lib/remote_node_manager.go b/lib/remote_node_manager.go index 2457af58f..bd23908bf 100644 --- a/lib/remote_node_manager.go +++ b/lib/remote_node_manager.go @@ -79,6 +79,7 @@ func (manager *RemoteNodeManager) ProcessCompletedHandshake(remoteNode *RemoteNo manager.SetNonValidator(remoteNode) } manager.srv.HandleAcceptedPeer(remoteNode.GetPeer()) + manager.srv.maybeRequestAddresses(remoteNode) } func (manager *RemoteNodeManager) Disconnect(rn *RemoteNode) { diff --git a/lib/server.go b/lib/server.go index 987431af8..e54296739 100644 --- a/lib/server.go +++ b/lib/server.go @@ -63,9 +63,8 @@ type Server struct { eventManager *EventManager TxIndex *TXIndex - handshakeController *HandshakeController // fastHotStuffEventLoop consensus.FastHotStuffEventLoop - connectionController *ConnectionController + networkManager *NetworkManager // posMempool *PosMemPool TODO: Add the mempool later params *DeSoParams @@ -182,8 +181,8 @@ func (srv *Server) ResetRequestQueues() { srv.requestedTransactionsMap = make(map[BlockHash]*GetDataRequestInfo) } -func (srv *Server) GetConnectionController() *ConnectionController { - return srv.connectionController +func (srv *Server) GetNetworkManager() *NetworkManager { + return srv.networkManager } // dataLock must be acquired for writing before calling this function. 
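Setting versionTimeExpected as soon as the connection is attached, rather than when
InitiateHandshake runs, means a cleanup pass can treat any RemoteNode that is still
mid-handshake past the deadline as stale. A minimal sketch of such a check, with a
hypothetical helper name (the actual cleanup lives in the NetworkManager routines
documented later in this series):

func (rn *RemoteNode) hasHandshakeTimedOut(now time.Time) bool {
	// The deadline is only armed once a connection has been attached.
	if rn.versionTimeExpected == nil {
		return false
	}
	// Past the deadline without a completed handshake means the peer never negotiated.
	return !rn.IsHandshakeCompleted() && now.After(*rn.versionTimeExpected)
}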
@@ -503,10 +502,8 @@ func NewServer( nodeServices |= SFPosValidator } rnManager := NewRemoteNodeManager(srv, _chain, _cmgr, _blsKeystore, _params, _minFeeRateNanosPerKB, nodeServices) - - srv.handshakeController = NewHandshakeController(rnManager) - srv.connectionController = NewConnectionController(_params, _cmgr, srv.handshakeController, rnManager, _blsKeystore, - _desoAddrMgr, _connectIps, _targetOutboundPeers, _maxInboundPeers, _limitOneInboundConnectionPerIP) + srv.networkManager = NewConnectionController(_params, _cmgr, rnManager, _blsKeystore, _desoAddrMgr, + _connectIps, _targetOutboundPeers, _maxInboundPeers, _limitOneInboundConnectionPerIP) if srv.stateChangeSyncer != nil { srv.stateChangeSyncer.BlockHeight = uint64(_chain.headerTip().Height) @@ -1625,6 +1622,22 @@ func (srv *Server) HandleAcceptedPeer(pp *Peer) { } } +func (srv *Server) maybeRequestAddresses(remoteNode *RemoteNode) { + if remoteNode == nil { + return + } + // If the address manager needs more addresses, then send a GetAddr message + // to the peer. This is best-effort. + if !srv.AddrMgr.NeedMoreAddresses() { + return + } + + if err := remoteNode.SendMessage(&MsgDeSoGetAddr{}); err != nil { + glog.Errorf("Server.maybeRequestAddresses: Problem sending GetAddr message to "+ + "remoteNode (id= %v); err: %v", remoteNode, err) + } +} + func (srv *Server) _cleanupDonePeerState(pp *Peer) { // Grab the dataLock since we'll be modifying requestedBlocks srv.dataLock.Lock() @@ -2186,7 +2199,7 @@ func (srv *Server) _handleAddrMessage(pp *Peer, desoMsg DeSoMessage) { var ok bool if msg, ok = desoMsg.(*MsgDeSoAddr); !ok { glog.Errorf("Server._handleAddrMessage: Problem decoding MsgDeSoAddr: %v", spew.Sdump(desoMsg)) - srv.connectionController.rnManager.DisconnectById(id) + srv.networkManager.rnManager.DisconnectById(id) return } @@ -2202,7 +2215,7 @@ func (srv *Server) _handleAddrMessage(pp *Peer, desoMsg DeSoMessage) { "Peer id=%v for sending us an addr message with %d transactions, which exceeds "+ "the max allowed %d", pp.ID, len(msg.AddrList), MaxAddrsPerAddrMsg)) - srv.connectionController.rnManager.DisconnectById(id) + srv.networkManager.rnManager.DisconnectById(id) return } @@ -2253,7 +2266,7 @@ func (srv *Server) _handleGetAddrMessage(pp *Peer, desoMsg DeSoMessage) { if _, ok := desoMsg.(*MsgDeSoGetAddr); !ok { glog.Errorf("Server._handleAddrMessage: Problem decoding "+ "MsgDeSoAddr: %v", spew.Sdump(desoMsg)) - srv.connectionController.rnManager.DisconnectById(id) + srv.networkManager.rnManager.DisconnectById(id) return } @@ -2261,6 +2274,9 @@ func (srv *Server) _handleGetAddrMessage(pp *Peer, desoMsg DeSoMessage) { // When we get a GetAddr message, choose MaxAddrsPerMsg from the AddrMgr // and send them back to the peer. 
netAddrsFound := srv.AddrMgr.AddressCache() + if len(netAddrsFound) == 0 { + return + } if len(netAddrsFound) > MaxAddrsPerAddrMsg { netAddrsFound = netAddrsFound[:MaxAddrsPerAddrMsg] } @@ -2276,10 +2292,10 @@ func (srv *Server) _handleGetAddrMessage(pp *Peer, desoMsg DeSoMessage) { } res.AddrList = append(res.AddrList, singleAddr) } - rn := srv.connectionController.rnManager.GetRemoteNodeById(id) - if err := srv.connectionController.rnManager.SendMessage(rn, res); err != nil { + rn := srv.networkManager.rnManager.GetRemoteNodeById(id) + if err := srv.networkManager.rnManager.SendMessage(rn, res); err != nil { glog.Errorf("Server._handleGetAddrMessage: Problem sending addr message to peer %v: %v", pp, err) - srv.connectionController.rnManager.DisconnectById(id) + srv.networkManager.rnManager.DisconnectById(id) return } } @@ -2289,9 +2305,9 @@ func (srv *Server) _handleControlMessages(serverMessage *ServerMessage) (_should // Control messages used internally to signal to the server. case *MsgDeSoDisconnectedPeer: srv._handleDonePeer(serverMessage.Peer) - srv.connectionController._handleDonePeerMessage(serverMessage.Peer, serverMessage.Msg) + srv.networkManager._handleDonePeerMessage(serverMessage.Peer, serverMessage.Msg) case *MsgDeSoNewConnection: - srv.connectionController._handleNewConnectionMessage(serverMessage.Peer, serverMessage.Msg) + srv.networkManager._handleNewConnectionMessage(serverMessage.Peer, serverMessage.Msg) case *MsgDeSoQuit: return true } @@ -2330,9 +2346,9 @@ func (srv *Server) _handlePeerMessages(serverMessage *ServerMessage) { case *MsgDeSoInv: srv._handleInv(serverMessage.Peer, msg) case *MsgDeSoVersion: - srv.handshakeController._handleVersionMessage(serverMessage.Peer, serverMessage.Msg) + srv.networkManager._handleVersionMessage(serverMessage.Peer, serverMessage.Msg) case *MsgDeSoVerack: - srv.handshakeController._handleVerackMessage(serverMessage.Peer, serverMessage.Msg) + srv.networkManager._handleVerackMessage(serverMessage.Peer, serverMessage.Msg) } } @@ -2498,7 +2514,7 @@ func (srv *Server) _startAddressRelayer() { // For the first ten minutes after the connection controller starts, relay our address to all // peers. After the first ten minutes, do it once every 24 hours. glog.V(1).Infof("Server.startAddressRelayer: Relaying our own addr to peers") - remoteNodes := srv.connectionController.rnManager.GetAllRemoteNodes().GetAll() + remoteNodes := srv.networkManager.rnManager.GetAllRemoteNodes().GetAll() if numMinutesPassed < 10 || numMinutesPassed%(RebroadcastNodeAddrIntervalMinutes) == 0 { for _, rn := range remoteNodes { if !rn.IsHandshakeCompleted() { @@ -2582,8 +2598,8 @@ func (srv *Server) Stop() { srv.cmgr.Stop() glog.Infof(CLog(Yellow, "Server.Stop: Closed the ConnectionManger")) - srv.connectionController.Stop() - glog.Infof(CLog(Yellow, "Server.Stop: Closed the ConnectionController")) + srv.networkManager.Stop() + glog.Infof(CLog(Yellow, "Server.Stop: Closed the NetworkManager")) // Stop the miner if we have one running. if srv.miner != nil { @@ -2668,7 +2684,7 @@ func (srv *Server) Start() { go srv.miner.Start() } - srv.connectionController.Start() + srv.networkManager.Start() } // SyncPrefixProgress keeps track of sync progress on an individual prefix. 
It is used in From c7317f531a6e23f939678405683dadc57293fe66 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Tue, 30 Jan 2024 12:20:17 -0800 Subject: [PATCH 15/37] PoS NetworkManager Fix Integration Tests (#960) * Some fixes * Fixes * Fix another integration test * Fix integration tests * Fix RegtestMiner --- integration_testing/blocksync_test.go | 135 ++------ integration_testing/connection_bridge.go | 9 - integration_testing/hypersync_test.go | 302 +++++------------- integration_testing/migrations_test.go | 56 +--- integration_testing/mining_test.go | 27 +- .../network_manager_utils_test.go | 13 + integration_testing/rollback_test.go | 3 + integration_testing/tools.go | 26 +- integration_testing/txindex_test.go | 49 +-- lib/peer.go | 47 ++- lib/remote_node.go | 4 + lib/remote_node_manager.go | 2 +- lib/server.go | 17 +- 13 files changed, 227 insertions(+), 463 deletions(-) diff --git a/integration_testing/blocksync_test.go b/integration_testing/blocksync_test.go index be87aae3a..e03aef152 100644 --- a/integration_testing/blocksync_test.go +++ b/integration_testing/blocksync_test.go @@ -2,10 +2,6 @@ package integration_testing import ( "fmt" - "github.com/deso-protocol/core/cmd" - "github.com/deso-protocol/core/lib" - "github.com/stretchr/testify/require" - "os" "testing" ) @@ -16,40 +12,22 @@ import ( // 4. node2 syncs MaxSyncBlockHeight blocks from node1. // 5. compare node1 db matches node2 db. func TestSimpleBlockSync(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config1.SyncType = lib.NodeSyncTypeBlockSync - config2 := generateConfig(t, 18001, dbDir2, 10) - config2.SyncType = lib.NodeSyncTypeBlockSync - - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // TODO: Dial an outbound connection from node2 to node1 - // Fix other integration tests. + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) // wait for node2 to sync blocks. waitForNodeToFullySync(node2) compareNodesByDB(t, node1, node2, 0) fmt.Println("Databases match!") - node1.Stop() - node2.Stop() } // TestSimpleSyncRestart tests if a node can successfully restart while syncing blocks. @@ -61,46 +39,26 @@ func TestSimpleBlockSync(t *testing.T) { // 6. node2 reconnects with node1 and syncs remaining blocks. // 7. compare node1 db matches node2 db. 
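The tests refactored in this patch all wire nodes together the same way: instead of a
ConnectionBridge, each downstream node dials its upstream directly over localhost via
ConnectIPs. Condensed, the recurring pattern is (ports as used in these tests):

	node1 := spawnNodeProtocol1(t, 18000, "node1")
	node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} // sync from a seed node
	node1 = startNode(t, node1)
	waitForNodeToFullySync(node1)

	node2 := spawnNodeProtocol1(t, 18001, "node2")
	node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} // dial node1 directly
	node2 = startNode(t, node2)
	waitForNodeToFullySync(node2)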
func TestSimpleSyncRestart(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config1.SyncType = lib.NodeSyncTypeBlockSync - config2 := generateConfig(t, 18001, dbDir2, 10) - config2.SyncType = lib.NodeSyncTypeBlockSync - - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // bridge the nodes together. - bridge := NewConnectionBridge(node1, node2) - require.NoError(bridge.Start()) + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) - randomHeight := randomUint32Between(t, 10, config2.MaxSyncBlockHeight) - fmt.Println("Random height for a restart (re-use if test failed):", randomHeight) + randomHeight := randomUint32Between(t, 10, node2.Config.MaxSyncBlockHeight) + t.Logf("Random height for a restart (re-use if test failed): %v", randomHeight) // Reboot node2 at a specific height and reconnect it with node1 - node2, bridge = restartAtHeightAndReconnectNode(t, node2, node1, bridge, randomHeight) + node2 = restartAtHeight(t, node2, randomHeight) waitForNodeToFullySync(node2) compareNodesByDB(t, node1, node2, 0) - fmt.Println("Random restart successful! Random height was", randomHeight) - fmt.Println("Databases match!") - bridge.Disconnect() - node1.Stop() - node2.Stop() + t.Logf("Random restart successful! Random height was: %v", randomHeight) + t.Logf("Databases match!") } // TestSimpleSyncDisconnectWithSwitchingToNewPeer tests if a node can successfully restart while syncing blocks, and @@ -114,62 +72,35 @@ func TestSimpleSyncRestart(t *testing.T) { // 7. compare node1 state matches node2 state. // 8. compare node3 state matches node2 state. func TestSimpleSyncDisconnectWithSwitchingToNewPeer(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - dbDir3 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - defer os.RemoveAll(dbDir3) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config1.SyncType = lib.NodeSyncTypeBlockSync - config2 := generateConfig(t, 18001, dbDir2, 10) - config2.SyncType = lib.NodeSyncTypeBlockSync - config3 := generateConfig(t, 18002, dbDir3, 10) - config3.SyncType = lib.NodeSyncTypeBlockSync - - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - config3.ConnectIPs = []string{"deso-seed-2.io:17000"} - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - node3 := cmd.NewNode(config3) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) - node3 = startNode(t, node3) // wait for node1 to sync blocks waitForNodeToFullySync(node1) + + node3 := spawnNodeProtocol1(t, 18002, "node3") + node3.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} + node3 = startNode(t, node3) + // wait for node3 to sync blocks waitForNodeToFullySync(node3) - // bridge the nodes together. 
- bridge12 := NewConnectionBridge(node1, node2) - require.NoError(bridge12.Start()) - - randomHeight := randomUint32Between(t, 10, config2.MaxSyncBlockHeight) - fmt.Println("Random height for a restart (re-use if test failed):", randomHeight) - disconnectAtBlockHeight(node2, bridge12, randomHeight) + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) - // bridge the nodes together. - bridge23 := NewConnectionBridge(node2, node3) - require.NoError(bridge23.Start()) + randomHeight := randomUint32Between(t, 10, node2.Config.MaxSyncBlockHeight) + t.Logf("Random height for a restart (re-use if test failed): %v", randomHeight) - // Reboot node2 at a specific height and reconnect it with node1 - //node2, bridge12 = restartAtHeightAndReconnectNode(t, node2, node1, bridge12, randomHeight) + // Reboot node2 at a specific height and reconnect it with node3 + node2 = shutdownAtHeight(t, node2, randomHeight) + node2.Config.ConnectIPs = []string{"127.0.0.1:18002"} + node2 = startNode(t, node2) waitForNodeToFullySync(node2) compareNodesByDB(t, node1, node2, 0) compareNodesByDB(t, node3, node2, 0) - fmt.Println("Random restart successful! Random height was", randomHeight) - fmt.Println("Databases match!") - bridge12.Disconnect() - bridge23.Disconnect() - node1.Stop() - node2.Stop() - node3.Stop() + t.Logf("Random restart successful! Random height was %v", randomHeight) + t.Logf("Databases match!") } diff --git a/integration_testing/connection_bridge.go b/integration_testing/connection_bridge.go index 139c7cafb..b93fabac5 100644 --- a/integration_testing/connection_bridge.go +++ b/integration_testing/connection_bridge.go @@ -201,7 +201,6 @@ func ReadWithTimeout(readFunc func() error, readTimeout time.Duration) error { func (bridge *ConnectionBridge) startConnection(connection *lib.Peer, otherNode *cmd.Node) error { // Prepare the version message. versionMessage := bridge.getVersionMessage(otherNode) - connection.VersionNonceSent = versionMessage.Nonce // Send the version message. fmt.Println("Sending version message:", versionMessage, versionMessage.LatestBlockHeight) @@ -222,7 +221,6 @@ func (bridge *ConnectionBridge) startConnection(connection *lib.Peer, otherNode return err } - connection.VersionNonceReceived = verMsg.Nonce connection.TimeConnected = time.Unix(verMsg.TstampSecs, 0) connection.TimeOffsetSecs = verMsg.TstampSecs - time.Now().Unix() return nil @@ -233,7 +231,6 @@ func (bridge *ConnectionBridge) startConnection(connection *lib.Peer, otherNode // Now prepare the verack message. verackMsg := lib.NewMessage(lib.MsgTypeVerack) - verackMsg.(*lib.MsgDeSoVerack).NonceReceived = connection.VersionNonceReceived // And send it to the connection. if err := connection.WriteDeSoMessage(verackMsg); err != nil { @@ -251,17 +248,11 @@ func (bridge *ConnectionBridge) startConnection(connection *lib.Peer, otherNode if msg.GetMsgType() != lib.MsgTypeVerack { return fmt.Errorf("message is not verack! 
Type: %v", msg.GetMsgType()) } - verackMsg := msg.(*lib.MsgDeSoVerack) - if verackMsg.NonceReceived != connection.VersionNonceSent { - return fmt.Errorf("verack message nonce doesn't match (received: %v, sent: %v)", - verackMsg.NonceReceived, connection.VersionNonceSent) - } return nil }, lib.DeSoMainnetParams.VersionNegotiationTimeout); err != nil { return err } - connection.VersionNegotiated = true return nil } diff --git a/integration_testing/hypersync_test.go b/integration_testing/hypersync_test.go index bc4c8a7c0..b76b1db48 100644 --- a/integration_testing/hypersync_test.go +++ b/integration_testing/hypersync_test.go @@ -1,11 +1,7 @@ package integration_testing import ( - "fmt" - "github.com/deso-protocol/core/cmd" "github.com/deso-protocol/core/lib" - "github.com/stretchr/testify/require" - "os" "testing" ) @@ -16,35 +12,19 @@ import ( // 4. node2 hypersyncs from node1 // 5. once done, compare node1 state, db, and checksum matches node2. func TestSimpleHyperSync(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config1.SyncType = lib.NodeSyncTypeBlockSync - config2 := generateConfig(t, 18001, dbDir2, 10) - config2.SyncType = lib.NodeSyncTypeHyperSync - - config1.HyperSync = true - config2.HyperSync = true - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.HyperSync = true + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // bridge the nodes together. - bridge := NewConnectionBridge(node1, node2) - require.NoError(bridge.Start()) + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.SyncType = lib.NodeSyncTypeHyperSync + node2.Config.HyperSync = true + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) // wait for node2 to sync blocks. waitForNodeToFullySync(node2) @@ -52,10 +32,7 @@ func TestSimpleHyperSync(t *testing.T) { compareNodesByState(t, node1, node2, 0) //compareNodesByDB(t, node1, node2, 0) compareNodesByChecksum(t, node1, node2) - fmt.Println("Databases match!") - bridge.Disconnect() - node1.Stop() - node2.Stop() + t.Logf("Databases match!") } // TestHyperSyncFromHyperSyncedNode test if a node can successfully hypersync from another hypersynced node: @@ -66,49 +43,28 @@ func TestSimpleHyperSync(t *testing.T) { // 5. once done, bridge node3 and node2 so that node3 hypersyncs from node2. // 6. compare node1 state, db, and checksum matches node2, and node3. 
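The "bridge" wording in the steps above predates this refactor; the chained topology now
comes down to ConnectIPs and SyncType settings on each node, roughly:

	node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} // block-syncs from a seed
	node2.Config.SyncType = lib.NodeSyncTypeHyperSyncArchival
	node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} // hypersyncs from node1
	node3.Config.SyncType = lib.NodeSyncTypeHyperSyncArchival
	node3.Config.ConnectIPs = []string{"127.0.0.1:18001"} // hypersyncs from the hypersynced node2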
func TestHyperSyncFromHyperSyncedNode(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - dbDir3 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - defer os.RemoveAll(dbDir3) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config1.SyncType = lib.NodeSyncTypeBlockSync - config2 := generateConfig(t, 18001, dbDir2, 10) - config2.SyncType = lib.NodeSyncTypeHyperSyncArchival - config3 := generateConfig(t, 18002, dbDir3, 10) - config3.SyncType = lib.NodeSyncTypeHyperSyncArchival - - config1.HyperSync = true - config2.HyperSync = true - config3.HyperSync = true - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - node3 := cmd.NewNode(config3) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.HyperSync = true + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) - node3 = startNode(t, node3) // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // bridge the nodes together. - bridge12 := NewConnectionBridge(node1, node2) - require.NoError(bridge12.Start()) + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.SyncType = lib.NodeSyncTypeHyperSyncArchival + node2.Config.HyperSync = true + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) // wait for node2 to sync blocks. waitForNodeToFullySync(node2) - // bridge node3 to node2 to kick off hyper sync from a hyper synced node - bridge23 := NewConnectionBridge(node2, node3) - require.NoError(bridge23.Start()) + node3 := spawnNodeProtocol1(t, 18002, "node3") + node3.Config.SyncType = lib.NodeSyncTypeHyperSyncArchival + node3.Config.HyperSync = true + node3.Config.ConnectIPs = []string{"127.0.0.1:18001"} + node3 = startNode(t, node3) // wait for node2 to sync blocks. waitForNodeToFullySync(node3) @@ -122,12 +78,7 @@ func TestHyperSyncFromHyperSyncedNode(t *testing.T) { //compareNodesByDB(t, node2, node3, 0) compareNodesByChecksum(t, node2, node3) - fmt.Println("Databases match!") - bridge12.Disconnect() - bridge23.Disconnect() - node1.Stop() - node2.Stop() - node3.Stop() + t.Logf("Databases match!") } // TestSimpleHyperSyncRestart test if a node can successfully hyper sync from another node: @@ -138,52 +89,34 @@ func TestHyperSyncFromHyperSyncedNode(t *testing.T) { // 5. node2 reconnects to node1 and hypersyncs again. // 6. Once node2 finishes sync, compare node1 state, db, and checksum matches node2. func TestSimpleHyperSyncRestart(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config2 := generateConfig(t, 18001, dbDir2, 10) - - config1.HyperSync = true - config1.SyncType = lib.NodeSyncTypeBlockSync - config2.HyperSync = true - config2.SyncType = lib.NodeSyncTypeHyperSyncArchival - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.HyperSync = true + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // bridge the nodes together. 
- bridge := NewConnectionBridge(node1, node2) - require.NoError(bridge.Start()) + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.SyncType = lib.NodeSyncTypeHyperSyncArchival + node2.Config.HyperSync = true + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) syncIndex := randomUint32Between(t, 0, uint32(len(lib.StatePrefixes.StatePrefixesList))) syncPrefix := lib.StatePrefixes.StatePrefixesList[syncIndex] - fmt.Println("Random sync prefix for a restart (re-use if test failed):", syncPrefix) + t.Logf("Random sync prefix for a restart (re-use if test failed): %v", syncPrefix) + // Reboot node2 at a specific sync prefix and reconnect it with node1 - node2, bridge = restartAtSyncPrefixAndReconnectNode(t, node2, node1, bridge, syncPrefix) + node2 = restartAtSyncPrefix(t, node2, syncPrefix) // wait for node2 to sync blocks. waitForNodeToFullySync(node2) compareNodesByState(t, node1, node2, 0) //compareNodesByDB(t, node1, node2, 0) compareNodesByChecksum(t, node1, node2) - fmt.Println("Random restart successful! Random sync prefix was", syncPrefix) - fmt.Println("Databases match!") - bridge.Disconnect() - node1.Stop() - node2.Stop() + t.Logf("Random restart successful! Random sync prefix was: %v", syncPrefix) + t.Logf("Databases match!") } // TestSimpleHyperSyncDisconnectWithSwitchingToNewPeer tests if a node can successfully restart while hypersyncing. @@ -194,57 +127,34 @@ func TestSimpleHyperSyncRestart(t *testing.T) { // 5. after restart, bridge node2 with node3 and resume hypersync. // 6. once node2 finishes, compare node1, node2, node3 state, db, and checksums are identical. func TestSimpleHyperSyncDisconnectWithSwitchingToNewPeer(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - dbDir3 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - defer os.RemoveAll(dbDir3) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config1.SyncType = lib.NodeSyncTypeBlockSync - config2 := generateConfig(t, 18001, dbDir2, 10) - config2.SyncType = lib.NodeSyncTypeHyperSyncArchival - config3 := generateConfig(t, 18002, dbDir3, 10) - config3.SyncType = lib.NodeSyncTypeBlockSync - - config1.HyperSync = true - config2.HyperSync = true - config3.HyperSync = true - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - config3.ConnectIPs = []string{"deso-seed-2.io:17000"} - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - node3 := cmd.NewNode(config3) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.HyperSync = true + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) - node3 = startNode(t, node3) - // wait for node1 to sync blocks waitForNodeToFullySync(node1) + + node3 := spawnNodeProtocol1(t, 18002, "node3") + node3.Config.HyperSync = true + node3.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node3 = startNode(t, node3) // wait for node3 to sync blocks waitForNodeToFullySync(node3) - // bridge the nodes together. 
- bridge12 := NewConnectionBridge(node1, node2) - require.NoError(bridge12.Start()) + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.SyncType = lib.NodeSyncTypeHyperSyncArchival + node2.Config.HyperSync = true + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) + // Reboot node2 at a specific height and reconnect it with node1 syncIndex := randomUint32Between(t, 0, uint32(len(lib.StatePrefixes.StatePrefixesList))) syncPrefix := lib.StatePrefixes.StatePrefixesList[syncIndex] - fmt.Println("Random prefix for a restart (re-use if test failed):", syncPrefix) - disconnectAtSyncPrefix(t, node2, bridge12, syncPrefix) - - // bridge the nodes together. - bridge23 := NewConnectionBridge(node2, node3) - require.NoError(bridge23.Start()) + t.Logf("Random prefix for a restart (re-use if test failed): %v", syncPrefix) + node2 = shutdownAtSyncPrefix(t, node2, syncPrefix) + node2.Config.ConnectIPs = []string{"127.0.0.1:18002"} + node2 = startNode(t, node2) - // Reboot node2 at a specific height and reconnect it with node1 - //node2, bridge12 = restartAtHeightAndReconnectNode(t, node2, node1, bridge12, randomHeight) // wait for node2 to sync blocks. waitForNodeToFullySync(node2) @@ -257,13 +167,8 @@ func TestSimpleHyperSyncDisconnectWithSwitchingToNewPeer(t *testing.T) { compareNodesByState(t, node1, node2, 0) //compareNodesByDB(t, node1, node2, 0) compareNodesByChecksum(t, node1, node2) - fmt.Println("Random restart successful! Random sync prefix was", syncPrefix) - fmt.Println("Databases match!") - bridge12.Disconnect() - bridge23.Disconnect() - node1.Stop() - node2.Stop() - node3.Stop() + t.Logf("Random restart successful! Random sync prefix was: %v", syncPrefix) + t.Logf("Databases match!") } // TODO: disconnecting the provider peer during hypersync doesn't work. @@ -317,93 +222,49 @@ func TestSimpleHyperSyncDisconnectWithSwitchingToNewPeer(t *testing.T) { //} func TestArchivalMode(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config2 := generateConfig(t, 18001, dbDir2, 10) - - config1.HyperSync = true - config2.HyperSync = true - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - config1.SyncType = lib.NodeSyncTypeBlockSync - config2.SyncType = lib.NodeSyncTypeHyperSyncArchival - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.HyperSync = true + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // bridge the nodes together. - bridge := NewConnectionBridge(node1, node2) - require.NoError(bridge.Start()) + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.SyncType = lib.NodeSyncTypeHyperSyncArchival + node2.Config.HyperSync = true + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) // wait for node2 to sync blocks. 
waitForNodeToFullySync(node2) compareNodesByDB(t, node1, node2, 0) - - //compareNodesByDB(t, node1, node2, 0) compareNodesByChecksum(t, node1, node2) - fmt.Println("Databases match!") - bridge.Disconnect() - node1.Stop() - node2.Stop() + t.Logf("Databases match!") } func TestBlockSyncFromArchivalModeHyperSync(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - dbDir3 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - defer os.RemoveAll(dbDir3) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config2 := generateConfig(t, 18001, dbDir2, 10) - config3 := generateConfig(t, 18002, dbDir3, 10) - - config1.HyperSync = true - config1.SyncType = lib.NodeSyncTypeBlockSync - config2.HyperSync = true - config2.SyncType = lib.NodeSyncTypeHyperSyncArchival - config3.HyperSync = false - config3.SyncType = lib.NodeSyncTypeBlockSync - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - node3 := cmd.NewNode(config3) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.HyperSync = true + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) - node3 = startNode(t, node3) - // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // bridge the nodes together. - bridge12 := NewConnectionBridge(node1, node2) - require.NoError(bridge12.Start()) - + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.SyncType = lib.NodeSyncTypeHyperSyncArchival + node2.Config.HyperSync = true + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) // wait for node2 to sync blocks. waitForNodeToFullySync(node2) - bridge23 := NewConnectionBridge(node2, node3) - require.NoError(bridge23.Start()) - + node3 := spawnNodeProtocol1(t, 18002, "node3") + node3.Config.SyncType = lib.NodeSyncTypeBlockSync + node3.Config.HyperSync = true + node3.Config.ConnectIPs = []string{"127.0.0.1:18001"} + node3 = startNode(t, node3) // wait for node3 to sync blocks. waitForNodeToFullySync(node3) @@ -412,10 +273,5 @@ func TestBlockSyncFromArchivalModeHyperSync(t *testing.T) { //compareNodesByDB(t, node1, node2, 0) compareNodesByChecksum(t, node1, node2) - fmt.Println("Databases match!") - bridge12.Disconnect() - bridge23.Disconnect() - node1.Stop() - node2.Stop() - node3.Stop() + t.Logf("Databases match!") } diff --git a/integration_testing/migrations_test.go b/integration_testing/migrations_test.go index 1419d483e..067a2f3b6 100644 --- a/integration_testing/migrations_test.go +++ b/integration_testing/migrations_test.go @@ -1,65 +1,39 @@ package integration_testing import ( - "fmt" - "github.com/deso-protocol/core/cmd" - "github.com/deso-protocol/core/lib" "github.com/stretchr/testify/require" - "os" "testing" ) // TODO: Add an encoder migration height in constants.go then modify some // random struct like UtxoEntry. Until we have a migration, we can't fully test this. 
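For context on what the TODO is waiting for: an encoder migration gates newly added
fields on a block height, so nodes only start encoding (and checksumming) the new fields
once the chain passes that height. A purely hypothetical sketch of the shape such a
migration takes; the names below are invented for illustration and are not the actual
DeSo encoder API:

	// Hypothetical: append the new field to the encoding only past the migration height.
	func encodeEntry(entry *UtxoEntry, blockHeight uint64) []byte {
		data := encodeLegacyFields(entry)
		if blockHeight >= hypotheticalUtxoEntryMigrationHeight {
			data = append(data, encodeNewField(entry)...)
		}
		return data
	}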
func TestEncoderMigrations(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config1.SyncType = lib.NodeSyncTypeBlockSync - config2 := generateConfig(t, 18001, dbDir2, 10) - config2.SyncType = lib.NodeSyncTypeHyperSync - - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - config1.HyperSync = true - config2.HyperSync = true - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.HyperSync = true + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} node1 = startNode(t, node1) - node2 = startNode(t, node2) - // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // bridge the nodes together. - bridge := NewConnectionBridge(node1, node2) - require.NoError(bridge.Start()) - + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.HyperSync = true + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2 = startNode(t, node2) // wait for node2 to sync blocks. waitForNodeToFullySync(node2) - fmt.Println("Chain state and operation channel", node2.Server.GetBlockchain().ChainState(), + t.Logf("Chain state and operation channel (state: %v), (len: %v)", node2.Server.GetBlockchain().ChainState(), len(node2.Server.GetBlockchain().Snapshot().OperationChannel.OperationChannel)) compareNodesByState(t, node1, node2, 0) - fmt.Println("node1 checksum:", computeNodeStateChecksum(t, node1, 1500)) - fmt.Println("node2 checksum:", computeNodeStateChecksum(t, node2, 1500)) + t.Logf("node1 checksum: %v", computeNodeStateChecksum(t, node1, 1500)) + t.Logf("node2 checksum: %v", computeNodeStateChecksum(t, node2, 1500)) checksum1, err := node1.Server.GetBlockchain().Snapshot().Checksum.ToBytes() - require.NoError(err) + require.NoError(t, err) checksum2, err := node2.Server.GetBlockchain().Snapshot().Checksum.ToBytes() - require.NoError(err) - fmt.Println("node1 server checksum:", checksum1) - fmt.Println("node2 server checksum:", checksum2) + require.NoError(t, err) + t.Logf("node1 server checksum: %v", checksum1) + t.Logf("node2 server checksum: %v", checksum2) compareNodesByChecksum(t, node1, node2) - fmt.Println("Databases match!") - bridge.Disconnect() - node1.Stop() - node2.Stop() + t.Logf("Databases match!") } diff --git a/integration_testing/mining_test.go b/integration_testing/mining_test.go index 88de5e097..facbce226 100644 --- a/integration_testing/mining_test.go +++ b/integration_testing/mining_test.go @@ -1,35 +1,22 @@ package integration_testing import ( - "github.com/deso-protocol/core/cmd" "github.com/deso-protocol/core/lib" - "github.com/stretchr/testify/require" - "os" "testing" ) // TestSimpleBlockSync test if a node can mine blocks on regtest func TestRegtestMiner(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - defer os.RemoveAll(dbDir1) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config1.SyncType = lib.NodeSyncTypeBlockSync - config1.Params = &lib.DeSoTestnetParams - config1.MaxSyncBlockHeight = 0 - config1.MinerPublicKeys = []string{"tBCKVERmG9nZpHTk2AVPqknWc1Mw9HHAnqrTpW1RnXpXMQ4PsQgnmV"} - - config1.Regtest = true - - node1 := cmd.NewNode(config1) + node1 := spawnNodeProtocol1(t, 18000, "node1") + params := lib.DeSoTestnetParams + node1.Config.Params = ¶ms + node1.Params = ¶ms + node1.Config.MaxSyncBlockHeight = 0 + node1.Config.MinerPublicKeys = 
[]string{"tBCKVERmG9nZpHTk2AVPqknWc1Mw9HHAnqrTpW1RnXpXMQ4PsQgnmV"} + node1.Config.Regtest = true node1 = startNode(t, node1) // wait for node1 to sync blocks mineHeight := uint32(40) <-listenForBlockHeight(node1, mineHeight) - - node1.Stop() } diff --git a/integration_testing/network_manager_utils_test.go b/integration_testing/network_manager_utils_test.go index f14cb39d9..d46b5a22f 100644 --- a/integration_testing/network_manager_utils_test.go +++ b/integration_testing/network_manager_utils_test.go @@ -253,6 +253,19 @@ func getRemoteNodeWithUserAgent(node *cmd.Node, userAgent string) *lib.RemoteNod return nil } +func spawnNodeProtocol1(t *testing.T, port uint32, id string) *cmd.Node { + dbDir := getDirectory(t) + t.Cleanup(func() { + os.RemoveAll(dbDir) + }) + config := generateConfig(t, port, dbDir, 10) + config.SyncType = lib.NodeSyncTypeBlockSync + node := cmd.NewNode(config) + node.Params.UserAgent = id + node.Params.ProtocolVersion = lib.ProtocolVersion1 + return node +} + func spawnNonValidatorNodeProtocol2(t *testing.T, port uint32, id string) *cmd.Node { dbDir := getDirectory(t) t.Cleanup(func() { diff --git a/integration_testing/rollback_test.go b/integration_testing/rollback_test.go index 8028866ac..c7b440b2b 100644 --- a/integration_testing/rollback_test.go +++ b/integration_testing/rollback_test.go @@ -10,7 +10,10 @@ import ( ) // Start blocks to height 5000 and then disconnect +// TODO: This test won't work now. func TestStateRollback(t *testing.T) { + t.Skipf("DisconnectBlocksToHeight doesn't work in PoS") + require := require.New(t) _ = require diff --git a/integration_testing/tools.go b/integration_testing/tools.go index df9aad581..42cb6c11f 100644 --- a/integration_testing/tools.go +++ b/integration_testing/tools.go @@ -69,7 +69,7 @@ func generateConfig(t *testing.T, port uint32, dataDir string, maxPeers uint32) config.MaxSyncBlockHeight = 0 config.ConnectIPs = []string{} config.PrivateMode = true - config.GlogV = 2 + config.GlogV = 0 config.GlogVmodule = "*bitcoin_manager*=0,*balance*=0,*view*=0,*frontend*=0,*peer*=0,*addr*=0,*network*=0,*utils*=0,*connection*=0,*main*=0,*server*=0,*mempool*=0,*miner*=0,*blockchain*=0" config.MaxInboundPeers = maxPeers config.TargetOutboundPeers = maxPeers @@ -427,6 +427,16 @@ func restartAtHeightAndReconnectNode(t *testing.T, node *cmd.Node, source *cmd.N return newNode, bridge } +func restartAtHeight(t *testing.T, node *cmd.Node, height uint32) *cmd.Node { + <-listenForBlockHeight(node, height) + return restartNode(t, node) +} + +func shutdownAtHeight(t *testing.T, node *cmd.Node, height uint32) *cmd.Node { + <-listenForBlockHeight(node, height) + return shutdownNode(t, node) +} + // listenForSyncPrefix will wait until the node starts downloading the provided syncPrefix in hypersync, and then sends // a message to the provided signal channel. 
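// The restartAtSyncPrefix and shutdownAtSyncPrefix helpers added further down compose
// this listener with restartNode/shutdownNode; the usual usage amounts to:
//
//	listener := make(chan bool)
//	listenForSyncPrefix(t, node, syncPrefix, listener)
//	<-listener
//	node = restartNode(t, node)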
func listenForSyncPrefix(t *testing.T, node *cmd.Node, syncPrefix []byte, signal chan<- bool) { @@ -470,6 +480,20 @@ func restartAtSyncPrefixAndReconnectNode(t *testing.T, node *cmd.Node, source *c return newNode, bridge } +func restartAtSyncPrefix(t *testing.T, node *cmd.Node, syncPrefix []byte) *cmd.Node { + listener := make(chan bool) + listenForSyncPrefix(t, node, syncPrefix, listener) + <-listener + return restartNode(t, node) +} + +func shutdownAtSyncPrefix(t *testing.T, node *cmd.Node, syncPrefix []byte) *cmd.Node { + listener := make(chan bool) + listenForSyncPrefix(t, node, syncPrefix, listener) + <-listener + return shutdownNode(t, node) +} + func randomUint32Between(t *testing.T, min, max uint32) uint32 { require := require.New(t) randomNumber, err := wire.RandomUint64() diff --git a/integration_testing/txindex_test.go b/integration_testing/txindex_test.go index dfd398557..702e63c10 100644 --- a/integration_testing/txindex_test.go +++ b/integration_testing/txindex_test.go @@ -1,11 +1,7 @@ package integration_testing import ( - "fmt" - "github.com/deso-protocol/core/cmd" "github.com/deso-protocol/core/lib" - "github.com/stretchr/testify/require" - "os" "testing" ) @@ -16,39 +12,21 @@ import ( // 4. node2 syncs MaxSyncBlockHeight blocks from node1, and builds txindex afterwards. // 5. compare node1 db and txindex matches node2. func TestSimpleTxIndex(t *testing.T) { - require := require.New(t) - _ = require - - dbDir1 := getDirectory(t) - dbDir2 := getDirectory(t) - defer os.RemoveAll(dbDir1) - defer os.RemoveAll(dbDir2) - - config1 := generateConfig(t, 18000, dbDir1, 10) - config1.HyperSync = true - config1.SyncType = lib.NodeSyncTypeBlockSync - config2 := generateConfig(t, 18001, dbDir2, 10) - config2.HyperSync = true - config2.SyncType = lib.NodeSyncTypeHyperSyncArchival - - config1.TXIndex = true - config2.TXIndex = true - config1.ConnectIPs = []string{"deso-seed-2.io:17000"} - - node1 := cmd.NewNode(config1) - node2 := cmd.NewNode(config2) - + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.ConnectIPs = []string{"deso-seed-2.io:17000"} + node1.Config.HyperSync = true + node1.Config.TXIndex = true node1 = startNode(t, node1) - node2 = startNode(t, node2) - // wait for node1 to sync blocks waitForNodeToFullySync(node1) - // bridge the nodes together. - bridge := NewConnectionBridge(node1, node2) - require.NoError(bridge.Start()) - - // wait for node2 to sync blocks. + node2 := spawnNodeProtocol1(t, 18001, "node2") + node2.Config.SyncType = lib.NodeSyncTypeHyperSyncArchival + node2.Config.ConnectIPs = []string{"127.0.0.1:18000"} + node2.Config.HyperSync = true + node2.Config.TXIndex = true + node2 = startNode(t, node2) + // wait for node1 to sync blocks waitForNodeToFullySync(node2) waitForNodeToFullySyncTxIndex(node1) @@ -56,8 +34,5 @@ func TestSimpleTxIndex(t *testing.T) { compareNodesByDB(t, node1, node2, 0) compareNodesByTxIndex(t, node1, node2, 0) - fmt.Println("Databases match!") - bridge.Disconnect() - node1.Stop() - node2.Stop() + t.Logf("Databases match!") } diff --git a/lib/peer.go b/lib/peer.go index 996d2632d..6bc683382 100644 --- a/lib/peer.go +++ b/lib/peer.go @@ -48,7 +48,6 @@ type Peer struct { StatsMtx deadlock.RWMutex TimeOffsetSecs int64 TimeConnected time.Time - startingHeight uint32 ID uint64 // Ping-related fields. 
LastPingNonce uint64 @@ -64,32 +63,16 @@ type Peer struct { Params *DeSoParams MessageChan chan *ServerMessage - // In order to complete a version negotiation successfully, the peer must - // reply to the initial version message we send them with a verack message - // containing the nonce from that initial version message. This ensures that - // the peer's IP isn't being spoofed since the only way to actually produce - // a verack with the appropriate response is to actually own the IP that - // the peer claims it has. As such, we maintain the version nonce we sent - // the peer and the version nonce they sent us here. - // - // TODO: The way we synchronize the version nonce is currently a bit - // messy; ideally we could do it without keeping global state. - VersionNonceSent uint64 - VersionNonceReceived uint64 - // A pointer to the Server srv *Server // Basic state. - PeerInfoMtx deadlock.Mutex - serviceFlags ServiceFlag - addrStr string - netAddr *wire.NetAddress - userAgent string - advertisedProtocolVersion uint64 - negotiatedProtocolVersion uint64 - VersionNegotiated bool - minTxFeeRateNanosPerKB uint64 + PeerInfoMtx deadlock.Mutex + serviceFlags ServiceFlag + latestHeight uint64 + addrStr string + netAddr *wire.NetAddress + minTxFeeRateNanosPerKB uint64 // Messages for which we are expecting a reply within a fixed // amount of time. This list is always sorted by ExpectedTime, // with the item having the earliest time at the front. @@ -682,10 +665,10 @@ func (pp *Peer) MinFeeRateNanosPerKB() uint64 { } // StartingBlockHeight is the height of the peer's blockchain tip. -func (pp *Peer) StartingBlockHeight() uint32 { +func (pp *Peer) StartingBlockHeight() uint64 { pp.StatsMtx.RLock() defer pp.StatsMtx.RUnlock() - return pp.startingHeight + return pp.latestHeight } // NumBlocksToSend is the number of blocks the Peer has requested from @@ -910,6 +893,20 @@ func (pp *Peer) _setKnownAddressesMap(key string, val bool) { pp.knownAddressesMap[key] = val } +func (pp *Peer) SetLatestBlockHeight(height uint64) { + pp.StatsMtx.Lock() + defer pp.StatsMtx.Unlock() + + pp.latestHeight = height +} + +func (pp *Peer) SetServiceFlag(sf ServiceFlag) { + pp.PeerInfoMtx.Lock() + defer pp.PeerInfoMtx.Unlock() + + pp.serviceFlags = sf +} + func (pp *Peer) outHandler() { pp.startGroup.Done() glog.V(1).Infof("Peer.outHandler: Starting outHandler for Peer %v", pp) diff --git a/lib/remote_node.go b/lib/remote_node.go index b8f378d07..6fb5e13ba 100644 --- a/lib/remote_node.go +++ b/lib/remote_node.go @@ -203,6 +203,10 @@ func (rn *RemoteNode) GetServiceFlag() ServiceFlag { return rn.handshakeMetadata.serviceFlag } +func (rn *RemoteNode) GetLatestBlockHeight() uint64 { + return rn.handshakeMetadata.latestBlockHeight +} + func (rn *RemoteNode) GetUserAgent() string { return rn.handshakeMetadata.userAgent } diff --git a/lib/remote_node_manager.go b/lib/remote_node_manager.go index bd23908bf..2dfb77431 100644 --- a/lib/remote_node_manager.go +++ b/lib/remote_node_manager.go @@ -78,7 +78,7 @@ func (manager *RemoteNodeManager) ProcessCompletedHandshake(remoteNode *RemoteNo manager.UnsetValidator(remoteNode) manager.SetNonValidator(remoteNode) } - manager.srv.HandleAcceptedPeer(remoteNode.GetPeer()) + manager.srv.HandleAcceptedPeer(remoteNode) manager.srv.maybeRequestAddresses(remoteNode) } diff --git a/lib/server.go b/lib/server.go index e54296739..5efd755f6 100644 --- a/lib/server.go +++ b/lib/server.go @@ -442,6 +442,7 @@ func NewServer( nodeMessageChannel: _nodeMessageChan, forceChecksum: _forceChecksum, AddrMgr: 
_desoAddrMgr, + params: _params, } if stateChangeSyncer != nil { @@ -837,8 +838,8 @@ func (srv *Server) GetBlocks(pp *Peer, maxHeight int) { func (srv *Server) _handleHeaderBundle(pp *Peer, msg *MsgDeSoHeaderBundle) { printHeight := pp.StartingBlockHeight() - if srv.blockchain.headerTip().Height > printHeight { - printHeight = srv.blockchain.headerTip().Height + if uint64(srv.blockchain.headerTip().Height) > printHeight { + printHeight = uint64(srv.blockchain.headerTip().Height) } glog.Infof(CLog(Yellow, fmt.Sprintf("Received header bundle with %v headers "+ "in state %s from peer %v. Downloaded ( %v / %v ) total headers", @@ -1545,6 +1546,7 @@ func (srv *Server) _startSync() { // Find a peer with StartingHeight bigger than our best header tip. var bestPeer *Peer for _, peer := range srv.cmgr.GetAllPeers() { + if !peer.IsSyncCandidate() { glog.Infof("Peer is not sync candidate: %v (isOutbound: %v)", peer, peer.isOutbound) continue @@ -1552,7 +1554,7 @@ func (srv *Server) _startSync() { // Choose the peer with the best height out of everyone who's a // valid sync candidate. - if peer.StartingBlockHeight() < bestHeight { + if peer.StartingBlockHeight() < uint64(bestHeight) { continue } @@ -1602,7 +1604,14 @@ func (srv *Server) _startSync() { } -func (srv *Server) HandleAcceptedPeer(pp *Peer) { +func (srv *Server) HandleAcceptedPeer(rn *RemoteNode) { + if rn == nil || rn.GetPeer() == nil { + return + } + pp := rn.GetPeer() + pp.SetServiceFlag(rn.GetServiceFlag()) + pp.SetLatestBlockHeight(rn.GetLatestBlockHeight()) + isSyncCandidate := pp.IsSyncCandidate() isSyncing := srv.blockchain.isSyncing() chainState := srv.blockchain.chainState() From edaf1749cdb0bef82dcbe709417054f248ffb386 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Tue, 30 Jan 2024 12:26:17 -0800 Subject: [PATCH 16/37] Fix fmt (#973) --- integration_testing/blocksync_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integration_testing/blocksync_test.go b/integration_testing/blocksync_test.go index e03aef152..cf077f2c1 100644 --- a/integration_testing/blocksync_test.go +++ b/integration_testing/blocksync_test.go @@ -1,7 +1,6 @@ package integration_testing import ( - "fmt" "testing" ) @@ -27,7 +26,7 @@ func TestSimpleBlockSync(t *testing.T) { waitForNodeToFullySync(node2) compareNodesByDB(t, node1, node2, 0) - fmt.Println("Databases match!") + t.Logf("Databases match!") } // TestSimpleSyncRestart tests if a node can successfully restart while syncing blocks. From 76bf439e148d2d0fba010b3e1b3f68fa3b967db4 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Tue, 30 Jan 2024 22:15:51 -0800 Subject: [PATCH 17/37] PoS Networking and Syncing Documentation (#974) * noop * NetworkManager documentation * gofmt --- lib/network_manager.go | 311 +++++++++++++++++++++++++---------------- lib/server.go | 8 +- 2 files changed, 197 insertions(+), 122 deletions(-) diff --git a/lib/network_manager.go b/lib/network_manager.go index 0227a6ac9..9a83676bc 100644 --- a/lib/network_manager.go +++ b/lib/network_manager.go @@ -15,11 +15,25 @@ import ( "time" ) -// NetworkManager is a structure that oversees all connections to remote nodes. It is responsible for kicking off -// the initial connections a node makes to the network. It is also responsible for creating RemoteNodes from all -// successful outbound and inbound connections. 
The NetworkManager also ensures that the node is connected to
-// the active validators, once the node reaches Proof of Stake.
-// TODO: Document more in later PRs
+// NetworkManager is a structure that oversees all connections to RemoteNodes. NetworkManager has the following
+// responsibilities in regard to the lifecycle of RemoteNodes:
+// - Maintain a list of all RemoteNodes that the node is connected to through the RemoteNodeManager.
+// - Initialize RemoteNodes from established outbound and inbound peer connections.
+// - Initiate and handle the handshake process with RemoteNodes.
+//
+// The NetworkManager is also responsible for opening and closing connections. It does this by running a set of
+// goroutines that periodically check the state of different categories of RemoteNodes, and disconnect or connect
+// RemoteNodes as needed. These categories of RemoteNodes include:
+// - Persistent RemoteNodes: These are RemoteNodes that we want to maintain a persistent (constant) connection to.
+// These are specified by the --connect-ips flag.
+// - Validators: These are RemoteNodes that are in the active validators set. We want to maintain a connection to
+// all active validators. We also want to disconnect from any validators that are no longer active.
+// - Non-Validators: These are RemoteNodes that are not in the active validators set. We want to maintain a connection
+// to at most a target number of outbound and inbound non-validators. If we have more than the target number of
+// outbound or inbound non-validators, we will disconnect the excess RemoteNodes.
+//
+// The NetworkManager also runs an auxiliary goroutine that periodically cleans up RemoteNodes that may have timed out
+// during the handshake process, or have otherwise become invalid.
 type NetworkManager struct {
 	// The parameters we are initialized with.
 	params *DeSoParams
@@ -28,7 +42,6 @@ type NetworkManager struct {
 	blsKeystore *BLSKeystore
 
 	handshake *HandshakeManager
-
 	rnManager *RemoteNodeManager
 
 	// The address manager keeps track of peer addresses we're aware of. When
@@ -86,10 +99,13 @@ func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, rnMana
 }
 
 func (nm *NetworkManager) Start() {
+	// If the NetworkManager routines are disabled, we do nothing.
 	if nm.params.DisableNetworkManagerRoutines {
 		return
 	}
 
+	// Start the NetworkManager goroutines. The startGroup is used to ensure that all goroutines have started before
+	// this function returns.
 	nm.startGroup.Add(4)
 	go nm.startPersistentConnector()
 	go nm.startValidatorConnector()
 	go nm.startNonValidatorConnector()
 	go nm.startRemoteNodeCleanup()
@@ -112,6 +128,17 @@ func (nm *NetworkManager) GetRemoteNodeManager() *RemoteNodeManager {
 	return nm.rnManager
 }
 
+func (nm *NetworkManager) SetTargetOutboundPeers(numPeers uint32) {
+	nm.targetNonValidatorOutboundRemoteNodes = numPeers
+}
+
+// ###########################
+// ## NetworkManager Routines
+// ###########################
+
+// startPersistentConnector is responsible for ensuring that the node is connected to all persistent IP addresses. It
+// does this by periodically checking the persistentIpToRemoteNodeIdsMap and connecting to any persistent IP addresses
+// that are not already connected.
 func (nm *NetworkManager) startPersistentConnector() {
 	nm.startGroup.Done()
 	for {
@@ -165,6 +192,8 @@ func (nm *NetworkManager) startNonValidatorConnector() {
 	}
 }
 
+// startRemoteNodeCleanup is responsible for cleaning up RemoteNodes that may have timed out during the handshake
+// process, or have otherwise become invalid.
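// Schematically, each of these routines, including the cleanup below, follows the same
// poll loop; the channel and interval names here are illustrative, since the loop
// bodies are elided from this hunk:
//
//	for {
//		select {
//		case <-exitChan:
//			return
//		case <-time.After(refreshInterval):
//			refresh() // e.g. drop RemoteNodes whose handshake deadline has passed
//		}
//	}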
func (nm *NetworkManager) startRemoteNodeCleanup() { nm.startGroup.Done() @@ -184,23 +213,29 @@ func (nm *NetworkManager) startRemoteNodeCleanup() { // ## Handlers (Peer, DeSoMessage) // ########################### +// _handleVersionMessage is called when a new version message is received. It is a wrapper around the handshake's +// handleVersionMessage function. func (nm *NetworkManager) _handleVersionMessage(origin *Peer, desoMsg DeSoMessage) { nm.handshake.handleVersionMessage(origin, desoMsg) } +// _handleVerackMessage is called when a new verack message is received. It is a wrapper around the handshake's +// handleVerackMessage function. func (nm *NetworkManager) _handleVerackMessage(origin *Peer, desoMsg DeSoMessage) { nm.handshake.handleVerackMessage(origin, desoMsg) } -func (nm *NetworkManager) _handleDonePeerMessage(origin *Peer, desoMsg DeSoMessage) { +// _handleDisconnectedPeerMessage is called when a peer is disconnected. It is responsible for cleaning up the +// RemoteNode associated with the peer. +func (nm *NetworkManager) _handleDisconnectedPeerMessage(origin *Peer, desoMsg DeSoMessage) { if desoMsg.GetMsgType() != MsgTypeDisconnectedPeer { return } - glog.V(2).Infof("NetworkManager.handleDonePeerMessage: Handling disconnected peer message for "+ + glog.V(2).Infof("NetworkManager._handleDisconnectedPeerMessage: Handling disconnected peer message for "+ "id=%v", origin.ID) nm.rnManager.DisconnectById(NewRemoteNodeId(origin.ID)) - // Update the persistentIpToRemoteNodeIdsMap. + // Update the persistentIpToRemoteNodeIdsMap, in case the disconnected peer was a persistent peer. ipRemoteNodeIdMap := nm.persistentIpToRemoteNodeIdsMap.ToMap() for ip, id := range ipRemoteNodeIdMap { if id.ToUint64() == origin.ID { @@ -223,6 +258,7 @@ func (nm *NetworkManager) _handleNewConnectionMessage(origin *Peer, desoMsg DeSo var remoteNode *RemoteNode var err error + // We create the RemoteNode differently depending on whether the connection is inbound or outbound. switch msg.Connection.GetConnectionType() { case ConnectionTypeInbound: remoteNode, err = nm.processInboundConnection(msg.Connection) @@ -244,6 +280,102 @@ func (nm *NetworkManager) _handleNewConnectionMessage(origin *Peer, desoMsg DeSo nm.handshake.InitiateHandshake(remoteNode) } +// processInboundConnection is called when a new inbound connection is established. At this point, the connection is not validated, +// nor is it assigned to a RemoteNode. This function is responsible for validating the connection and creating a RemoteNode from it. +// Once the RemoteNode is created, we will initiate handshake. +func (nm *NetworkManager) processInboundConnection(conn Connection) (*RemoteNode, error) { + var ic *inboundConnection + var ok bool + if ic, ok = conn.(*inboundConnection); !ok { + return nil, fmt.Errorf("NetworkManager.handleInboundConnection: Connection is not an inboundConnection") + } + + // If we want to limit inbound connections to one per IP address, check to make sure this address isn't already connected. 
+ if nm.limitOneInboundRemoteNodePerIP && + nm.isDuplicateInboundIPAddress(ic.connection.RemoteAddr()) { + + return nil, fmt.Errorf("NetworkManager.handleInboundConnection: Rejecting INBOUND peer (%s) due to "+ + "already having an inbound connection from the same IP with limit_one_inbound_connection_per_ip set", + ic.connection.RemoteAddr().String()) + } + + na, err := nm.ConvertIPStringToNetAddress(ic.connection.RemoteAddr().String()) + if err != nil { + return nil, errors.Wrapf(err, "NetworkManager.handleInboundConnection: Problem calling "+ + "ConvertIPStringToNetAddress for addr: (%s)", ic.connection.RemoteAddr().String()) + } + + remoteNode, err := nm.rnManager.AttachInboundConnection(ic.connection, na) + if remoteNode == nil || err != nil { + return nil, errors.Wrapf(err, "NetworkManager.handleInboundConnection: Problem calling "+ + "AttachInboundConnection for addr: (%s)", ic.connection.RemoteAddr().String()) + } + + return remoteNode, nil +} + +// processOutboundConnection is called when a new outbound connection is established. At this point, the connection is not validated, +// nor is it assigned to a RemoteNode. This function is responsible for validating the connection and creating a RemoteNode from it. +// Once the RemoteNode is created, we will initiate handshake. +func (nm *NetworkManager) processOutboundConnection(conn Connection) (*RemoteNode, error) { + var oc *outboundConnection + var ok bool + if oc, ok = conn.(*outboundConnection); !ok { + return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Connection is not an outboundConnection") + } + + if oc.failed { + return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Failed to connect to peer (%s:%v)", + oc.address.IP.String(), oc.address.Port) + } + + if !oc.isPersistent { + nm.AddrMgr.Connected(oc.address) + nm.AddrMgr.Good(oc.address) + } + + // If this is a non-persistent outbound peer and the group key overlaps with another peer we're already connected to then + // abort mission. We only connect to one peer per IP group in order to prevent Sybil attacks. + if !oc.isPersistent && nm.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { + return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+ + "connection with redundant group key (%s).", addrmgr.GroupKey(oc.address)) + } + + na, err := nm.ConvertIPStringToNetAddress(oc.connection.RemoteAddr().String()) + if err != nil { + return nil, errors.Wrapf(err, "NetworkManager.handleOutboundConnection: Problem calling ipToNetAddr "+ + "for addr: (%s)", oc.connection.RemoteAddr().String()) + } + + // Attach the connection before additional validation steps because it is already established. + remoteNode, err := nm.rnManager.AttachOutboundConnection(oc.connection, na, oc.attemptId, oc.isPersistent) + if remoteNode == nil || err != nil { + return nil, errors.Wrapf(err, "NetworkManager.handleOutboundConnection: Problem calling rnManager.AttachOutboundConnection "+ + "for addr: (%s)", oc.connection.RemoteAddr().String()) + } + + // If this is a persistent remote node or a validator, we don't need to do any extra connection validation. + if remoteNode.IsPersistent() || remoteNode.IsExpectedValidator() { + return remoteNode, nil + } + + // If we get here, it means we're dealing with a non-persistent or non-validator remote node. We perform additional + // connection validation. + + // If the group key overlaps with another peer we're already connected to then abort mission. 
We only connect to + // one peer per IP group in order to prevent Sybil attacks. + if nm.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { + return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+ + "connection with redundant group key (%s).", addrmgr.GroupKey(oc.address)) + } + nm.cmgr.AddToGroupKey(na) + + return remoteNode, nil +} + +// cleanupFailedInboundConnection is called when an inbound connection fails to be processed. It is responsible for +// cleaning up the RemoteNode and the connection. Most of the time, the RemoteNode will be nil, but if the RemoteNode +// was successfully created, we will disconnect it. func (nm *NetworkManager) cleanupFailedInboundConnection(remoteNode *RemoteNode, connection Connection) { glog.V(2).Infof("NetworkManager.cleanupFailedInboundConnection: Cleaning up failed inbound connection") if remoteNode != nil { @@ -252,6 +384,8 @@ func (nm *NetworkManager) cleanupFailedInboundConnection(remoteNode *RemoteNode, connection.Close() } +// cleanupFailedOutboundConnection is called when an outbound connection fails to be processed. It is responsible for +// cleaning up the RemoteNode and the connection. func (nm *NetworkManager) cleanupFailedOutboundConnection(connection Connection) { oc, ok := connection.(*outboundConnection) if !ok { @@ -259,6 +393,8 @@ func (nm *NetworkManager) cleanupFailedOutboundConnection(connection Connection) } glog.V(2).Infof("NetworkManager.cleanupFailedOutboundConnection: Cleaning up failed outbound connection") + // Find the RemoteNode associated with the connection. It should almost always exist, since we create the RemoteNode + // as we're attempting to connect to the address. id := NewRemoteNodeId(oc.attemptId) rn := nm.rnManager.GetRemoteNodeById(id) if rn != nil { @@ -272,6 +408,8 @@ func (nm *NetworkManager) cleanupFailedOutboundConnection(connection Connection) // ## Persistent Connections // ########################### +// refreshConnectIps is called periodically by the persistent connector. It is responsible for connecting to all +// persistent IP addresses that we are not already connected to. func (nm *NetworkManager) refreshConnectIps() { // Connect to addresses passed via the --connect-ips flag. These addresses are persistent in the sense that if we // disconnect from one, we will try to reconnect to the same one. @@ -296,6 +434,8 @@ func (nm *NetworkManager) refreshConnectIps() { // ## Validator Connections // ########################### +// SetActiveValidatorsMap is called by the owner of the NetworkManager to update the activeValidatorsMap. This should +// generally be done whenever the active validators set changes. func (nm *NetworkManager) SetActiveValidatorsMap(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { nm.activeValidatorsMapLock.Lock() defer nm.activeValidatorsMapLock.Unlock() @@ -314,7 +454,7 @@ func (nm *NetworkManager) getActiveValidatorsMap() *collections.ConcurrentMap[bl func (nm *NetworkManager) refreshValidatorIndex(activeValidatorsMap *collections.ConcurrentMap[bls.SerializedPublicKey, consensus.Validator]) { // De-index inactive validators. We skip any checks regarding RemoteNodes connection status, nor do we verify whether // de-indexing the validator would result in an excess number of outbound/inbound connections. Any excess connections - // will be cleaned up by the peer connector. + // will be cleaned up by the NonValidator connector. 
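
An aside on the two group-key checks in processOutboundConnection above: addrmgr.GroupKey comes from btcd and maps every address in the same netgroup (for plain IPv4, the /16 prefix) to a single string key, so "one peer per IP group" reduces to tracking which keys are in use. A toy illustration; the usedGroupKeys map is an assumption made for the sketch, not the ConnectionManager's actual representation:

	// groupTracker enforces at most one outbound connection per network group.
	type groupTracker struct {
		usedGroupKeys map[string]bool
	}

	// isRedundant reports whether we already hold a connection in na's netgroup.
	func (gt *groupTracker) isRedundant(na *wire.NetAddress) bool {
		return gt.usedGroupKeys[addrmgr.GroupKey(na)]
	}

	// markConnected records na's netgroup as taken.
	func (gt *groupTracker) markConnected(na *wire.NetAddress) {
		gt.usedGroupKeys[addrmgr.GroupKey(na)] = true
	}

The validator de-indexing loop continues below.
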
validatorRemoteNodeMap := nm.rnManager.GetValidatorIndex().ToMap() for pk, rn := range validatorRemoteNodeMap { // If the validator is no longer active, de-index it. @@ -367,6 +507,7 @@ func (nm *NetworkManager) connectValidators(activeValidatorsMap *collections.Con if exists { continue } + // If the validator is our node, continue. if nm.blsKeystore.GetSigner().GetPublicKey().Serialize() == pk { continue } @@ -392,8 +533,8 @@ func (nm *NetworkManager) connectValidators(activeValidatorsMap *collections.Con // ## NonValidator Connections // ########################### -// refreshNonValidatorOutboundIndex is called periodically by the peer connector. It is responsible for disconnecting excess -// outbound remote nodes. +// refreshNonValidatorOutboundIndex is called periodically by the NonValidator connector. It is responsible for +// disconnecting excess outbound remote nodes. func (nm *NetworkManager) refreshNonValidatorOutboundIndex() { // There are three categories of outbound remote nodes: attempted, connected, and persistent. All of these // remote nodes are stored in the same non-validator outbound index. We want to disconnect excess remote nodes that @@ -435,7 +576,7 @@ func (nm *NetworkManager) refreshNonValidatorOutboundIndex() { break } glog.V(2).Infof("NetworkManager.refreshNonValidatorOutboundIndex: Disconnecting attempted remote "+ - "node (id=%v) due to excess outbound peers", rn.GetId()) + "node (id=%v) due to excess outbound RemoteNodes", rn.GetId()) nm.rnManager.Disconnect(rn) excessiveOutboundRemoteNodes-- } @@ -445,7 +586,7 @@ func (nm *NetworkManager) refreshNonValidatorOutboundIndex() { break } glog.V(2).Infof("NetworkManager.refreshNonValidatorOutboundIndex: Disconnecting connected remote "+ - "node (id=%v) due to excess outbound peers", rn.GetId()) + "node (id=%v) due to excess outbound RemoteNodes", rn.GetId()) nm.rnManager.Disconnect(rn) excessiveOutboundRemoteNodes-- } @@ -464,12 +605,18 @@ func (nm *NetworkManager) refreshNonValidatorInboundIndex() { inboundRemoteNodes := nm.rnManager.GetNonValidatorInboundIndex().GetAll() var connectedInboundRemoteNodes []*RemoteNode for _, rn := range inboundRemoteNodes { - // We only want to disconnect remote nodes that have completed handshake. + // We only want to disconnect remote nodes that have completed handshake. RemoteNodes that don't have the + // handshake completed status could be validators, in which case we don't want to disconnect them. It is also + // possible that the RemoteNodes without completed handshake will end up never finishing it, in which case + // they will be removed by the cleanup goroutine, once the handshake timeout is reached. if rn.IsHandshakeCompleted() { connectedInboundRemoteNodes = append(connectedInboundRemoteNodes, rn) } } + // Having separated the connected remote nodes, we can now find the actual number of connected inbound remote nodes + // that have completed handshake. We can then find out how many remote nodes we need to disconnect. 
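
The count computed next feeds the same guarded-subtraction pattern used for the outbound index above. Since the counts are uint32, the subtraction must be gated so it cannot underflow; the pattern in isolation:

	// excessCount returns how many connections exceed the target, saturating at
	// zero instead of underflowing the unsigned subtraction.
	func excessCount(current uint32, target uint32) uint32 {
		if current > target {
			return current - target
		}
		return 0
	}
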
+	numConnectedInboundRemoteNodes = uint32(len(connectedInboundRemoteNodes))
 	excessiveInboundRemoteNodes := uint32(0)
 	if numConnectedInboundRemoteNodes > nm.targetNonValidatorInboundRemoteNodes {
 		excessiveInboundRemoteNodes = numConnectedInboundRemoteNodes - nm.targetNonValidatorInboundRemoteNodes
@@ -479,24 +626,40 @@
 			break
 		}
 		glog.V(2).Infof("NetworkManager.refreshNonValidatorInboundIndex: Disconnecting inbound remote "+
-			"node (id=%v) due to excess inbound peers", rn.GetId())
+			"node (id=%v) due to excess inbound RemoteNodes", rn.GetId())
 		nm.rnManager.Disconnect(rn)
 		excessiveInboundRemoteNodes--
 	}
 }
 
+// connectNonValidators attempts to connect to new outbound nonValidator remote nodes. It is called periodically by the
+// nonValidator connector.
 func (nm *NetworkManager) connectNonValidators() {
-	numOutboundPeers := uint32(nm.rnManager.GetNonValidatorOutboundIndex().Count())
-
-	remainingOutboundPeers := uint32(0)
-	if numOutboundPeers < nm.targetNonValidatorOutboundRemoteNodes {
-		remainingOutboundPeers = nm.targetNonValidatorOutboundRemoteNodes - numOutboundPeers
+	// First, find all nonValidator outbound remote nodes that are not persistent.
+	allOutboundRemoteNodes := nm.rnManager.GetNonValidatorOutboundIndex().GetAll()
+	var nonValidatorOutboundRemoteNodes []*RemoteNode
+	for _, rn := range allOutboundRemoteNodes {
+		// We skip persistent remote nodes and expected validators.
+		if rn.IsPersistent() || rn.IsExpectedValidator() {
+			continue
+		}
+		nonValidatorOutboundRemoteNodes = append(nonValidatorOutboundRemoteNodes, rn)
+	}
+	// Now find the number of nonValidator, non-persistent outbound remote nodes.
+	numOutboundRemoteNodes := uint32(len(nonValidatorOutboundRemoteNodes))
+	remainingOutboundRemoteNodes := uint32(0)
+	// Check if we need to connect to more nonValidator outbound remote nodes.
+	if numOutboundRemoteNodes < nm.targetNonValidatorOutboundRemoteNodes {
+		remainingOutboundRemoteNodes = nm.targetNonValidatorOutboundRemoteNodes - numOutboundRemoteNodes
 	}
-	for ii := uint32(0); ii < remainingOutboundPeers; ii++ {
+	for ii := uint32(0); ii < remainingOutboundRemoteNodes; ii++ {
+		// Get a random unconnected address from the address manager. If we can't find one, we break out of the loop.
 		addr := nm.getRandomUnconnectedAddress()
 		if addr == nil {
 			break
 		}
+		// Attempt to connect to the address.
 		nm.AddrMgr.Attempt(addr)
 		if err := nm.rnManager.CreateNonValidatorOutboundConnection(addr); err != nil {
 			glog.V(2).Infof("NetworkManager.connectNonValidators: Problem creating non-validator outbound "+
@@ -505,6 +668,7 @@
 	}
 }
 
+// getRandomUnconnectedAddress returns a random address from the address manager that we are not already connected to.
func (nm *NetworkManager) getRandomUnconnectedAddress() *wire.NetAddress { for tries := 0; tries < 100; tries++ { addr := nm.AddrMgr.GetAddress() @@ -532,6 +696,10 @@ func (nm *NetworkManager) getRandomUnconnectedAddress() *wire.NetAddress { return nil } +// ########################### +// ## RemoteNode Dial Functions +// ########################### + func (nm *NetworkManager) CreateValidatorConnection(ipStr string, publicKey *bls.PublicKey) error { netAddr, err := nm.ConvertIPStringToNetAddress(ipStr) if err != nil { @@ -556,102 +724,9 @@ func (nm *NetworkManager) CreateNonValidatorOutboundConnection(ipStr string) err return nm.rnManager.CreateNonValidatorOutboundConnection(netAddr) } -func (nm *NetworkManager) SetTargetOutboundPeers(numPeers uint32) { - nm.targetNonValidatorOutboundRemoteNodes = numPeers -} - -// processInboundConnection is called when a new inbound connection is established. At this point, the connection is not validated, -// nor is it assigned to a RemoteNode. This function is responsible for validating the connection and creating a RemoteNode from it. -// Once the RemoteNode is created, we will initiate handshake. -func (nm *NetworkManager) processInboundConnection(conn Connection) (*RemoteNode, error) { - var ic *inboundConnection - var ok bool - if ic, ok = conn.(*inboundConnection); !ok { - return nil, fmt.Errorf("NetworkManager.handleInboundConnection: Connection is not an inboundConnection") - } - - // If we want to limit inbound connections to one per IP address, check to make sure this address isn't already connected. - if nm.limitOneInboundRemoteNodePerIP && - nm.isDuplicateInboundIPAddress(ic.connection.RemoteAddr()) { - - return nil, fmt.Errorf("NetworkManager.handleInboundConnection: Rejecting INBOUND peer (%s) due to "+ - "already having an inbound connection from the same IP with limit_one_inbound_connection_per_ip set", - ic.connection.RemoteAddr().String()) - } - - na, err := nm.ConvertIPStringToNetAddress(ic.connection.RemoteAddr().String()) - if err != nil { - return nil, errors.Wrapf(err, "NetworkManager.handleInboundConnection: Problem calling "+ - "ConvertIPStringToNetAddress for addr: (%s)", ic.connection.RemoteAddr().String()) - } - - remoteNode, err := nm.rnManager.AttachInboundConnection(ic.connection, na) - if remoteNode == nil || err != nil { - return nil, errors.Wrapf(err, "NetworkManager.handleInboundConnection: Problem calling "+ - "AttachInboundConnection for addr: (%s)", ic.connection.RemoteAddr().String()) - } - - return remoteNode, nil -} - -// processOutboundConnection is called when a new outbound connection is established. At this point, the connection is not validated, -// nor is it assigned to a RemoteNode. This function is responsible for validating the connection and creating a RemoteNode from it. -// Once the RemoteNode is created, we will initiate handshake. 
-func (nm *NetworkManager) processOutboundConnection(conn Connection) (*RemoteNode, error) { - var oc *outboundConnection - var ok bool - if oc, ok = conn.(*outboundConnection); !ok { - return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Connection is not an outboundConnection") - } - - if oc.failed { - return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Failed to connect to peer (%s:%v)", - oc.address.IP.String(), oc.address.Port) - } - - if !oc.isPersistent { - nm.AddrMgr.Connected(oc.address) - nm.AddrMgr.Good(oc.address) - } - - // If this is a non-persistent outbound peer and the group key overlaps with another peer we're already connected to then - // abort mission. We only connect to one peer per IP group in order to prevent Sybil attacks. - if !oc.isPersistent && nm.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { - return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+ - "connection with redundant group key (%s).", addrmgr.GroupKey(oc.address)) - } - - na, err := nm.ConvertIPStringToNetAddress(oc.connection.RemoteAddr().String()) - if err != nil { - return nil, errors.Wrapf(err, "NetworkManager.handleOutboundConnection: Problem calling ipToNetAddr "+ - "for addr: (%s)", oc.connection.RemoteAddr().String()) - } - - // Attach the connection before additional validation steps because it is already established. - remoteNode, err := nm.rnManager.AttachOutboundConnection(oc.connection, na, oc.attemptId, oc.isPersistent) - if remoteNode == nil || err != nil { - return nil, errors.Wrapf(err, "NetworkManager.handleOutboundConnection: Problem calling rnManager.AttachOutboundConnection "+ - "for addr: (%s)", oc.connection.RemoteAddr().String()) - } - - // If this is a persistent remote node or a validator, we don't need to do any extra connection validation. - if remoteNode.IsPersistent() || remoteNode.IsExpectedValidator() { - return remoteNode, nil - } - - // If we get here, it means we're dealing with a non-persistent or non-validator remote node. We perform additional - // connection validation. - - // If the group key overlaps with another peer we're already connected to then abort mission. We only connect to - // one peer per IP group in order to prevent Sybil attacks. - if nm.cmgr.IsFromRedundantOutboundIPAddress(oc.address) { - return nil, fmt.Errorf("NetworkManager.handleOutboundConnection: Rejecting OUTBOUND NON-PERSISTENT "+ - "connection with redundant group key (%s).", addrmgr.GroupKey(oc.address)) - } - nm.cmgr.AddToGroupKey(na) - - return remoteNode, nil -} +// ########################### +// ## Helper Functions +// ########################### func (nm *NetworkManager) ConvertIPStringToNetAddress(ipStr string) (*wire.NetAddress, error) { netAddr, err := IPToNetAddr(ipStr, nm.AddrMgr, nm.params) diff --git a/lib/server.go b/lib/server.go index 5efd755f6..9eff29d98 100644 --- a/lib/server.go +++ b/lib/server.go @@ -1711,8 +1711,8 @@ func (srv *Server) _cleanupDonePeerState(pp *Peer) { }, false) } -func (srv *Server) _handleDonePeer(pp *Peer) { - glog.V(1).Infof("Server._handleDonePeer: Processing DonePeer: %v", pp) +func (srv *Server) _handleDisconnectedPeerMessage(pp *Peer) { + glog.V(1).Infof("Server._handleDisconnectedPeerMessage: Processing DonePeer: %v", pp) srv._cleanupDonePeerState(pp) @@ -2313,8 +2313,8 @@ func (srv *Server) _handleControlMessages(serverMessage *ServerMessage) (_should switch serverMessage.Msg.(type) { // Control messages used internally to signal to the server. 
case *MsgDeSoDisconnectedPeer: - srv._handleDonePeer(serverMessage.Peer) - srv.networkManager._handleDonePeerMessage(serverMessage.Peer, serverMessage.Msg) + srv._handleDisconnectedPeerMessage(serverMessage.Peer) + srv.networkManager._handleDisconnectedPeerMessage(serverMessage.Peer, serverMessage.Msg) case *MsgDeSoNewConnection: srv.networkManager._handleNewConnectionMessage(serverMessage.Peer, serverMessage.Msg) case *MsgDeSoQuit: From a6b26b73aa11ae3ac01865adbec791f38135dc9b Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Wed, 31 Jan 2024 09:35:25 -0800 Subject: [PATCH 18/37] Rewording (#981) --- lib/remote_node.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/remote_node.go b/lib/remote_node.go index 6fb5e13ba..07610f8ec 100644 --- a/lib/remote_node.go +++ b/lib/remote_node.go @@ -35,8 +35,8 @@ func (id RemoteNodeId) ToUint64() uint64 { return uint64(id) } -// RemoteNode is a consensus-aware wrapper around the network Peer object. It is used to manage the lifecycle of a peer -// and to store consensus-related metadata about the peer. The RemoteNode can wrap around either an inbound or outbound +// RemoteNode is a chain-aware wrapper around the network Peer object. It is used to manage the lifecycle of a peer +// and to store blockchain-related metadata about the peer. The RemoteNode can wrap around either an inbound or outbound // peer connection. For outbound peers, the RemoteNode is created prior to the connection being established. In this case, // the RemoteNode will be first used to initiate an OutboundConnectionAttempt, and then store the resulting connected peer. // For inbound peers, the RemoteNode is created after the connection is established in ConnectionManager. From 20f2eb4cfcc11d80d88233bd2443d0ec0c2b2d2a Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Wed, 31 Jan 2024 11:31:34 -0800 Subject: [PATCH 19/37] Rename (#986) --- lib/{handshake_controller.go => handshake_manager.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename lib/{handshake_controller.go => handshake_manager.go} (100%) diff --git a/lib/handshake_controller.go b/lib/handshake_manager.go similarity index 100% rename from lib/handshake_controller.go rename to lib/handshake_manager.go From 5ffc022f9506be27fd4dd612448943c53da5ab77 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:03:24 -0800 Subject: [PATCH 20/37] PoS NetworkManager Fix Deadlock and Test AddIps (#996) * Fix Deadlock and Test AddIps * Glog fix --- .../network_manager_routines_test.go | 18 +++++++++++++++--- lib/remote_node_manager.go | 9 ++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/integration_testing/network_manager_routines_test.go b/integration_testing/network_manager_routines_test.go index 141e36026..31d53e211 100644 --- a/integration_testing/network_manager_routines_test.go +++ b/integration_testing/network_manager_routines_test.go @@ -468,17 +468,29 @@ func TestConnectionControllerNonValidatorConnectorInbound(t *testing.T) { func TestConnectionControllerNonValidatorConnectorAddressMgr(t *testing.T) { // Spawn a non-validator node1. Set node1's targetOutboundPeers to 2 and targetInboundPeers to 0. Then // add two ip addresses to AddrMgr. Make sure that node1 creates outbound connections to these nodes. 
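
The waitForCountRemoteNodeIndexer-style helpers these tests call poll the node until its connection counts match the expectation or a timeout elapses. A generic sketch of that polling idiom, under the assumption that the real helpers (which live in the integration testing utilities) follow the same shape; the test body continues below:

	// waitForCondition polls cond until it returns true or the deadline passes.
	func waitForCondition(t *testing.T, timeout time.Duration, cond func() bool) {
		deadline := time.Now().Add(timeout)
		for time.Now().Before(deadline) {
			if cond() {
				return
			}
			time.Sleep(100 * time.Millisecond)
		}
		t.Fatalf("waitForCondition: condition not met within %v", timeout)
	}
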
- node1 := spawnNonValidatorNodeProtocol2(t, 18000, "node1") + node1 := spawnNodeProtocol1(t, 18000, "node1") node1.Config.TargetOutboundPeers = 2 node1.Config.MaxInboundPeers = 0 + node1.Config.MaxSyncBlockHeight = 1 node1 = startNode(t, node1) nm := node1.Server.GetNetworkManager() na1, err := nm.ConvertIPStringToNetAddress("deso-seed-2.io:17000") - na2, err := nm.ConvertIPStringToNetAddress("deso-seed-3.io:17000") require.NoError(t, err) nm.AddrMgr.AddAddress(na1, na1) - nm.AddrMgr.AddAddress(na2, na2) + waitForCountRemoteNodeIndexerHandshakeCompleted(t, node1, 1, 0, 1, 0) +} + +func TestConnectionControllerNonValidatorConnectorAddIps(t *testing.T) { + // Spawn a non-validator node1. Set node1's targetOutboundPeers to 2 and targetInboundPeers to 0. Then + // add two ip addresses to the ConnectIPs. Make sure that node1 creates outbound connections to these nodes. + node1 := spawnNodeProtocol1(t, 18000, "node1") + node1.Config.TargetOutboundPeers = 2 + node1.Config.MaxInboundPeers = 0 + node1.Config.MaxSyncBlockHeight = 1 + node1.Config.AddIPs = []string{"deso-seed-2.io", "deso-seed-3.io"} + + node1 = startNode(t, node1) waitForCountRemoteNodeIndexer(t, node1, 2, 0, 2, 0) } diff --git a/lib/remote_node_manager.go b/lib/remote_node_manager.go index 2dfb77431..3300d5d62 100644 --- a/lib/remote_node_manager.go +++ b/lib/remote_node_manager.go @@ -52,7 +52,8 @@ func NewRemoteNodeManager(srv *Server, bc *Blockchain, cmgr *ConnectionManager, } func (manager *RemoteNodeManager) DisconnectAll() { - for _, rn := range manager.GetAllRemoteNodes().GetAll() { + allRemoteNodes := manager.GetAllRemoteNodes().GetAll() + for _, rn := range allRemoteNodes { glog.V(2).Infof("RemoteNodeManager.DisconnectAll: Disconnecting from remote node (id=%v)", rn.GetId()) manager.Disconnect(rn) } @@ -135,10 +136,8 @@ func (manager *RemoteNodeManager) SendMessage(rn *RemoteNode, desoMessage DeSoMe } func (manager *RemoteNodeManager) Cleanup() { - manager.mtx.Lock() - defer manager.mtx.Unlock() - - for _, rn := range manager.GetAllRemoteNodes().GetAll() { + allRemoteNodes := manager.GetAllRemoteNodes().GetAll() + for _, rn := range allRemoteNodes { if rn.IsTimedOut() { glog.V(2).Infof("RemoteNodeManager.Cleanup: Disconnecting from remote node (id=%v)", rn.GetId()) manager.Disconnect(rn) From 1b0bc01171b94b790e8799a1519fa1d98ba86147 Mon Sep 17 00:00:00 2001 From: Piotr Nojszewski <29924594+AeonSw4n@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:06:58 -0800 Subject: [PATCH 21/37] PoS NetworkManager Comment Nit (#997) --- integration_testing/network_manager_routines_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integration_testing/network_manager_routines_test.go b/integration_testing/network_manager_routines_test.go index 31d53e211..368c419c8 100644 --- a/integration_testing/network_manager_routines_test.go +++ b/integration_testing/network_manager_routines_test.go @@ -466,10 +466,10 @@ func TestConnectionControllerNonValidatorConnectorInbound(t *testing.T) { } func TestConnectionControllerNonValidatorConnectorAddressMgr(t *testing.T) { - // Spawn a non-validator node1. Set node1's targetOutboundPeers to 2 and targetInboundPeers to 0. Then - // add two ip addresses to AddrMgr. Make sure that node1 creates outbound connections to these nodes. + // Spawn a non-validator node1. Set node1's targetOutboundPeers to 1 and targetInboundPeers to 0. Then + // add one ip address to AddrMgr. Make sure that node1 creates outbound connections to this node. 
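
For reference, the btcd address manager calls used in this test and in connectNonValidators form a small lifecycle. A condensed sketch using the addrmgr API as it already appears in this diff (the setup line and variable names are illustrative):

	// Illustrative lifecycle of btcd's address manager as used here.
	func addrMgrLifecycle(na *wire.NetAddress) {
		amgr := addrmgr.New("datadir", net.LookupIP)
		amgr.AddAddress(na, na) // learn a candidate address (addr, source)
		ka := amgr.GetAddress() // pick a candidate to dial; may be nil
		if ka == nil {
			return
		}
		amgr.Attempt(ka.NetAddress())   // record that we tried it
		amgr.Connected(ka.NetAddress()) // record the successful dial
		amgr.Good(ka.NetAddress())      // promote it to the tried bucket
	}
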
node1 := spawnNodeProtocol1(t, 18000, "node1") - node1.Config.TargetOutboundPeers = 2 + node1.Config.TargetOutboundPeers = 1 node1.Config.MaxInboundPeers = 0 node1.Config.MaxSyncBlockHeight = 1 @@ -483,7 +483,7 @@ func TestConnectionControllerNonValidatorConnectorAddressMgr(t *testing.T) { func TestConnectionControllerNonValidatorConnectorAddIps(t *testing.T) { // Spawn a non-validator node1. Set node1's targetOutboundPeers to 2 and targetInboundPeers to 0. Then - // add two ip addresses to the ConnectIPs. Make sure that node1 creates outbound connections to these nodes. + // add two ip addresses to AddIps. Make sure that node1 creates outbound connections to these nodes. node1 := spawnNodeProtocol1(t, 18000, "node1") node1.Config.TargetOutboundPeers = 2 node1.Config.MaxInboundPeers = 0 From aa0fb853e0ef12dc2bd016cfcfb6aabf00f71b23 Mon Sep 17 00:00:00 2001 From: tholonious <99746187+tholonious@users.noreply.github.com> Date: Wed, 14 Feb 2024 19:09:05 -0500 Subject: [PATCH 22/37] Add ProtocolVersion To Regtest Params (#1035) --- lib/constants.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/constants.go b/lib/constants.go index 543e5bbc4..cd3e08995 100644 --- a/lib/constants.go +++ b/lib/constants.go @@ -844,6 +844,9 @@ func (params *DeSoParams) EnableRegtest() { // Clear the seeds params.DNSSeeds = []string{} + // Set the protocol version + params.ProtocolVersion = ProtocolVersion2 + // Mine blocks incredibly quickly params.TimeBetweenBlocks = 2 * time.Second params.TimeBetweenDifficultyRetargets = 6 * time.Second From f0c7b5d6a23174b450d0e69c25ef266d548e5954 Mon Sep 17 00:00:00 2001 From: iamsofonias Date: Wed, 14 Feb 2024 19:53:29 -0500 Subject: [PATCH 23/37] Fix Constructor Name for NetworkManager --- lib/network_manager.go | 11 ++++++----- lib/server.go | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/network_manager.go b/lib/network_manager.go index 9a83676bc..f3b138c33 100644 --- a/lib/network_manager.go +++ b/lib/network_manager.go @@ -2,6 +2,11 @@ package lib import ( "fmt" + "net" + "strconv" + "sync" + "time" + "github.com/btcsuite/btcd/addrmgr" "github.com/btcsuite/btcd/wire" "github.com/deso-protocol/core/bls" @@ -9,10 +14,6 @@ import ( "github.com/deso-protocol/core/consensus" "github.com/golang/glog" "github.com/pkg/errors" - "net" - "strconv" - "sync" - "time" ) // NetworkManager is a structure that oversees all connections to RemoteNodes. 
NetworkManager has the following @@ -76,7 +77,7 @@ type NetworkManager struct { exitGroup sync.WaitGroup } -func NewConnectionController(params *DeSoParams, cmgr *ConnectionManager, rnManager *RemoteNodeManager, +func NewNetworkManager(params *DeSoParams, cmgr *ConnectionManager, rnManager *RemoteNodeManager, blsKeystore *BLSKeystore, addrMgr *addrmgr.AddrManager, connectIps []string, targetNonValidatorOutboundRemoteNodes uint32, targetNonValidatorInboundRemoteNodes uint32, limitOneInboundConnectionPerIP bool) *NetworkManager { diff --git a/lib/server.go b/lib/server.go index dd13023b2..76017f8a0 100644 --- a/lib/server.go +++ b/lib/server.go @@ -515,7 +515,7 @@ func NewServer( nodeServices |= SFPosValidator } rnManager := NewRemoteNodeManager(srv, _chain, _cmgr, _blsKeystore, _params, _minFeeRateNanosPerKB, nodeServices) - srv.networkManager = NewConnectionController(_params, _cmgr, rnManager, _blsKeystore, _desoAddrMgr, + srv.networkManager = NewNetworkManager(_params, _cmgr, rnManager, _blsKeystore, _desoAddrMgr, _connectIps, _targetOutboundPeers, _maxInboundPeers, _limitOneInboundConnectionPerIP) if srv.stateChangeSyncer != nil { From bc95c03a01ede2b2d0d7f0cce6e4eacd79c09fe1 Mon Sep 17 00:00:00 2001 From: tholonious <99746187+tholonious@users.noreply.github.com> Date: Fri, 16 Feb 2024 10:40:43 -0500 Subject: [PATCH 24/37] Broadcast Votes, Timeouts, and Block Proposals To Validators (#1036) * Broadcast Votes and Timeouts To Validators * Broadcast Block Proposals To Network * Fix failed unit tests * Address Nina's comments --- lib/pos_consensus.go | 34 +++++++++++++++++++++++++++++----- lib/pos_consensus_test.go | 20 ++++++++++++++++++-- lib/server.go | 1 + 3 files changed, 48 insertions(+), 7 deletions(-) diff --git a/lib/pos_consensus.go b/lib/pos_consensus.go index 6c3382899..209665b5e 100644 --- a/lib/pos_consensus.go +++ b/lib/pos_consensus.go @@ -14,6 +14,7 @@ import ( type FastHotStuffConsensus struct { lock sync.RWMutex + networkManager *NetworkManager blockchain *Blockchain fastHotStuffEventLoop consensus.FastHotStuffEventLoop mempool Mempool @@ -25,6 +26,7 @@ type FastHotStuffConsensus struct { func NewFastHotStuffConsensus( params *DeSoParams, + networkManager *NetworkManager, blockchain *Blockchain, mempool Mempool, signer *BLSSigner, @@ -32,6 +34,7 @@ func NewFastHotStuffConsensus( timeoutBaseDurationMilliseconds uint64, ) *FastHotStuffConsensus { return &FastHotStuffConsensus{ + networkManager: networkManager, blockchain: blockchain, fastHotStuffEventLoop: consensus.NewFastHotStuffEventLoop(), mempool: mempool, @@ -281,7 +284,18 @@ func (fc *FastHotStuffConsensus) handleBlockProposalEvent( ) } - // TODO: Broadcast the block proposal to the network + // Broadcast the block to the validator network + validators := fc.networkManager.rnManager.GetValidatorIndex().GetAll() + for _, validator := range validators { + sendMessageToRemoteNodeAsync(validator, blockProposal) + } + + // Broadcast the block to all inbound non-validator peers. This allows them to sync + // blocks from us. 
+	nonValidators := fc.networkManager.rnManager.GetNonValidatorInboundIndex().GetAll()
+	for _, nonValidator := range nonValidators {
+		sendMessageToRemoteNodeAsync(nonValidator, blockProposal)
+	}
 
 	fc.logBlockProposal(blockProposal, blockHash)
 	return nil
@@ -346,8 +360,11 @@ func (fc *FastHotStuffConsensus) HandleLocalVoteEvent(event *consensus.FastHotSt
 		return errors.Errorf("FastHotStuffConsensus.HandleLocalVoteEvent: Error processing vote locally: %v", err)
 	}
 
-	// Broadcast the vote message to the network
-	// TODO: Broadcast the vote message to the network or alternatively to just the block proposer
+	// Broadcast the vote message to the validator network
+	validators := fc.networkManager.rnManager.GetValidatorIndex().GetAll()
+	for _, validator := range validators {
+		sendMessageToRemoteNodeAsync(validator, voteMsg)
+	}
 
 	return nil
 }
@@ -461,8 +478,11 @@ func (fc *FastHotStuffConsensus) HandleLocalTimeoutEvent(event *consensus.FastHo
 		return errors.Errorf("FastHotStuffConsensus.HandleLocalTimeoutEvent: Error processing timeout locally: %v", err)
 	}
 
-	// Broadcast the timeout message to the network
-	// TODO: Broadcast the timeout message to the network or alternatively to just the block proposer
+	// Broadcast the timeout message to the validator network
+	validators := fc.networkManager.rnManager.GetValidatorIndex().GetAll()
+	for _, validator := range validators {
+		sendMessageToRemoteNodeAsync(validator, timeoutMsg)
+	}
 
 	return nil
 }
@@ -815,6 +835,10 @@ func isProperlyFormedBlockProposalEvent(event *consensus.FastHotStuffEvent) bool
 	return false
 }
 
+// sendMessageToRemoteNodeAsync sends the message on a separate goroutine so that
+// a slow peer cannot block the consensus event loop.
+func sendMessageToRemoteNodeAsync(remoteNode *RemoteNode, msg DeSoMessage) {
+	go func(rn *RemoteNode, m DeSoMessage) { rn.SendMessage(m) }(remoteNode, msg)
+}
+
 ////////////////////////////////////////// Logging Helper Functions ///////////////////////////////////////////////
 
 func (fc *FastHotStuffConsensus) logBlockProposal(block *MsgDeSoBlock, blockHash *BlockHash) {
diff --git a/lib/pos_consensus_test.go b/lib/pos_consensus_test.go
index 9bdb136a0..99d8c4e9d 100644
--- a/lib/pos_consensus_test.go
+++ b/lib/pos_consensus_test.go
@@ -7,6 +7,7 @@ import (
 	"testing"
 
 	"github.com/deso-protocol/core/bls"
+	"github.com/deso-protocol/core/collections"
 	"github.com/deso-protocol/core/consensus"
 	"github.com/deso-protocol/go-deadlock"
 	"github.com/pkg/errors"
@@ -26,7 +27,8 @@ func TestFastHotStuffConsensusHandleLocalVoteEvent(t *testing.T) {
 
 	// Create a mock consensus
 	fastHotStuffConsensus := FastHotStuffConsensus{
-		lock:       sync.RWMutex{},
+		lock:           sync.RWMutex{},
+		networkManager: _createMockNetworkManagerForConsensus(),
 		blockchain: &Blockchain{
 			params: &DeSoTestnetParams,
 		},
@@ -104,7 +106,8 @@ func TestFastHotStuffConsensusHandleLocalTimeoutEvent(t *testing.T) {
 
 	// Create a mock consensus
 	fastHotStuffConsensus := FastHotStuffConsensus{
-		lock:           sync.RWMutex{},
+		lock:           sync.RWMutex{},
+		networkManager: _createMockNetworkManagerForConsensus(),
 		signer: &BLSSigner{
 			privateKey: blsPrivateKey,
 		},
@@ -200,3 +203,16 @@ func TestFastHotStuffConsensusHandleLocalTimeoutEvent(t *testing.T) {
 func alwaysReturnTrue() bool {
 	return true
 }
+
+func _createMockNetworkManagerForConsensus() *NetworkManager {
+	return &NetworkManager{
+		rnManager: &RemoteNodeManager{
+			remoteNodeIndexer: &RemoteNodeIndexer{
+				AllRemoteNodes:            collections.NewConcurrentMap[RemoteNodeId, *RemoteNode](),
+				ValidatorIndex:            collections.NewConcurrentMap[bls.SerializedPublicKey, *RemoteNode](),
+				NonValidatorOutboundIndex: collections.NewConcurrentMap[RemoteNodeId, *RemoteNode](),
+				NonValidatorInboundIndex:
collections.NewConcurrentMap[RemoteNodeId, *RemoteNode](),
+			},
+		},
+	}
+}
diff --git a/lib/server.go b/lib/server.go
index 76017f8a0..86b999fd4 100644
--- a/lib/server.go
+++ b/lib/server.go
@@ -617,6 +617,7 @@ func NewServer(
 	if _blsKeystore != nil {
 		srv.fastHotStuffConsensus = NewFastHotStuffConsensus(
 			_params,
+			srv.networkManager,
 			_chain,
 			_posMempool,
 			_blsKeystore.GetSigner(),

From 99d0a46845a7c01805e9f9d9d500a0fd62aef694 Mon Sep 17 00:00:00 2001
From: tholonious <99746187+tholonious@users.noreply.github.com>
Date: Fri, 16 Feb 2024 17:00:52 -0500
Subject: [PATCH 25/37] Use RemoteNode To Fetch Missing PoS Blocks (#1045)

---
 lib/pos_consensus.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/pos_consensus.go b/lib/pos_consensus.go
index 209665b5e..6a78ad92e 100644
--- a/lib/pos_consensus.go
+++ b/lib/pos_consensus.go
@@ -539,9 +539,11 @@ func (fc *FastHotStuffConsensus) HandleBlock(pp *Peer, msg *MsgDeSoBlock) error
 	//
 	// See https://github.com/deso-protocol/core/pull/875#discussion_r1460183510 for more details.
 	if len(missingBlockHashes) > 0 {
-		pp.QueueMessage(&MsgDeSoGetBlocks{
-			HashList: missingBlockHashes,
-		})
+		remoteNode := fc.networkManager.rnManager.GetRemoteNodeFromPeer(pp)
+		if remoteNode == nil {
+			return errors.Errorf("FastHotStuffConsensus.HandleBlock: RemoteNode not found for peer: %v", pp)
+		}
+		sendMessageToRemoteNodeAsync(remoteNode, &MsgDeSoGetBlocks{HashList: missingBlockHashes})
 	}
 
 	return nil

From d2f2cdd7815c98f5dcdfc16a99b6a92c1f5653f9 Mon Sep 17 00:00:00 2001
From: tholonious <99746187+tholonious@users.noreply.github.com>
Date: Tue, 20 Feb 2024 18:55:57 -0500
Subject: [PATCH 26/37] Update Active Validator Connections In PoS Consensus (#1054)

---
 lib/pos_blockchain.go |  5 +++
 lib/pos_consensus.go  | 75 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/lib/pos_blockchain.go b/lib/pos_blockchain.go
index 65ade5b80..ad1a8a172 100644
--- a/lib/pos_blockchain.go
+++ b/lib/pos_blockchain.go
@@ -1722,6 +1722,11 @@ func (bc *Blockchain) commitBlockPoS(blockHash *BlockHash) error {
 	return nil
 }
 
+// GetCommittedTipView builds a UtxoView to the committed tip.
+func (bc *Blockchain) GetCommittedTipView() (*UtxoView, error) {
+	return NewUtxoView(bc.db, bc.params, bc.postgres, bc.snapshot, nil)
+}
+
 // GetUncommittedTipView builds a UtxoView to the uncommitted tip.
 func (bc *Blockchain) GetUncommittedTipView() (*UtxoView, error) {
 	// Connect the uncommitted blocks to the tip so that we can validate subsequent blocks
diff --git a/lib/pos_consensus.go b/lib/pos_consensus.go
index ebb6e3b86..376b6afa0 100644
--- a/lib/pos_consensus.go
+++ b/lib/pos_consensus.go
@@ -102,10 +102,22 @@ func (fc *FastHotStuffConsensus) Start() error {
 	blockProductionInterval := time.Millisecond * time.Duration(fc.blockProductionIntervalMilliseconds)
 	timeoutBaseDuration := time.Millisecond * time.Duration(fc.timeoutBaseDurationMilliseconds)
 
-	// Initialize and start the event loop
-	fc.fastHotStuffEventLoop.Init(blockProductionInterval, timeoutBaseDuration, genesisQC, tipBlockWithValidators[0], safeBlocksWithValidators)
+	// Initialize the event loop. This should never fail. If it does, we return the error to the caller.
+	// The caller should handle the error and decide when to retry.
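
For concreteness, the caller-side handling that comment describes might look like the following hypothetical helper, which is not part of this patch:

	// startConsensusWithRetry retries Start with a fixed pause between attempts.
	// Illustrative only; the retry policy is up to the embedding node.
	func startConsensusWithRetry(fc *FastHotStuffConsensus, maxAttempts int) error {
		var err error
		for ii := 0; ii < maxAttempts; ii++ {
			if err = fc.Start(); err == nil {
				return nil
			}
			glog.Errorf("startConsensusWithRetry: attempt %d failed: %v", ii+1, err)
			time.Sleep(time.Second)
		}
		return err
	}
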
+	err = fc.fastHotStuffEventLoop.Init(blockProductionInterval, timeoutBaseDuration, genesisQC, tipBlockWithValidators[0], safeBlocksWithValidators)
+	if err != nil {
+		return errors.Errorf("FastHotStuffConsensus.Start: Error initializing FastHotStuffEventLoop: %v", err)
+	}
+
+	// Start the event loop
 	fc.fastHotStuffEventLoop.Start()
 
+	// Update the validator connections in the NetworkManager. This is a best effort operation. If it fails,
+	// we log the error and continue.
+	if err = fc.updateActiveValidatorConnections(); err != nil {
+		glog.Errorf("FastHotStuffConsensus.Start: Error updating validator connections: %v", err)
+	}
+
 	return nil
 }
 
@@ -621,6 +633,12 @@ func (fc *FastHotStuffConsensus) tryProcessBlockAsNewTip(block *MsgDeSoBlock) ([
 		return nil, errors.Errorf("Error processing tip block locally: %v", err)
 	}
 
+	// Update the validator connections in the NetworkManager. This is a best effort operation. If it fails,
+	// we log the error and continue.
+	if err = fc.updateActiveValidatorConnections(); err != nil {
+		glog.Errorf("FastHotStuffConsensus.tryProcessBlockAsNewTip: Error updating validator connections: %v", err)
+	}
+
 	// Happy path. The block was processed successfully and applied as the new tip. Nothing left to do.
 	return nil, nil
 }
 
@@ -794,6 +812,59 @@ func (fc *FastHotStuffConsensus) createBlockProducer(bav *UtxoView, previousBloc
 	return blockProducer, nil
 }
 
+func (fc *FastHotStuffConsensus) updateActiveValidatorConnections() error {
+	// Fetch the committed tip view. This ends up being as good as using the uncommitted tip view
+	// but without the overhead of connecting at least two blocks' worth of txns to the view.
+	utxoView, err := fc.blockchain.GetCommittedTipView()
+	if err != nil {
+		return errors.Errorf("FastHotStuffConsensus.updateActiveValidatorConnections: Error fetching committed tip view: %v", err)
+	}
+
+	// Get the current snapshot epoch number from the committed tip. This will be behind the uncommitted tip
+	// by up to two blocks, but this is fine since we fetch both the current epoch's and next epoch's validator
+	// sets.
+	snapshotEpochNumber, err := utxoView.GetCurrentSnapshotEpochNumber()
+	if err != nil {
+		return errors.Errorf("FastHotStuffConsensus.updateActiveValidatorConnections: Error fetching snapshot epoch number: %v", err)
+	}
+
+	// Fetch the current snapshot epoch's validator set.
+	currentValidatorList, err := utxoView.GetAllSnapshotValidatorSetEntriesByStakeAtEpochNumber(snapshotEpochNumber)
+	if err != nil {
+		return errors.Errorf("FastHotStuffConsensus.updateActiveValidatorConnections: Error fetching current epoch's validator list: %v", err)
+	}
+
+	// Fetch the next snapshot epoch's validator set. This is useful when we're close to epoch transitions and
+	// allows us to pre-connect to the next epoch's validator set. In the event that there is a timeout at
+	// the epoch transition, reverting us to the previous epoch, this allows us to maintain connections to the
+	// next epoch's validators.
+	//
+	// TODO: There is an optimization we can add here to only fetch the next epoch's validator list once we're
+	// within 300 blocks of the next epoch. This way, we don't prematurely attempt connections to the next
+	// epoch's validators. In production, this will reduce the lead time with which we connect to the next
+	// epoch's validator set from 1 hour to 5 minutes.
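
The gate described in that TODO could be as simple as comparing the committed tip height against the next epoch's initial height, with the 300-block lead taken from the TODO's text. A hypothetical sketch:

	// shouldFetchNextEpochValidators reports whether we are close enough to the
	// next epoch to start pre-connecting to its validator set.
	func shouldFetchNextEpochValidators(currentHeight uint64, nextEpochInitialHeight uint64) bool {
		const leadBlocks = 300
		return currentHeight+leadBlocks >= nextEpochInitialHeight
	}
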
+	nextValidatorList, err := utxoView.GetAllSnapshotValidatorSetEntriesByStakeAtEpochNumber(snapshotEpochNumber + 1)
+	if err != nil {
+		return errors.Errorf("FastHotStuffConsensus.updateActiveValidatorConnections: Error fetching next epoch's validator list: %v", err)
+	}
+
+	// Merge the current and next validator lists. Place the current epoch's validators last so that they override
+	// the next epoch's validators in the event of a conflict.
+	mergedValidatorList := append(nextValidatorList, currentValidatorList...)
+	validatorsMap := collections.NewConcurrentMap[bls.SerializedPublicKey, consensus.Validator]()
+	for _, validator := range mergedValidatorList {
+		// Skip our own node. We don't need a connection to ourselves.
+		if validator.VotingPublicKey.Eq(fc.signer.GetPublicKey()) {
+			continue
+		}
+		validatorsMap.Set(validator.VotingPublicKey.Serialize(), validator)
+	}
+
+	// Update the active validators map in the network manager
+	fc.networkManager.SetActiveValidatorsMap(validatorsMap)
+
+	return nil
+}
+
 // Finds the epoch entry for the block and returns the epoch number.
 func getEpochEntryForBlockHeight(blockHeight uint64, epochEntries []*EpochEntry) (*EpochEntry, error) {
 	for _, epochEntry := range epochEntries {

From 50db24d40eff8decb94dd493dfa26d1baa7797ae Mon Sep 17 00:00:00 2001
From: tholonious <99746187+tholonious@users.noreply.github.com>
Date: Tue, 20 Feb 2024 19:12:41 -0500
Subject: [PATCH 27/37] Remove Rollbacks To bestHeaderChain When bestChain is Mutated (#1055)

---
 lib/pos_blockchain.go | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/pos_blockchain.go b/lib/pos_blockchain.go
index ad1a8a172..aa9d6b79d 100644
--- a/lib/pos_blockchain.go
+++ b/lib/pos_blockchain.go
@@ -53,6 +53,12 @@ func (bc *Blockchain) processHeaderPoS(header *MsgDeSoHeader) (
 		return false, false, errors.Wrapf(err, "processHeaderPoS: Problem hashing header")
 	}
 
+	// If the incoming header is already part of the best header chain, then we can exit early.
+	// The header is not part of a fork, and is already an ancestor of the current header chain tip.
+	if _, isInBestHeaderChain := bc.bestHeaderChainMap[*headerHash]; isInBestHeaderChain {
+		return true, false, nil
+	}
+
 	// Validate the header and index it in the block index.
 	blockNode, isOrphan, err := bc.validateAndIndexHeaderPoS(header, headerHash)
 	if err != nil {
@@ -1549,8 +1555,6 @@ func (bc *Blockchain) shouldReorg(blockNode *BlockNode, currentView uint64) bool
 func (bc *Blockchain) addTipBlockToBestChain(blockNode *BlockNode) {
 	bc.bestChain = append(bc.bestChain, blockNode)
 	bc.bestChainMap[*blockNode.Hash] = blockNode
-	bc.bestHeaderChain = append(bc.bestHeaderChain, blockNode)
-	bc.bestHeaderChainMap[*blockNode.Hash] = blockNode
 }
 
 // removeTipBlockFromBestChain removes the current tip from the best chain.
It @@ -1562,8 +1566,6 @@ func (bc *Blockchain) removeTipBlockFromBestChain() *BlockNode { lastBlock := bc.bestChain[len(bc.bestChain)-1] delete(bc.bestChainMap, *lastBlock.Hash) bc.bestChain = bc.bestChain[:len(bc.bestChain)-1] - bc.bestHeaderChain = bc.bestHeaderChain[:len(bc.bestChain)] - delete(bc.bestHeaderChainMap, *lastBlock.Hash) return lastBlock } From e87c72b2527e4ddb1ee22277986bff611c63d0b3 Mon Sep 17 00:00:00 2001 From: tholonious <99746187+tholonious@users.noreply.github.com> Date: Tue, 20 Feb 2024 19:14:32 -0500 Subject: [PATCH 28/37] PoW Miner Event Loop Cleanup (#1056) --- lib/miner.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/miner.go b/lib/miner.go index 06272da42..d3bab8b62 100644 --- a/lib/miner.go +++ b/lib/miner.go @@ -198,10 +198,6 @@ func (desoMiner *DeSoMiner) MineAndProcessSingleBlock(threadIndex uint32, mempoo return nil, fmt.Errorf("DeSoMiner._startThread: _mineSingleBlock returned nil; should only happen if we're stopping") } - if desoMiner.params.IsPoSBlockHeight(blockToMine.Header.Height) { - return nil, fmt.Errorf("DeSoMiner._startThread: _mineSingleBlock returned a block that is past the Proof of Stake Cutover") - } - // Log information on the block we just mined. bestHash, _ := blockToMine.Hash() glog.Infof("================== YOU MINED A NEW BLOCK! ================== Height: %d, Hash: %s", blockToMine.Header.Height, hex.EncodeToString(bestHash[:])) @@ -293,6 +289,12 @@ func (desoMiner *DeSoMiner) _startThread(threadIndex uint32) { continue } + // Exit if blockchain has connected a block at the final PoW block height. + currentTip := desoMiner.BlockProducer.chain.blockTip() + if currentTip.Header.Height >= desoMiner.params.GetFinalPoWBlockHeight() { + return + } + newBlock, err := desoMiner.MineAndProcessSingleBlock(threadIndex, nil /*mempoolToUpdate*/) if err != nil { glog.Errorf(err.Error()) From 4239b8e16fd94fd2d0675cbbf516c4a2e85c9902 Mon Sep 17 00:00:00 2001 From: tholonious <99746187+tholonious@users.noreply.github.com> Date: Thu, 22 Feb 2024 12:37:06 -0500 Subject: [PATCH 29/37] Log Peer Validator Vote Messages (#1057) --- bls/signature.go | 16 ++++++++++++++++ bls/signature_no_relic.go | 8 ++++++++ lib/block_view.go | 11 +++++++++++ lib/pos_blockchain_test.go | 19 +++++++++++++++++++ lib/pos_consensus.go | 10 ++++++++-- lib/pos_network.go | 11 +++++++++++ 6 files changed, 73 insertions(+), 2 deletions(-) diff --git a/bls/signature.go b/bls/signature.go index 673f24307..83fdb7d2d 100644 --- a/bls/signature.go +++ b/bls/signature.go @@ -227,6 +227,14 @@ func (publicKey *PublicKey) FromString(publicKeyString string) (*PublicKey, erro return publicKey, err } +func (publicKey *PublicKey) ToAbbreviatedString() string { + str := publicKey.ToString() + if len(str) <= 8 { + return str + } + return str[:8] + "..." + str[len(str)-8:] +} + func (publicKey *PublicKey) MarshalJSON() ([]byte, error) { // This is called automatically by the JSON library when converting a // bls.PublicKey to JSON. This is useful when passing a bls.PublicKey @@ -324,6 +332,14 @@ func (signature *Signature) FromString(signatureString string) (*Signature, erro return signature, nil } +func (signature *Signature) ToAbbreviatedString() string { + str := signature.ToString() + if len(str) <= 8 { + return str + } + return str[:8] + "..." + str[len(str)-8:] +} + func (signature *Signature) MarshalJSON() ([]byte, error) { // This is called automatically by the JSON library when converting a // bls.Signature to JSON. 
This is useful when passing a bls.Signature diff --git a/bls/signature_no_relic.go b/bls/signature_no_relic.go index b3d472739..0f3988253 100644 --- a/bls/signature_no_relic.go +++ b/bls/signature_no_relic.go @@ -84,6 +84,10 @@ func (publicKey *PublicKey) FromString(publicKeyString string) (*PublicKey, erro panic(BLSNoRelicError) } +func (publicKey *PublicKey) ToAbbreviatedString() string { + panic(BLSNoRelicError) +} + func (publicKey *PublicKey) MarshalJSON() ([]byte, error) { panic(BLSNoRelicError) } @@ -136,6 +140,10 @@ func (signature *Signature) FromString(signatureString string) (*Signature, erro panic(BLSNoRelicError) } +func (signature *Signature) ToAbbreviatedString() string { + panic(BLSNoRelicError) +} + func (signature *Signature) MarshalJSON() ([]byte, error) { panic(BLSNoRelicError) } diff --git a/lib/block_view.go b/lib/block_view.go index b24641777..168f2a997 100644 --- a/lib/block_view.go +++ b/lib/block_view.go @@ -4111,6 +4111,17 @@ func (bav *UtxoView) ConnectBlock( return nil, fmt.Errorf("ConnectBlock: Parent hash of block being connected does not match tip") } + // If the block height is past the Proof of Stake cutover, then we update the random seed hash. + // We do this first before connecting any transactions so that the latest seed hash is used for + // transactions that use on-chain randomness. + if blockHeight >= uint64(bav.Params.ForkHeights.ProofOfStake2ConsensusCutoverBlockHeight) { + randomSeedHash, err := HashRandomSeedSignature(desoBlock.Header.ProposerRandomSeedSignature) + if err != nil { + return nil, errors.Wrapf(err, "ConnectBlock: Problem hashing random seed signature") + } + bav._setCurrentRandomSeedHash(randomSeedHash) + } + blockHeader := desoBlock.Header var blockRewardOutputPublicKey *btcec.PublicKey // If the block height is greater than or equal to the block reward patch height, diff --git a/lib/pos_blockchain_test.go b/lib/pos_blockchain_test.go index c6b808c18..8f40597e8 100644 --- a/lib/pos_blockchain_test.go +++ b/lib/pos_blockchain_test.go @@ -1669,6 +1669,22 @@ func _verifyCommitRuleHelper(testMeta *TestMeta, committedBlocks []*BlockHash, u } } +// _verifyRandomSeedHashHelper is a helper function that verifies the random seed hash is set +// after connecting a new tip block. +func _verifyRandomSeedHashHelper(testMeta *TestMeta, tipBlock *MsgDeSoBlock) { + // Get the utxo view for the tip block. + utxoView, err := testMeta.chain.GetUncommittedTipView() + require.NoError(testMeta.t, err) + // Verify that the random seed hash is set. + randomSeedHash, err := utxoView.GetCurrentRandomSeedHash() + require.NoError(testMeta.t, err) + + // Verify that the random seed hash is set based on the random seed signature on the block. + expectedRandomSeedHash, err := HashRandomSeedSignature(tipBlock.Header.ProposerRandomSeedSignature) + require.NoError(testMeta.t, err) + require.True(testMeta.t, expectedRandomSeedHash.Eq(randomSeedHash)) +} + func TestProcessHeaderPoS(t *testing.T) { // Initialize the chain and test metadata. 
testMeta := NewTestPoSBlockchainWithValidators(t) @@ -1746,6 +1762,7 @@ func testProcessBlockPoS(t *testing.T, testMeta *TestMeta) { blockHash1, err = realBlock.Hash() require.NoError(t, err) _verifyCommitRuleHelper(testMeta, []*BlockHash{}, []*BlockHash{blockHash1}, nil) + _verifyRandomSeedHashHelper(testMeta, realBlock) } var blockHash2, blockHash3, futureBlockHash *BlockHash @@ -1768,6 +1785,7 @@ func testProcessBlockPoS(t *testing.T, testMeta *TestMeta) { require.NoError(t, err) _verifyCommitRuleHelper(testMeta, []*BlockHash{blockHash1}, []*BlockHash{blockHash2, blockHash3}, blockHash1) + _verifyRandomSeedHashHelper(testMeta, realBlock3) // Now let's try adding a block that has a timestamp too far in the future, and make sure it's stored. var futureBlock *MsgDeSoBlock @@ -1816,6 +1834,7 @@ func testProcessBlockPoS(t *testing.T, testMeta *TestMeta) { // We expect blockHash1 and blockHash2 to be committed, but blockHash3 and reorgBlockHash to not be committed. // Timeout block will no longer be in best chain, and will still be in an uncommitted state in the block index _verifyCommitRuleHelper(testMeta, []*BlockHash{blockHash1, blockHash2}, []*BlockHash{blockHash3, reorgBlockHash}, blockHash2) + _verifyRandomSeedHashHelper(testMeta, reorgBlock) _, exists := testMeta.chain.bestChainMap[*timeoutBlockHash] require.False(t, exists) diff --git a/lib/pos_consensus.go b/lib/pos_consensus.go index 376b6afa0..624efce88 100644 --- a/lib/pos_consensus.go +++ b/lib/pos_consensus.go @@ -384,6 +384,8 @@ func (fc *FastHotStuffConsensus) HandleLocalVoteEvent(event *consensus.FastHotSt // HandleValidatorVote is called when we receive a validator vote message from a peer. This function processes // the vote locally in the FastHotStuffEventLoop. func (fc *FastHotStuffConsensus) HandleValidatorVote(pp *Peer, msg *MsgDeSoValidatorVote) error { + glog.V(2).Infof("FastHotStuffConsensus.HandleValidatorVote: Received vote msg %s", msg.ToString()) + // No need to hold a lock on the consensus because this function is a pass-through // for the FastHotStuffEventLoop which guarantees thread-safety for its callers @@ -391,7 +393,8 @@ func (fc *FastHotStuffConsensus) HandleValidatorVote(pp *Peer, msg *MsgDeSoValid if err := fc.fastHotStuffEventLoop.ProcessValidatorVote(msg); err != nil { // If we can't process the vote locally, then it must somehow be malformed, stale, // or a duplicate vote/timeout for the same view. 
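
For intuition on the duplicate case: an event loop typically remembers which (view, voter) pairs it has already seen, along the lines of the sketch below. The real bookkeeping lives in the consensus package and may differ; the error path for a failed ProcessValidatorVote continues after it.

	// voteTracker records which validators have voted in each view, so a second
	// vote from the same validator for the same view can be rejected.
	type voteTracker struct {
		votesByView map[uint64]map[string]bool // view -> serialized voter key -> seen
	}

	func (vt *voteTracker) isDuplicate(view uint64, voterKey string) bool {
		voters, ok := vt.votesByView[view]
		return ok && voters[voterKey]
	}
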
- return errors.Wrapf(err, "FastHotStuffConsensus.HandleValidatorVote: Error processing vote: ") + glog.Errorf("FastHotStuffConsensus.HandleValidatorVote: Error processing vote msg: %v", err) + return errors.Wrapf(err, "FastHotStuffConsensus.HandleValidatorVote: Error processing vote msg: ") } // Happy path @@ -797,6 +800,9 @@ func (fc *FastHotStuffConsensus) createBlockProducer(bav *UtxoView, previousBloc if err != nil { return nil, errors.Errorf("Error fetching validator entry for block producer: %v", err) } + if blockProducerValidatorEntry == nil { + return nil, errors.New("Error fetching validator entry for block producer") + } blockProducerPublicKeyBytes := bav.GetPublicKeyForPKID(blockProducerValidatorEntry.PKID) blockProducerPublicKey := NewPublicKey(blockProducerPublicKeyBytes) if blockProducerPublicKey == nil { @@ -933,7 +939,7 @@ func (fc *FastHotStuffConsensus) logBlockProposal(block *MsgDeSoBlock, blockHash "\n High QC View: %d, High QC Num Validators: %d, High QC BlockHash: %s"+ "\n Timeout Agg QC View: %d, Timeout Agg QC Num Validators: %d, Timeout High QC Views: %s"+ "\n Num Block Transactions: %d, Num Transactions Remaining In Mempool: %d"+ - "\n=================================================================================================================", + "\n=================================================================================================================\n", block.Header.GetTstampSecs(), block.Header.GetView(), block.Header.Height, blockHash.String(), block.Header.ProposerVotingPublicKey.ToString(), block.Header.ProposerVotePartialSignature.ToString(), diff --git a/lib/pos_network.go b/lib/pos_network.go index 891315937..324cbb9d1 100644 --- a/lib/pos_network.go +++ b/lib/pos_network.go @@ -123,6 +123,17 @@ func (msg *MsgDeSoValidatorVote) FromBytes(data []byte) error { return nil } +func (msg *MsgDeSoValidatorVote) ToString() string { + return fmt.Sprintf( + "{MsgVersion: %d, VotingPublicKey: %s, BlockHash: %v, ProposedInView: %d, VotePartialSignature: %v}", + msg.MsgVersion, + msg.VotingPublicKey.ToAbbreviatedString(), + msg.BlockHash, + msg.ProposedInView, + msg.VotePartialSignature.ToAbbreviatedString(), + ) +} + // ================================================================== // Proof of Stake Timeout Message // ================================================================== From 3d3b8165b3e2d9d8888ef30c36dfca4f054f633b Mon Sep 17 00:00:00 2001 From: tholonious <99746187+tholonious@users.noreply.github.com> Date: Thu, 22 Feb 2024 12:38:29 -0500 Subject: [PATCH 30/37] Log Peer Validator Timeout Messages (#1058) --- lib/pos_consensus.go | 7 ++++++- lib/pos_network.go | 12 ++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/pos_consensus.go b/lib/pos_consensus.go index 624efce88..b700f6a88 100644 --- a/lib/pos_consensus.go +++ b/lib/pos_consensus.go @@ -505,6 +505,8 @@ func (fc *FastHotStuffConsensus) HandleLocalTimeoutEvent(event *consensus.FastHo // HandleValidatorTimeout is called when we receive a validator timeout message from a peer. This function // processes the timeout locally in the FastHotStuffEventLoop. func (fc *FastHotStuffConsensus) HandleValidatorTimeout(pp *Peer, msg *MsgDeSoValidatorTimeout) error { + glog.V(2).Infof("FastHotStuffConsensus.HandleLocalTimeoutEvent: Received timeout msg: %s", msg.ToString()) + // No need to hold a lock on the consensus because this function is a pass-through // for the FastHotStuffEventLoop which guarantees thread-safety for its callers. 
@@ -512,7 +514,8 @@ func (fc *FastHotStuffConsensus) HandleValidatorTimeout(pp *Peer, msg *MsgDeSoVa if err := fc.fastHotStuffEventLoop.ProcessValidatorTimeout(msg); err != nil { // If we can't process the timeout locally, then it must somehow be malformed, stale, // or a duplicate vote/timeout for the same view. - return errors.Wrapf(err, "FastHotStuffConsensus.HandleValidatorTimeout: Error processing timeout: ") + glog.Errorf("FastHotStuffConsensus.HandleValidatorTimeout: Error processing timeout msg: %v", err) + return errors.Wrapf(err, "FastHotStuffConsensus.HandleValidatorTimeout: Error processing timeout msg: ") } // Happy path @@ -936,6 +939,7 @@ func (fc *FastHotStuffConsensus) logBlockProposal(block *MsgDeSoBlock, blockHash "\n Timestamp: %d, View: %d, Height: %d, BlockHash: %v"+ "\n Proposer Voting PKey: %s"+ "\n Proposer Signature: %s"+ + "\n Proposer Random Seed Signature: %s"+ "\n High QC View: %d, High QC Num Validators: %d, High QC BlockHash: %s"+ "\n Timeout Agg QC View: %d, Timeout Agg QC Num Validators: %d, Timeout High QC Views: %s"+ "\n Num Block Transactions: %d, Num Transactions Remaining In Mempool: %d"+ @@ -943,6 +947,7 @@ func (fc *FastHotStuffConsensus) logBlockProposal(block *MsgDeSoBlock, blockHash block.Header.GetTstampSecs(), block.Header.GetView(), block.Header.Height, blockHash.String(), block.Header.ProposerVotingPublicKey.ToString(), block.Header.ProposerVotePartialSignature.ToString(), + block.Header.ProposerRandomSeedSignature.ToString(), block.Header.GetQC().GetView(), block.Header.GetQC().GetAggregatedSignature().GetSignersList().Size(), block.Header.PrevBlockHash.String(), aggQCView, aggQCNumValidators, aggQCHighQCViews, len(block.Txns), len(fc.mempool.GetTransactions()), diff --git a/lib/pos_network.go b/lib/pos_network.go index 324cbb9d1..08aae1bbc 100644 --- a/lib/pos_network.go +++ b/lib/pos_network.go @@ -251,6 +251,18 @@ func (msg *MsgDeSoValidatorTimeout) FromBytes(data []byte) error { return nil } +func (msg *MsgDeSoValidatorTimeout) ToString() string { + return fmt.Sprintf( + "{MsgVersion: %d, VotingPublicKey: %s, TimedOutView: %d, HighQCView: %v, HighQCBlockHash: %v, TimeoutPartialSignature: %s}", + msg.MsgVersion, + msg.VotingPublicKey.ToAbbreviatedString(), + msg.TimedOutView, + msg.HighQC.ProposedInView, + msg.HighQC.BlockHash, + msg.TimeoutPartialSignature.ToAbbreviatedString(), + ) +} + // A QuorumCertificate contains an aggregated signature from 2/3rds of the validators // on the network, weighted by stake. The signatures are associated with a block hash // and a view, both of which are identified in the certificate. 
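
On the 2/3rds condition mentioned above: checking a stake-weighted supermajority reduces to integer arithmetic, since signedStake/totalStake > 2/3 is equivalent to 3*signedStake > 2*totalStake. A minimal sketch using big.Int to avoid overflow; the consensus package has its own implementation:

	// hasSuperMajority reports whether the signers' combined stake strictly
	// exceeds two-thirds of the total stake.
	func hasSuperMajority(signedStake *big.Int, totalStake *big.Int) bool {
		lhs := new(big.Int).Mul(signedStake, big.NewInt(3))
		rhs := new(big.Int).Mul(totalStake, big.NewInt(2))
		return lhs.Cmp(rhs) > 0
	}
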
From f10d6dcaf5805cd8d35850445aaf00acf196c376 Mon Sep 17 00:00:00 2001
From: tholonious <99746187+tholonious@users.noreply.github.com>
Date: Thu, 22 Feb 2024 14:48:59 -0500
Subject: [PATCH 31/37] Request Missing HighQC Block For Timeout Message (#1061)

* Request Missing HighQC Block For Timeout Message

* Address Nina's comments
---
 lib/blockchain.go    |  8 ++++++++
 lib/pos_consensus.go | 30 ++++++++++++++++++++++++++++--
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/lib/blockchain.go b/lib/blockchain.go
index 3a8b9484c..764a53681 100644
--- a/lib/blockchain.go
+++ b/lib/blockchain.go
@@ -1146,6 +1146,14 @@ func (bc *Blockchain) HasBlock(blockHash *BlockHash) bool {
 	return true
 }
 
+func (bc *Blockchain) HasBlockInBlockIndex(blockHash *BlockHash) bool {
+	bc.ChainLock.RLock()
+	defer bc.ChainLock.RUnlock()
+
+	_, exists := bc.blockIndexByHash[*blockHash]
+	return exists
+}
+
 // This needs to hold a lock on the blockchain because it reads from an in-memory map that is
 // not thread-safe.
 func (bc *Blockchain) GetBlockHeaderFromIndex(blockHash *BlockHash) *MsgDeSoHeader {
diff --git a/lib/pos_consensus.go b/lib/pos_consensus.go
index b700f6a88..fac24e887 100644
--- a/lib/pos_consensus.go
+++ b/lib/pos_consensus.go
@@ -507,8 +507,23 @@ func (fc *FastHotStuffConsensus) HandleLocalTimeoutEvent(event *consensus.FastHo
 func (fc *FastHotStuffConsensus) HandleValidatorTimeout(pp *Peer, msg *MsgDeSoValidatorTimeout) error {
 	glog.V(2).Infof("FastHotStuffConsensus.HandleValidatorTimeout: Received timeout msg: %s", msg.ToString())
 
-	// No need to hold a lock on the consensus because this function is a pass-through
-	// for the FastHotStuffEventLoop which guarantees thread-safety for its callers.
+	// Hold a write lock on the consensus, since processing the timeout message updates state
+	// in the FastHotStuffEventLoop.
+	fc.lock.Lock()
+	defer fc.lock.Unlock()
+
+	if !fc.fastHotStuffEventLoop.IsRunning() {
+		return errors.Errorf("FastHotStuffConsensus.HandleValidatorTimeout: FastHotStuffEventLoop is not running")
+	}
+
+	// If we don't have the highQC's block on hand, then we need to request it from the peer. We do
+	// that first before storing the timeout message locally in the FastHotStuffEventLoop. This
+	// prevents spamming of timeout messages by peers.
+	if !fc.blockchain.HasBlockInBlockIndex(msg.HighQC.BlockHash) {
+		fc.trySendMessageToPeer(pp, &MsgDeSoGetBlocks{HashList: []*BlockHash{msg.HighQC.BlockHash}})
+		glog.Errorf("FastHotStuffConsensus.HandleValidatorTimeout: Requesting missing highQC's block: %v", msg.HighQC.BlockHash)
+		return errors.Errorf("FastHotStuffConsensus.HandleValidatorTimeout: Missing highQC's block: %v", msg.HighQC.BlockHash)
+	}
 
 	// Process the timeout message locally in the FastHotStuffEventLoop
 	if err := fc.fastHotStuffEventLoop.ProcessValidatorTimeout(msg); err != nil {
@@ -874,6 +889,17 @@ func (fc *FastHotStuffConsensus) updateActiveValidatorConnections() error {
 	return nil
 }
 
+func (fc *FastHotStuffConsensus) trySendMessageToPeer(pp *Peer, msg DeSoMessage) {
+	remoteNode := fc.networkManager.rnManager.GetRemoteNodeFromPeer(pp)
+	if remoteNode == nil {
+		glog.Errorf("FastHotStuffConsensus.trySendMessageToPeer: RemoteNode not found for peer: %v", pp)
+		return
+	}
+
+	// Send the message to the peer
+	remoteNode.SendMessage(msg)
+}
+
 // Finds the epoch entry for the block and returns the epoch number.
func getEpochEntryForBlockHeight(blockHeight uint64, epochEntries []*EpochEntry) (*EpochEntry, error) { for _, epochEntry := range epochEntries { From 5d1bb335153d6b19651c6387e5393514d5310054 Mon Sep 17 00:00:00 2001 From: tholonious <99746187+tholonious@users.noreply.github.com> Date: Thu, 22 Feb 2024 16:34:05 -0500 Subject: [PATCH 32/37] Remove Peer Disconnects in Server._handleBlock (#1062) * Remove Peer Disconnects in Server._handleBlock * Warning for unrequested blocks * Catch case for spam prevention check * Address Nina's comment --- lib/pos_blockchain.go | 3 ++- lib/server.go | 39 ++++++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/lib/pos_blockchain.go b/lib/pos_blockchain.go index aa9d6b79d..e0bea2745 100644 --- a/lib/pos_blockchain.go +++ b/lib/pos_blockchain.go @@ -263,7 +263,7 @@ func (bc *Blockchain) processBlockPoS(block *MsgDeSoBlock, currentView uint64, v } if !passedSpamPreventionCheck { // If the block fails the spam prevention check, we throw it away. - return false, false, nil, errors.New("processBlockPoS: Block failed spam prevention check") + return false, false, nil, errors.Wrapf(RuleErrorFailedSpamPreventionsCheck, "processBlockPoS: Block failed spam prevention check: ") } // Validate the block and store it in the block index. The block is guaranteed to not be an orphan. @@ -1929,6 +1929,7 @@ const ( RuleErrorAncestorBlockValidationFailed RuleError = "RuleErrorAncestorBlockValidationFailed" RuleErrorParentBlockHasViewGreaterOrEqualToChildBlock RuleError = "RuleErrorParentBlockHasViewGreaterOrEqualToChildBlock" RuleErrorParentBlockHeightNotSequentialWithChildBlockHeight RuleError = "RuleErrorParentBlockHeightNotSequentialWithChildBlockHeight" + RuleErrorFailedSpamPreventionsCheck RuleError = "RuleErrorFailedSpamPreventionsCheck" RuleErrorNilMerkleRoot RuleError = "RuleErrorNilMerkleRoot" RuleErrorInvalidMerkleRoot RuleError = "RuleErrorInvalidMerkleRoot" diff --git a/lib/server.go b/lib/server.go index 38da289cf..185f7a75f 100644 --- a/lib/server.go +++ b/lib/server.go @@ -2102,15 +2102,14 @@ func (srv *Server) _handleBlock(pp *Peer, blk *MsgDeSoBlock) { return } - if pp != nil { - if _, exists := pp.requestedBlocks[*blockHash]; !exists { - glog.Errorf("_handleBlock: Getting a block that we haven't requested before, "+ - "block hash (%v)", *blockHash) - } - delete(pp.requestedBlocks, *blockHash) - } else { - glog.Errorf("_handleBlock: Called with nil peer, this should never happen.") + // Log a warning if we receive a block we haven't requested yet. It is still possible to receive + // a block in this case if we're connected directly to the block producer and they send us a block + // directly. + if _, exists := pp.requestedBlocks[*blockHash]; !exists { + glog.Warningf("_handleBlock: Getting a block that we haven't requested before, "+ + "block hash (%v)", *blockHash) } + delete(pp.requestedBlocks, *blockHash) // Check that the mempool has not received a transaction that would forbid this block's signature pubkey. // This is a minimal check, a more thorough check is made in the ProcessBlock function. This check is @@ -2157,20 +2156,28 @@ func (srv *Server) _handleBlock(pp *Peer, blk *MsgDeSoBlock) { // headers comment above but in the future we should probably try and figure // out a way to be more strict about things. 
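The spam-prevention failure introduced above is matched by substring in the block handler just below, which relies on the codebase's string-typed RuleError convention. Here is a self-contained sketch of why that matching survives error wrapping; the processBlock function is an illustrative stand-in, not the real processBlockPoS.

package main

import (
	"fmt"
	"strings"

	"github.com/pkg/errors"
)

// RuleError follows the convention above: a string type whose value is its
// own name, so the name survives any number of errors.Wrapf layers.
type RuleError string

func (e RuleError) Error() string { return string(e) }

const RuleErrorFailedSpamPreventionsCheck RuleError = "RuleErrorFailedSpamPreventionsCheck"

// processBlock is a stand-in for processBlockPoS's failure path.
func processBlock() error {
	return errors.Wrapf(RuleErrorFailedSpamPreventionsCheck,
		"processBlockPoS: Block failed spam prevention check: ")
}

func main() {
	err := processBlock()
	// Matching on the flattened error text, exactly as _handleBlock does below.
	fmt.Println(strings.Contains(err.Error(), RuleErrorFailedSpamPreventionsCheck.Error())) // true
}

errors.Is would likely also work here, since RuleError values are comparable sentinels, but the substring check additionally tolerates errors that were flattened through fmt at some intermediate layer.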
glog.Warningf("Got duplicate block %v from peer %v", blk, pp) + } else if strings.Contains(err.Error(), RuleErrorFailedSpamPreventionsCheck.Error()) { + // If the block fails the spam prevention check, then it must be signed by the + // bad block proposer signature or it has a bad QC. In either case, we should + // disconnect the peer. + srv._logAndDisconnectPeer(pp, blk, errors.Wrapf(err, "Error while processing block: ").Error()) + return } else { - srv._logAndDisconnectPeer( - pp, blk, - errors.Wrapf(err, "Error while processing block: ").Error()) + // For any other error, we log the error and continue. + glog.Errorf("Server._handleBlock: Error while processing block: %v", err) return } } + if isOrphan { - // We should generally never receive orphan blocks. It indicates something - // went wrong in our headers syncing. - glog.Errorf("ERROR: Received orphan block with hash %v height %v. "+ + // It's possible to receive an orphan block if we're connected directly to the + // block producer, and they are broadcasting blocks in the steady state. We log + // a warning in this case and move on. + glog.Warningf("ERROR: Received orphan block with hash %v height %v. "+ "This should never happen", blockHash, blk.Header.Height) return } + srv.timer.End("Server._handleBlock: Process Block") srv.timer.Print("Server._handleBlock: General") @@ -2178,9 +2185,7 @@ func (srv *Server) _handleBlock(pp *Peer, blk *MsgDeSoBlock) { // We shouldn't be receiving blocks while syncing headers. if srv.blockchain.chainState() == SyncStateSyncingHeaders { - srv._logAndDisconnectPeer( - pp, blk, - "We should never get blocks when we're syncing headers") + glog.Warningf("Server._handleBlock: Received block while syncing headers: %v", blk) return } From ac60b950e900bc4f201fcce985530fbcddb4e17c Mon Sep 17 00:00:00 2001 From: iamsofonias Date: Thu, 22 Feb 2024 17:12:03 -0500 Subject: [PATCH 33/37] Change log level for consensus events --- lib/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/server.go b/lib/server.go index 185f7a75f..19caef905 100644 --- a/lib/server.go +++ b/lib/server.go @@ -2658,7 +2658,7 @@ func (srv *Server) _startConsensus() { select { case consensusEvent := <-srv._getFastHotStuffConsensusEventChannel(): { - glog.Infof("Server._startConsensus: Received consensus event: %s", consensusEvent.ToString()) + glog.V(2).Infof("Server._startConsensus: Received consensus event: %s", consensusEvent.ToString()) srv._handleFastHostStuffConsensusEvent(consensusEvent) } From c1a4cbb2d5f88220c44912dc177d3b273327a1f3 Mon Sep 17 00:00:00 2001 From: iamsofonias Date: Thu, 22 Feb 2024 17:14:39 -0500 Subject: [PATCH 34/37] Update regtest block producer domain --- lib/pos_server_regtest.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pos_server_regtest.go b/lib/pos_server_regtest.go index e4a339416..bd74780d6 100644 --- a/lib/pos_server_regtest.go +++ b/lib/pos_server_regtest.go @@ -25,7 +25,7 @@ func (srv *Server) submitRegtestValidatorRegistrationTxns(block *MsgDeSoBlock) { } txnMeta := RegisterAsValidatorMetadata{ - Domains: [][]byte{[]byte("https://deso.com")}, + Domains: [][]byte{[]byte("http://localhost:18000")}, DisableDelegatedStake: false, DelegatedStakeCommissionBasisPoints: 100, VotingPublicKey: blsSigner.GetPublicKey(), From 19e8511a9c4ac115d2d66947140b56921624ebe4 Mon Sep 17 00:00:00 2001 From: tholonious <99746187+tholonious@users.noreply.github.com> Date: Thu, 22 Feb 2024 17:38:03 -0500 Subject: [PATCH 35/37] Prevent Commited Tip Reorgs in 
 ProcessHeaderPoS (#1063)

* Prevent Committed Tip Reorgs in ProcessHeaderPoS

* Clean up error message
---
 lib/pos_blockchain.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/pos_blockchain.go b/lib/pos_blockchain.go
index e0bea2745..45a5f2a69 100644
--- a/lib/pos_blockchain.go
+++ b/lib/pos_blockchain.go
@@ -59,6 +59,13 @@ func (bc *Blockchain) processHeaderPoS(header *MsgDeSoHeader) (
 		return true, false, nil
 	}

+	// If the incoming header is part of a reorg that uncommits the committed tip from the best chain,
+	// then we exit early. Such headers are invalid and should not be synced.
+	committedBlockchainTip, _ := bc.getCommittedTip()
+	if committedBlockchainTip != nil && committedBlockchainTip.Header.Height >= header.Height {
+		return false, false, errors.New("processHeaderPoS: Header conflicts with committed tip")
+	}
+
 	// Validate the header and index it in the block index.
 	blockNode, isOrphan, err := bc.validateAndIndexHeaderPoS(header, headerHash)
 	if err != nil {

From cf47baabc11ca353c7624afd2022cb5e6e920e06 Mon Sep 17 00:00:00 2001
From: Lazy Nina <81658138+lazynina@users.noreply.github.com>
Date: Mon, 26 Feb 2024 15:25:48 -0500
Subject: [PATCH 36/37] Add Estimate fee rate to mempool interface (#1066)

---
 lib/legacy_mempool.go    | 16 +++++++++++++++-
 lib/pos_fee_estimator.go | 14 ++++++++------
 lib/pos_mempool.go       | 21 ++++++++++++++++++++-
 3 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/lib/legacy_mempool.go b/lib/legacy_mempool.go
index c7e3bb770..9617ab0dc 100644
--- a/lib/legacy_mempool.go
+++ b/lib/legacy_mempool.go
@@ -2446,7 +2446,21 @@ func EstimateMaxTxnFeeV1(txn *MsgDeSoTxn, minFeeRateNanosPerKB uint64) uint64 {
 func (mp *DeSoMempool) EstimateFee(txn *MsgDeSoTxn, minFeeRateNanosPerKB uint64,
 	_ uint64, _ uint64, _ uint64, _ uint64, _ uint64) (uint64, error) {
-	return EstimateMaxTxnFeeV1(txn, minFeeRateNanosPerKB), nil
+	feeRate, _ := mp.EstimateFeeRate(minFeeRateNanosPerKB, 0, 0, 0, 0, 0)
+	return EstimateMaxTxnFeeV1(txn, feeRate), nil
 }
+
+func (mp *DeSoMempool) EstimateFeeRate(
+	minFeeRateNanosPerKB uint64,
+	_ uint64,
+	_ uint64,
+	_ uint64,
+	_ uint64,
+	_ uint64) (uint64, error) {
+	if minFeeRateNanosPerKB < mp.readOnlyUtxoView.GlobalParamsEntry.MinimumNetworkFeeNanosPerKB {
+		return mp.readOnlyUtxoView.GlobalParamsEntry.MinimumNetworkFeeNanosPerKB, nil
+	}
+	return minFeeRateNanosPerKB, nil
+}

 func convertMempoolTxsToSummaryStats(mempoolTxs []*MempoolTx) map[string]*SummaryStats {

diff --git a/lib/pos_fee_estimator.go b/lib/pos_fee_estimator.go
index aae60d662..ad43ca405 100644
--- a/lib/pos_fee_estimator.go
+++ b/lib/pos_fee_estimator.go
@@ -230,16 +230,18 @@ func (posFeeEstimator *PoSFeeEstimator) sortCachedBlocks() {
 // and past blocks using the congestionFactorBasisPoints, priorityPercentileBasisPoints, and
 // maxBlockSize params.
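Before the PoSFeeEstimator signature that follows, it's worth distilling what the legacy mempool's new EstimateFeeRate above actually does: it ignores the congestion and percentile arguments and simply floors the caller's fee rate at the network minimum, and EstimateFee now routes through it. A sketch of that clamp; the constant here is a hypothetical stand-in, since the real minimum comes from the mempool's read-only utxo view.

package main

import "fmt"

// Stand-in for GlobalParamsEntry.MinimumNetworkFeeNanosPerKB.
const globalMinFeeRateNanosPerKB uint64 = 1000

// estimateFeeRate mirrors DeSoMempool.EstimateFeeRate: the caller-supplied
// rate is floored at the network-wide minimum.
func estimateFeeRate(minFeeRateNanosPerKB uint64) uint64 {
	if minFeeRateNanosPerKB < globalMinFeeRateNanosPerKB {
		return globalMinFeeRateNanosPerKB
	}
	return minFeeRateNanosPerKB
}

func main() {
	fmt.Println(estimateFeeRate(100))  // below the floor -> 1000
	fmt.Println(estimateFeeRate(5000)) // already above it -> 5000
}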
func (posFeeEstimator *PoSFeeEstimator) EstimateFeeRateNanosPerKB( - congestionFactorBasisPoints uint64, - priorityPercentileBasisPoints uint64, + mempoolCongestionFactorBasisPoints uint64, + mempoolPriorityPercentileBasisPoints uint64, + pastBlocksCongestionFactorBasisPoints uint64, + pastBlocksPriorityPercentileBasisPoints uint64, maxBlockSize uint64, ) (uint64, error) { posFeeEstimator.rwLock.RLock() defer posFeeEstimator.rwLock.RUnlock() pastBlockFeeRate, err := posFeeEstimator.estimateFeeRateNanosPerKBGivenTransactionRegister( posFeeEstimator.pastBlocksTransactionRegister, - congestionFactorBasisPoints, - priorityPercentileBasisPoints, + pastBlocksCongestionFactorBasisPoints, + pastBlocksPriorityPercentileBasisPoints, posFeeEstimator.numPastBlocks, maxBlockSize, ) @@ -248,8 +250,8 @@ func (posFeeEstimator *PoSFeeEstimator) EstimateFeeRateNanosPerKB( } mempoolFeeRate, err := posFeeEstimator.estimateFeeRateNanosPerKBGivenTransactionRegister( posFeeEstimator.mempoolTransactionRegister, - congestionFactorBasisPoints, - priorityPercentileBasisPoints, + mempoolCongestionFactorBasisPoints, + mempoolPriorityPercentileBasisPoints, posFeeEstimator.numMempoolBlocks, maxBlockSize, ) diff --git a/lib/pos_mempool.go b/lib/pos_mempool.go index 864b78ed2..7aaceb81c 100644 --- a/lib/pos_mempool.go +++ b/lib/pos_mempool.go @@ -51,6 +51,14 @@ type Mempool interface { pastBlocksPriorityPercentileBasisPoints uint64, maxBlockSize uint64, ) (uint64, error) + EstimateFeeRate( + minFeeRateNanosPerKB uint64, + mempoolCongestionFactorBasisPoints uint64, + mempoolPriorityPercentileBasisPoints uint64, + pastBlocksCongestionFactorBasisPoints uint64, + pastBlocksPriorityPercentileBasisPoints uint64, + maxBlockSize uint64, + ) (uint64, error) } type MempoolIterator interface { @@ -911,8 +919,19 @@ func (mp *PosMempool) EstimateFee(txn *MsgDeSoTxn, pastBlocksCongestionFactorBasisPoints uint64, pastBlocksPriorityPercentileBasisPoints uint64, maxBlockSize uint64) (uint64, error) { - // TODO: replace MaxBasisPoints with variables configured by flags. 
return mp.feeEstimator.EstimateFee( txn, mempoolCongestionFactorBasisPoints, mempoolPriorityPercentileBasisPoints, pastBlocksCongestionFactorBasisPoints, pastBlocksPriorityPercentileBasisPoints, maxBlockSize) } + +func (mp *PosMempool) EstimateFeeRate( + _ uint64, + mempoolCongestionFactorBasisPoints uint64, + mempoolPriorityPercentileBasisPoints uint64, + pastBlocksCongestionFactorBasisPoints uint64, + pastBlocksPriorityPercentileBasisPoints uint64, + maxBlockSize uint64) (uint64, error) { + return mp.feeEstimator.EstimateFeeRateNanosPerKB( + mempoolCongestionFactorBasisPoints, mempoolPriorityPercentileBasisPoints, + pastBlocksCongestionFactorBasisPoints, pastBlocksPriorityPercentileBasisPoints, maxBlockSize) +} From 76bb86cf82bde2f37fd80fcb9e893f803cb420a8 Mon Sep 17 00:00:00 2001 From: Lazy Nina Date: Sat, 24 Feb 2024 12:52:29 -0500 Subject: [PATCH 37/37] Add flags for default values for fee estimation --- cmd/config.go | 12 ++++++++++++ cmd/node.go | 4 ++++ cmd/run.go | 14 ++++++++++++++ lib/server.go | 4 ++++ 4 files changed, 34 insertions(+) diff --git a/cmd/config.go b/cmd/config.go index 3e3a47f97..d18a9fa42 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -55,6 +55,12 @@ type Config struct { MempoolFeeEstimatorNumPastBlocks uint64 AugmentedBlockViewRefreshIntervalMillis uint64 + // Mempool Fee Estimator + MempoolCongestionFactorBasisPoints uint64 + MempoolPriorityPercentileBasisPoints uint64 + PastBlocksCongestionFactorBasisPoints uint64 + PastBlocksPriorityPercentileBasisPoints uint64 + // Mining MinerPublicKeys []string NumMiningThreads uint64 @@ -132,6 +138,12 @@ func LoadConfig() *Config { config.MempoolFeeEstimatorNumPastBlocks = viper.GetUint64("mempool-fee-estimator-num-past-blocks") config.AugmentedBlockViewRefreshIntervalMillis = viper.GetUint64("augmented-block-view-refresh-interval-millis") + // Mempool Fee Estimator + config.MempoolCongestionFactorBasisPoints = viper.GetUint64("mempool-congestion-factor-basis-points") + config.MempoolPriorityPercentileBasisPoints = viper.GetUint64("mempool-priority-percentile-basis-points") + config.PastBlocksCongestionFactorBasisPoints = viper.GetUint64("past-blocks-congestion-factor-basis-points") + config.PastBlocksPriorityPercentileBasisPoints = viper.GetUint64("past-blocks-priority-percentile-basis-points") + // Peers config.ConnectIPs = viper.GetStringSlice("connect-ips") config.AddIPs = viper.GetStringSlice("add-ips") diff --git a/cmd/node.go b/cmd/node.go index 74dc9afce..a381c20a5 100644 --- a/cmd/node.go +++ b/cmd/node.go @@ -282,6 +282,10 @@ func (node *Node) Start(exitChannels ...*chan struct{}) { node.Config.AugmentedBlockViewRefreshIntervalMillis, node.Config.PosBlockProductionIntervalMilliseconds, node.Config.PosTimeoutBaseDurationMilliseconds, + node.Config.MempoolCongestionFactorBasisPoints, + node.Config.MempoolPriorityPercentileBasisPoints, + node.Config.PastBlocksCongestionFactorBasisPoints, + node.Config.PastBlocksPriorityPercentileBasisPoints, ) if err != nil { // shouldRestart can be true if, on the previous run, we did not finish flushing all ancestral diff --git a/cmd/run.go b/cmd/run.go index c5ebe87a8..1bbe74968 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -106,6 +106,20 @@ func SetupRunFlags(cmd *cobra.Command) { "The frequency in milliseconds with which the augmented block view will be refreshed. "+ "The default value is 100 milliseconds.") + // TODO: what are the proper defaults here? 
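+	// Note: all four flags below currently default to lib.MaxBasisPoints
+	// (10000, i.e. 100%), as their help text states.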
+ cmd.PersistentFlags().Uint64("mempool-congestion-factor-basis-points", lib.MaxBasisPoints, + "The congestion factor (in basis points) applied to the mempool used when computing fee rate. "+ + "Defaults to 10000, which is 100%") + cmd.PersistentFlags().Uint64("mempool-priority-percentile-basis-points", lib.MaxBasisPoints, + "The priority percentile (in basis points) of the mempool used when computing fee rate. "+ + "Defaults to 10000, which is 100%") + cmd.PersistentFlags().Uint64("past-blocks-congestion-factor-basis-points", lib.MaxBasisPoints, + "The congestion factor (in basis points) applied to the past blocks used when computing fee rate. "+ + "Defaults to 10000, which is 100%") + cmd.PersistentFlags().Uint64("past-blocks-priority-percentile-basis-points", lib.MaxBasisPoints, + "The priority percentile (in basis points) of the past blocks used when computing fee rate. "+ + "Defaults to 10000, which is 100%") + // Peers cmd.PersistentFlags().StringSlice("connect-ips", []string{}, "A comma-separated list of ip:port addresses that we should connect to on startup. "+ diff --git a/lib/server.go b/lib/server.go index 19caef905..58e010b95 100644 --- a/lib/server.go +++ b/lib/server.go @@ -409,6 +409,10 @@ func NewServer( _augmentedBlockViewRefreshIntervalMillis uint64, _posBlockProductionIntervalMilliseconds uint64, _posTimeoutBaseDurationMilliseconds uint64, + _mempoolCongestionFactorBasisPoints uint64, + _mempoolPriorityPercentileBasisPoints uint64, + _pastBlocksCongestionFactorBasisPoints uint64, + _pastBlocksPriorityPercentileBasisPoints uint64, ) ( _srv *Server, _err error,
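To make the basis-points convention behind these flags concrete: lib.MaxBasisPoints is 10000, so the defaults leave fee-rate estimates unscaled. The sketch below only illustrates the convention; the applyBasisPoints helper is hypothetical, and the estimator's actual combination logic lives in pos_fee_estimator.go rather than in this diff.

package main

import "fmt"

const MaxBasisPoints uint64 = 10000 // 100%

// applyBasisPoints scales a value by a factor expressed in basis points:
// 10000 bps is a no-op, 9000 bps scales by 0.9, and so on.
func applyBasisPoints(value uint64, basisPoints uint64) uint64 {
	return value * basisPoints / MaxBasisPoints
}

func main() {
	fmt.Println(applyBasisPoints(2000, MaxBasisPoints)) // default: 2000
	fmt.Println(applyBasisPoints(2000, 9000))           // 90%: 1800
}

An operator who wanted a gentler congestion adjustment would then pass, for example, --mempool-congestion-factor-basis-points=9000 at startup to scale that factor to 90%.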