@@ -1618,18 +1618,112 @@ func TestIntentScannerOnError(t *testing.T) {
16181618 }
16191619 err := s .Start (stopper , erroringScanConstructor )
16201620 require .ErrorContains (t , err , "scanner error" )
1621+ }
1622+
1623+ // TestProcessorMemoryAccountingOnError tests that when a
1624+ // buffered sender disconnects because of an error, the memory budget continues
1625+ // to account for any previously buffered events until they are actually sent.
1626+ //
1627+ // Note, this tests the case where the error is a memory overflow, but any error
1628+ // that disconnects our registration could have been used.
1629+ func TestProcessorMemoryAccountingOnError (t * testing.T ) {
1630+ defer leaktest .AfterTest (t )()
1631+
1632+ ctx := context .Background ()
1633+ stopper := stop .NewStopper ()
1634+ defer stopper .Stop (ctx )
1635+
1636+ queueCap := int64 (10 )
1637+ streamID := int64 (1 )
1638+
1639+ st := cluster .MakeTestingClusterSettings ()
1640+ RangefeedSingleBufferedSenderQueueMaxPerReg .Override (ctx , & st .SV , queueCap )
1641+
1642+ fb := newTestBudget (math .MaxInt64 )
1643+ testServerStream := newTestServerStream ()
1644+ bs := NewBufferedSender (testServerStream , st , NewBufferedSenderMetrics ())
1645+
1646+ smMetrics := NewStreamManagerMetrics ()
1647+ sm := NewStreamManager (bs , smMetrics )
1648+ require .NoError (t , sm .Start (ctx , stopper ))
1649+ defer sm .Stop (ctx )
1650+
1651+ // Create a processor with our budget.
1652+ p , h , pStopper := newTestProcessor (t ,
1653+ withBudget (fb ),
1654+ withRangefeedTestType (scheduledProcessorWithBufferedSender ))
1655+ defer pStopper .Stop (ctx )
1656+
1657+ // Block the sender so the buffer will fill up.
1658+ unblock := testServerStream .BlockSend ()
1659+ defer func () {
1660+ if unblock != nil {
1661+ unblock ()
1662+ }
1663+ }()
1664+
1665+ startTime := hlc.Timestamp {WallTime : 1 }
1666+ sm .RegisteringStream (streamID )
1667+ registered , d , _ := p .Register (
1668+ ctx ,
1669+ roachpb.RSpan {Key : roachpb .RKey ("a" ), EndKey : roachpb .RKey ("z" )},
1670+ startTime ,
1671+ nil , /* catchUpIter */
1672+ false , /* withDiff */
1673+ false , /* withFiltering */
1674+ false , /* withOmitRemote */
1675+ noBulkDelivery ,
1676+ sm .NewStream (streamID , 1 /* rangeID */ ),
1677+ )
1678+ require .True (t , registered )
1679+ sm .AddStream (streamID , d )
16211680
1622- // The processor should be stopped eventually.
1623- p := (s ).(* ScheduledProcessor )
1681+ // Overflow the queue by writing queueCap + 1 events.
1682+ for i := range queueCap + 1 {
1683+ v := writeValueOpWithKV (roachpb .Key ("k" ), hlc.Timestamp {WallTime : startTime .WallTime + i + 1 }, []byte ("val" ))
1684+ require .True (t , p .ConsumeLogicalOps (ctx , v ))
1685+ }
1686+
1687+ // Once all events have been sent to the registration, it should have
1688+ // overflowed and been disconnected.
1689+ h .syncEventC ()
16241690 testutils .SucceedsSoon (t , func () error {
1625- select {
1626- case <- p .stoppedC :
1627- _ , ok := sch .shards [shardIndex (p .ID (), len (sch .shards ), p .Priority )].procs [p .ID ()]
1628- require .False (t , ok )
1629- require .False (t , sch .priorityIDs .Contains (p .ID ()))
1691+ if d .IsDisconnected () {
16301692 return nil
1631- default :
1632- return errors .New ("processor not stopped" )
16331693 }
1694+ return errors .New ("waiting for registration to disconnect" )
1695+ })
1696+
1697+ // At this point, the registration should be disconnected but the buffered
1698+ // sender still has events in its queue. Assert that the memory budget still
1699+ // accounts for the memory in that queue.
1700+ //
1701+ // NB: This could be racy if we change the structure of the code in the future.
1702+ // Namely, the budget usage might be non-zero now but drop to zero at some
1703+ // later time. We try to defend against that here by sending 2 sync
1704+ // events to help ensure we've definitely processed any processor requests.
1705+ //
1706+ // At the time this test was written, this test caught the bug on every run.
1707+ h .syncEventC ()
1708+ h .syncEventC ()
1709+
1710+ fb .mu .Lock ()
1711+ budgetUsed := fb .mu .memBudget .Used ()
1712+ fb .mu .Unlock ()
1713+ require .Greater (t , budgetUsed , int64 (0 ),
1714+ "memory budget should still account for events in buffered sender after overflow" )
1715+
1716+ // Unblocking the sender should drain the queue and free everything from the
1717+ // memory budget.
1718+ unblock ()
1719+ unblock = nil
1720+
1721+ testutils .SucceedsSoon (t , func () error {
1722+ fb .mu .Lock ()
1723+ defer fb .mu .Unlock ()
1724+ if used := fb .mu .memBudget .Used (); used != 0 {
1725+ return errors .Errorf ("budget still has %d bytes allocated" , used )
1726+ }
1727+ return nil
16341728 })
16351729}
0 commit comments