Skip to content

Hitless upgrades #3021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 37 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
bf647bd
--wip-- [skip ci]
nkaradzhov Jun 24, 2025
0484bb2
--wip-- [skip ci]
nkaradzhov Jun 26, 2025
8e04398
expose new options
nkaradzhov Jul 11, 2025
d509e71
implement queue drain mechanism
nkaradzhov Jul 11, 2025
b646adc
fix typo
nkaradzhov Jul 14, 2025
134a703
fix proxy bug with this
nkaradzhov Jul 14, 2025
21f46b9
refactor - remove proxy, use subclass
nkaradzhov Jul 14, 2025
8cda228
--wip-- [skip ci]
nkaradzhov Jul 15, 2025
05c9bb3
--wip-- [skip ci]
nkaradzhov Jul 16, 2025
aad8e62
--wip-- [skip ci]
nkaradzhov Jul 17, 2025
c6aa037
extract socket orchestration in separate class
nkaradzhov Jul 18, 2025
f041260
refactor - remove reference to client
nkaradzhov Jul 18, 2025
d5b9e87
remove logs
nkaradzhov Jul 18, 2025
e9c3a98
cosmetics
nkaradzhov Jul 18, 2025
085e74f
remove unused code
nkaradzhov Jul 18, 2025
7f0703c
refactor - extract utility method to wait for in-flight commands to c…
nkaradzhov Jul 18, 2025
89b40ed
rename flag
nkaradzhov Jul 24, 2025
6794c11
implement timeout relaxation
nkaradzhov Jul 25, 2025
0954800
add failover pns
nkaradzhov Jul 28, 2025
e8c147c
refactor: extract push handlers out of the queue
nkaradzhov Jul 28, 2025
81c4222
fix async fn
nkaradzhov Jul 29, 2025
20871ff
refactor: extract events
nkaradzhov Jul 31, 2025
4518073
unsure: apply timeout relaxation on MOVING as well
nkaradzhov Jul 31, 2025
8d85a8a
refactor: extract constant
nkaradzhov Aug 1, 2025
ad2b563
refactor: rework timeouts
nkaradzhov Aug 1, 2025
ccc1f9c
feat: add handshake command
nkaradzhov Aug 4, 2025
df5d555
add debug fn
nkaradzhov Aug 4, 2025
8b24251
fix: tls might not be provided
nkaradzhov Aug 4, 2025
71a12a9
remove none
nkaradzhov Aug 5, 2025
25b258f
comply to new pn format
nkaradzhov Aug 7, 2025
f46180a
debug moving endpoint type
nkaradzhov Aug 7, 2025
4fc2787
fix: remove unnecessary (un)relaxations
nkaradzhov Aug 7, 2025
01901a2
wording
nkaradzhov Aug 7, 2025
6596e57
fix ordering
nkaradzhov Aug 7, 2025
f64edfc
fix: correctly set timeout of new socket
nkaradzhov Aug 7, 2025
c6ce47c
comply with default values from hld
nkaradzhov Aug 7, 2025
4e80dc6
fix: default maint setting should be based on resp version
nkaradzhov Aug 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 84 additions & 27 deletions packages/client/lib/client/commands-queue.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import { SinglyLinkedList, DoublyLinkedNode, DoublyLinkedList } from './linked-list';
import { DoublyLinkedNode, DoublyLinkedList, EmptyAwareSinglyLinkedList } from './linked-list';
import encodeCommand from '../RESP/encoder';
import { Decoder, PUSH_TYPE_MAPPING, RESP_TYPES } from '../RESP/decoder';
import { TypeMapping, ReplyUnion, RespVersions, RedisArgument } from '../RESP/types';
import { ChannelListeners, PubSub, PubSubCommand, PubSubListener, PubSubType, PubSubTypeListeners } from './pub-sub';
import { AbortError, ErrorReply, TimeoutError } from '../errors';
import { AbortError, ErrorReply, CommandTimeoutDuringMaintananceError, TimeoutError } from '../errors';
import { MonitorCallback } from '.';
import { dbgMaintenance } from './enterprise-maintenance-manager';

export interface CommandOptions<T = TypeMapping> {
chainId?: symbol;
Expand All @@ -30,6 +31,7 @@ export interface CommandToWrite extends CommandWaitingForReply {
timeout: {
signal: AbortSignal;
listener: () => unknown;
originalTimeout: number | undefined;
} | undefined;
}

Expand All @@ -50,22 +52,75 @@ const RESP2_PUSH_TYPE_MAPPING = {
[RESP_TYPES.SIMPLE_STRING]: Buffer
};

/**
 * Callback that attempts to handle a single push notification.
 *
 * Returns `true` when the handler consumed the notification and `false`
 * otherwise. The queue tries its registered handlers in order and stops at
 * the first one that returns `true`, so a handler must return `false` for
 * notifications it does not handle in order to let the next handler see them.
 */
type PushHandler = (pushItems: Array<any>) => boolean;

export default class RedisCommandsQueue {
readonly #respVersion;
readonly #maxLength;
readonly #toWrite = new DoublyLinkedList<CommandToWrite>();
readonly #waitingForReply = new SinglyLinkedList<CommandWaitingForReply>();
readonly #waitingForReply = new EmptyAwareSinglyLinkedList<CommandWaitingForReply>();
readonly #onShardedChannelMoved;
#chainInExecution: symbol | undefined;
readonly decoder;
readonly #pubSub = new PubSub();

#pushHandlers: PushHandler[] = [this.#onPush.bind(this)];

#inMaintenance = false;

/**
 * Records whether the queue is currently in maintenance.
 * The flag is read at the moment a queued command times out, to choose
 * which timeout error the command is rejected with.
 */
set inMaintenance(value: boolean) {
this.#inMaintenance = value;
}

#maintenanceCommandTimeout: number | undefined

/**
 * Sets (or clears, with `undefined`) the command timeout enforced during
 * maintenance and re-arms the timeout of every command that is still
 * waiting to be written.
 *
 * While a maintenance timeout is set it replaces each queued command's own
 * timeout; once it is cleared, each command falls back to the timeout it was
 * originally queued with (if any). Re-arming starts a fresh timer.
 *
 * @param ms - Maintenance timeout in milliseconds, or `undefined` to fall
 * back to the commands' original timeouts. `0` is treated as "not set",
 * matching how `addCommand` chooses the timeout for newly queued commands.
 */
setMaintenanceCommandTimeout(ms: number | undefined) {
  dbgMaintenance(`Setting maintenance command timeout to ${ms}`);
  // Prevent possible api misuse
  if (this.#maintenanceCommandTimeout === ms) return;

  this.#maintenanceCommandTimeout = ms;

  let counter = 0;

  // Overwrite timeouts of all eligible toWrite commands
  this.#toWrite.forEachNode(node => {
    const command = node.value;
    const previous = command.timeout;
    const originalTimeout = previous?.originalTimeout;

    // Detach the listener of the timeout that is being replaced, if any
    if (previous !== undefined) {
      RedisCommandsQueue.#removeTimeoutListener(command);
    }

    // `||` rather than `??`: a maintenance timeout of 0 must fall back to the
    // command's own timeout, exactly as `addCommand` does for new commands.
    const newTimeout = this.#maintenanceCommandTimeout || originalTimeout;
    if (!newTimeout) {
      // Neither a maintenance timeout nor an original one applies: drop the
      // stale descriptor so the command is not left pointing at a signal
      // whose listener has already been detached.
      command.timeout = undefined;
      return;
    }

    counter++;

    // Overwrite the command's timeout
    const signal = AbortSignal.timeout(newTimeout);
    const listener = () => {
      this.#toWrite.remove(node);
      // The maintenance flag is evaluated when the timeout fires, not when
      // it is armed.
      command.reject(
        this.#inMaintenance
          ? new CommandTimeoutDuringMaintananceError(newTimeout)
          : new TimeoutError()
      );
    };
    command.timeout = { signal, listener, originalTimeout };
    signal.addEventListener('abort', listener, { once: true });
  });
  dbgMaintenance(`Total of ${counter} timeouts reset to ${ms}`);
}

/** Whether the queue's PubSub instance currently reports itself as active. */
get isPubSubActive() {
return this.#pubSub.isActive;
}

#invalidateCallback?: (key: RedisArgument | null) => unknown;

constructor(
respVersion: RespVersions,
maxLength: number | null | undefined,
Expand Down Expand Up @@ -107,6 +162,7 @@ export default class RedisCommandsQueue {
}
return true;
}
return false
}

#getTypeMapping() {
Expand All @@ -119,30 +175,27 @@ export default class RedisCommandsQueue {
onErrorReply: err => this.#onErrorReply(err),
//TODO: we can shave off a few cycles by not adding onPush handler at all if CSC is not used
onPush: push => {
if (!this.#onPush(push)) {
// currently only supporting "invalidate" over RESP3 push messages
switch (push[0].toString()) {
case "invalidate": {
if (this.#invalidateCallback) {
if (push[1] !== null) {
for (const key of push[1]) {
this.#invalidateCallback(key);
}
} else {
this.#invalidateCallback(null);
}
}
break;
}
}
for(const pushHandler of this.#pushHandlers) {
if(pushHandler(push)) return
}
},
getTypeMapping: () => this.#getTypeMapping()
});
}

setInvalidateCallback(callback?: (key: RedisArgument | null) => unknown) {
this.#invalidateCallback = callback;
/**
 * Appends a handler to the end of the push-handler list, so it is tried
 * after every handler that was registered before it.
 */
addPushHandler(handler: PushHandler): void {
this.#pushHandlers.push(handler);
}

/**
 * Resolves once no commands are waiting for a reply.
 *
 * Resolves immediately when nothing is in flight; otherwise waits for the
 * next `empty` event emitted by the waiting-for-reply list.
 */
async waitForInflightCommandsToComplete(): Promise<void> {
  // In-flight commands already completed
  if (this.#waitingForReply.length === 0) {
    return;
  }
  // Otherwise wait for in-flight commands to fire `empty` event.
  // `once` instead of `on`: the promise can only settle a single time, and a
  // permanent listener would accumulate on every call that has to wait.
  // NOTE(review): assumes `events` is a Node.js EventEmitter (exposes `once`) — confirm.
  return new Promise<void>(resolve => {
    this.#waitingForReply.events.once('empty', () => resolve());
  });
}

addCommand<T>(
Expand All @@ -168,15 +221,19 @@ export default class RedisCommandsQueue {
typeMapping: options?.typeMapping
};

const timeout = options?.timeout;
// If #maintenanceCommandTimeout was explicitly set, we should
// use it instead of the timeout provided by the command
const timeout = this.#maintenanceCommandTimeout || options?.timeout
if (timeout) {

const signal = AbortSignal.timeout(timeout);
value.timeout = {
signal,
listener: () => {
this.#toWrite.remove(node);
value.reject(new TimeoutError());
}
value.reject(this.#inMaintenance ? new CommandTimeoutDuringMaintananceError(timeout) : new TimeoutError());
},
originalTimeout: options?.timeout
};
signal.addEventListener('abort', value.timeout.listener, { once: true });
}
Expand Down Expand Up @@ -432,7 +489,7 @@ export default class RedisCommandsQueue {
}

/**
 * Detaches the command's `abort` listener from its timeout signal.
 * Does nothing when the command has no timeout attached.
 */
static #removeTimeoutListener(command: CommandToWrite) {
  const timeout = command.timeout;
  // Commands queued without a timeout have nothing to detach; guard instead
  // of asserting non-null so such commands do not throw here.
  if (timeout === undefined) return;
  timeout.signal.removeEventListener('abort', timeout.listener);
}

static #flushToWrite(toBeSent: CommandToWrite, err: Error) {
Expand Down
Loading
Loading