feat: cache pubkeys using base64 #7022

Draft
wants to merge 5 commits into base: unstable
17 changes: 11 additions & 6 deletions packages/beacon-node/test/memory/bytesHex.ts
@@ -2,13 +2,14 @@ import crypto from "node:crypto";
import {toHexString} from "@chainsafe/ssz";
import {testRunnerMemory} from "./testRunnerMemory.js";

// Results in Linux Dec 2021
// Results in Mac M1 Aug 2024
//
// Bytes32 toHexString() - 902.8 bytes / instance
// Bytes32 Buffer.toString(hex) - 86.9 bytes / instance
// Bytes32 Buffer.toString(hex) from Uint8Array - 87.6 bytes / instance
// Bytes32 Buffer.toString(hex) + 0x - 121.7 bytes / instance
// Bytes32 randomBytes32Template() - 924.7 bytes / instance
// Bytes32 toHexString() - 903.4 bytes / instance
// Bytes32 Buffer.toString(hex) - 90.2 bytes / instance
// Bytes32 Buffer.toString(hex) from Uint8Array - 89.1 bytes / instance
// Bytes32 Buffer.toString(base64) from Uint8Array - 72.0 bytes / instance
// Bytes32 Buffer.toString(hex) + 0x - 119.7 bytes / instance
// Bytes32 randomBytes32Template() - 924.9 bytes / instance

testRunnerMemoryBpi([
{
@@ -23,6 +24,10 @@ testRunnerMemoryBpi([
id: "Bytes32 Buffer.toString(hex) from Uint8Array",
getInstance: () => Buffer.from(randomBytesUint8Array(32)).toString("hex"),
},
{
id: "Bytes32 Buffer.toString(base64) from Uint8Array",
getInstance: () => Buffer.from(randomBytesUint8Array(32)).toString("base64"),
},
{
id: "Bytes32 Buffer.toString(hex) + 0x",
getInstance: () => "0x" + crypto.randomBytes(32).toString("hex"),
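For context on the new base64 row: the saving comes purely from encoded string length. A minimal sketch (not part of the diff) comparing encodings of the same random bytes:

import crypto from "node:crypto";

// 32 bytes encode to 64 hex chars vs 44 base64 chars; a 48-byte pubkey encodes to 96 vs 64.
// Shorter ASCII strings mean smaller V8 string objects and smaller Map keys.
const bytes = crypto.randomBytes(32);
console.log(bytes.toString("hex").length); // 64
console.log(bytes.toString("base64").length); // 44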
48 changes: 35 additions & 13 deletions packages/state-transition/src/cache/pubkeyCache.ts
@@ -4,29 +4,36 @@ import {ValidatorIndex, phase0} from "@lodestar/types";
export type Index2PubkeyCache = PublicKey[];

type PubkeyHex = string;
type PubkeyBase64 = string;
const PUBKEY_BYTE_LENGTH = 48;
const PUBKEY_HEX_CHAR_LENGTH = 96;

/**
* BLSPubkey is of type Bytes48, so a single shared buffer can be reused to compute the base64 key for all pubkeys
*/
const pubkeyBuf = Buffer.alloc(PUBKEY_BYTE_LENGTH);

/**
* toHexString() creates hex strings via string concatenation, which are very memory inefficient.
* Memory benchmarks show that Buffer.toString("hex") produces strings that use 10x less memory.
*
* Does not prefix to save memory, thus the prefix is removed from an already string representation.
* Aug 2024: using base64 is 33% more memory efficient than hex
*
* See https://github.com/ChainSafe/lodestar/issues/3446
*/
function toMemoryEfficientHexStr(hex: Uint8Array | string): string {
if (typeof hex === "string") {
if (hex.startsWith("0x")) {
hex = hex.slice(2);
}
return hex;
function toMemoryEfficientString(pubkey: Uint8Array): PubkeyBase64 {
if (pubkey.length === PUBKEY_BYTE_LENGTH) {
pubkeyBuf.set(pubkey);
return pubkeyBuf.toString("base64");
} else {
// only happens in unit tests
return Buffer.from(pubkey.buffer, pubkey.byteOffset, pubkey.byteLength).toString("base64");
}

return Buffer.from(hex.buffer, hex.byteOffset, hex.byteLength).toString("hex");
}

export class PubkeyIndexMap {
// We don't really need the full pubkey. We could just use the first 20 bytes like an Ethereum address
readonly map = new Map<PubkeyHex, ValidatorIndex>();
// TODO: We don't really need the full pubkey. We could just use the first 20 bytes like an Ethereum address
readonly map = new Map<PubkeyBase64, ValidatorIndex>();

get size(): number {
return this.map.size;
@@ -36,11 +43,26 @@ export class PubkeyIndexMap {
* Must support reading with string for API support where pubkeys are already strings
*/
get(key: Uint8Array | PubkeyHex): ValidatorIndex | undefined {
return this.map.get(toMemoryEfficientHexStr(key));
if (typeof key === "string") {
if (key.startsWith("0x")) {
key = key.slice(2);
}
if (key.length === PUBKEY_HEX_CHAR_LENGTH) {
// API requests are infrequent, so converting the hex key to a Buffer and then to base64 here is not a performance concern
pubkeyBuf.write(key, "hex");
return this.map.get(toMemoryEfficientString(pubkeyBuf));
} else {
// base64 is only for internal use, don't support it
return undefined;
}
}

// Uint8Array
return this.map.get(toMemoryEfficientString(key));
}

set(key: Uint8Array, value: ValidatorIndex): void {
this.map.set(toMemoryEfficientHexStr(key), value);
this.map.set(toMemoryEfficientString(key), value);
}
}

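Roughly how the new cache is keyed (an illustrative sketch, not part of the diff; the import path is assumed): set() stores the validator index under the base64 form of the 48-byte pubkey, while get() also accepts a 0x-prefixed hex string from the API and converts it to the same base64 key.

import crypto from "node:crypto";
import {PubkeyIndexMap} from "./cache/pubkeyCache.js"; // path assumed, relative to packages/state-transition/src

const pubkey = crypto.randomBytes(48); // stand-in for a serialized BLS pubkey
const cache = new PubkeyIndexMap();
cache.set(pubkey, 123);

cache.get(pubkey); // 123, raw bytes are converted to the base64 key
cache.get("0x" + pubkey.toString("hex")); // 123, hex API input is converted to the same key
cache.get("deadbeef"); // undefined, only 96-char hex strings are accepted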
@@ -1,6 +1,6 @@
import {MutableVector} from "@chainsafe/persistent-ts";
import {testRunnerMemory} from "@lodestar/beacon-node/test/memory/testRunnerMemory";
import {newZeroedArray} from "../../src/index.js";
import {testRunnerMemoryBpi} from "./testRunnerMemory.js";

// Results in Linux Feb 2022
//
@@ -42,21 +42,3 @@ testRunnerMemoryBpi([
},
},
]);

/**
* Test bytes per instance in different representations of raw binary data
*/
function testRunnerMemoryBpi(testCases: {getInstance: (bytes: number) => unknown; id: string}[]): void {
const longestId = Math.max(...testCases.map(({id}) => id.length));

for (const {id, getInstance} of testCases) {
const bpi = testRunnerMemory({
getInstance,
convergeFactor: 1 / 100,
sampleEvery: 5,
});

// eslint-disable-next-line no-console
console.log(`${id.padEnd(longestId)} - ${bpi.toFixed(1)} bytes / instance`);
}
}
21 changes: 21 additions & 0 deletions packages/state-transition/test/memory/pubkeyCache.ts
@@ -0,0 +1,21 @@
import crypto from "node:crypto";
import {PubkeyIndexMap} from "../../src/cache/pubkeyCache.js";
import {testRunnerMemoryBpi} from "./testRunnerMemory.js";

const vcArr = [500_000, 2_000_000];

// Results in Mac M1 Aug 2024 using `node --expose-gc --loader=ts-node/esm pubkeyCache.ts`
// PubkeyIndexMap PubkeyIndexMap 500000 - 54672689.8 bytes / instance
// PubkeyIndexMap PubkeyIndexMap 2000000 - 218719267.4 bytes / instance
testRunnerMemoryBpi(
vcArr.map((vc) => ({
id: `PubkeyIndexMap PubkeyIndexMap ${vc}`,
getInstance: () => {
const pubkeyCache = new PubkeyIndexMap();
for (let i = 0; i < vc; i++) {
pubkeyCache.set(crypto.randomBytes(48), i);
}
return pubkeyCache;
},
}))
);
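Dividing the reported totals by the validator count puts the per-entry cost at roughly 109 bytes per cached pubkey in both runs; a quick check of the numbers above (illustrative only, not part of the diff):

const reported = [
  {vc: 500_000, bytes: 54_672_689.8},
  {vc: 2_000_000, bytes: 218_719_267.4},
];
for (const {vc, bytes} of reported) {
  // ~109.3 bytes per entry: the 64-char base64 key string plus Map and index overhead
  console.log(`${vc}: ${(bytes / vc).toFixed(1)} bytes / entry`);
}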
237 changes: 237 additions & 0 deletions packages/state-transition/test/memory/testRunnerMemory.ts
@@ -0,0 +1,237 @@
export type TestRunnerMemoryOpts<T> = {
getInstance: (i: number) => T;
sampleEvery?: number;
maxRssBytes?: number;
maxInstances?: number;
computeUsedMemory?: (memoryUsage: NodeJS.MemoryUsage) => number;
logEachSample?: boolean;
convergeFactor?: number;
};

if (global.gc === undefined) {
throw Error("Must enable global.gc");
}

/**
* Test bytes per instance in different representations of raw binary data
*/
export function testRunnerMemoryBpi(testCases: {getInstance: (bytes: number) => unknown; id: string}[]): void {
const longestId = Math.max(...testCases.map(({id}) => id.length));

for (const {id, getInstance} of testCases) {
const bpi = testRunnerMemory({
getInstance,
convergeFactor: 1 / 100,
sampleEvery: 5,
});

// eslint-disable-next-line no-console
console.log(`${id.padEnd(longestId)} - ${bpi.toFixed(1)} bytes / instance`);
}
}

export async function testRunnerMemoryGc<T>(opts: TestRunnerMemoryOpts<T>): Promise<void> {
const {
getInstance,
/**
* How to compute the total memory usage.
* Defaults to `heapUsed + external`.
* https://nodejs.org/api/process.html#processmemoryusage
*/
computeUsedMemory = (memoryUsage) => memoryUsage.heapUsed + memoryUsage.external,
} = opts;

const rounds = 10;
const instancesPerRound = 1000;
const xs: number[] = [];
const usedMemoryArr: number[] = [];

for (let n = 0; n < rounds; n++) {
global.gc?.();
global.gc?.();
await new Promise((r) => setTimeout(r, 100));
global.gc?.();
global.gc?.();

const totalUsedMemoryPrev = computeUsedMemory(process.memoryUsage());

const refs: T[] = [];
for (let i = 0; i < instancesPerRound; i++) {
refs.push(getInstance(i));
}

global.gc?.();
global.gc?.();
await new Promise((r) => setTimeout(r, 100));
global.gc?.();
global.gc?.();

const totalUsedMemory = computeUsedMemory(process.memoryUsage());

const totalUsedMemoryDiff = totalUsedMemory - totalUsedMemoryPrev;
refs.push(null as any);

xs.push(n);
usedMemoryArr.push(totalUsedMemoryDiff);

const usedMemoryReg = linearRegression(xs, usedMemoryArr);
// eslint-disable-next-line no-console
console.log("totalUsedMemoryDiff", totalUsedMemoryDiff, usedMemoryReg);
}
}

export function testRunnerMemory<T>(opts: TestRunnerMemoryOpts<T>): number {
const {
getInstance,
/**
* Sample memory usage every `sampleEvery` instances
*/
sampleEvery = 1000,
/**
* Stop when `process.memoryUsage().rss > maxRssBytes`.
*/
maxRssBytes = 2e9,
/**
* Stop after creating `maxInstances` instances.
*/
maxInstances = Infinity,
/**
* How to compute the total memory usage.
* Defaults to `heapUsed + external`.
* https://nodejs.org/api/process.html#processmemoryusage
*/
computeUsedMemory = (memoryUsage) => memoryUsage.heapUsed + memoryUsage.external,
logEachSample,
convergeFactor = 0.2 / 100, // 0.2%
} = opts;

const refs: T[] = [];
const xs: number[] = [];
const usedMemoryArr: number[] = [];

let prevM0 = 0;
let prevM1 = 0;

for (let i = 0; i < maxInstances; i++) {
refs.push(getInstance(i));

// Every `sampleEvery` pushes to refs we store a couple of numbers for the regression.
// The added memory should be negligible against refs, and linearRegression
// local vars will get garbage collected and won't show up in the .m result

if (i % sampleEvery === 0) {
global.gc?.();
global.gc?.();

const memoryUsage = process.memoryUsage();
const usedMemory = computeUsedMemory(memoryUsage);

xs.push(i);
usedMemoryArr.push(usedMemory);

if (usedMemoryArr.length > 1) {
// When is a good time to stop a benchmark? A naive answer is after N milliseconds or M runs.
// This code aims to stop the benchmark when the measured bytes-per-instance slope has converged
// at a value within a given convergence factor. To avoid doing expensive math too often,
// it only takes samples every `sampleEvery` instances, and stores two past slope values to
// compute a very rough linear and quadratic convergence.
const m = linearRegression(xs, usedMemoryArr).m;

// Compute convergence (1st order + 2nd order)
const a = prevM0;
const b = prevM1;
const c = m;

// Approx linear convergence
const convergence1 = Math.abs(c - a);
// Approx quadratic convergence
const convergence2 = Math.abs(b - (a + c) / 2);
// Take the greater of both to enforce linear and quadratic are below convergeFactor
const convergence = Math.max(convergence1, convergence2) / a;

// Okay to stop + has converged, stop now
if (convergence < convergeFactor) {
return m;
}

if (logEachSample) {
// eslint-disable-next-line no-console
console.log(i, memoryUsage.rss / maxRssBytes, {m});
}

prevM0 = prevM1;
prevM1 = m;
}
}
}

return linearRegression(xs, usedMemoryArr).m;
}

/**
* From https://github.com/simple-statistics/simple-statistics/blob/d0d177baf74976a2421638bce98ab028c5afb537/src/linear_regression.js
*
* [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression)
* is a simple way to find a fitted line between a set of coordinates.
* This algorithm finds the slope and y-intercept of a regression line
* using the least sum of squares.
*
* @param xs array of x values
* @param ys array of y values
* @returns object containing slope and intercept of regression line
* @example
* linearRegression([0, 1], [0, 1]); // => { m: 1, b: 0 }
*/
export function linearRegression(xs: number[], ys: number[]): {m: number; b: number} {
let m: number, b: number;

// Store data length in a local variable to reduce
// repeated object property lookups
const dataLength = xs.length;

//if there's only one point, arbitrarily choose a slope of 0
//and a y-intercept of whatever the y of the initial point is
if (dataLength === 1) {
m = 0;
b = ys[0];
} else {
// Initialize our sums and scope the `m` and `b`
// variables that define the line.
let sumX = 0,
sumY = 0,
sumXX = 0,
sumXY = 0;

// Use local variables to grab point values
// with minimal object property lookups
let x: number, y: number;

// Gather the sum of all x values, the sum of all
// y values, and the sum of x^2 and (x*y) for each
// value.
//
// In math notation, these would be SS_x, SS_y, SS_xx, and SS_xy
for (let i = 0; i < dataLength; i++) {
x = xs[i];
y = ys[i];

sumX += x;
sumY += y;

sumXX += x * x;
sumXY += x * y;
}

// `m` is the slope of the regression line
m = (dataLength * sumXY - sumX * sumY) / (dataLength * sumXX - sumX * sumX);

// `b` is the y-intercept of the line.
b = sumY / dataLength - (m * sumX) / dataLength;
}

// Return both values as an object.
return {
m: m,
b: b,
};
}
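As a quick sanity check on the regression (illustrative only, not part of the diff): the slope m returned by linearRegression is exactly what testRunnerMemory reports as bytes per instance, so memory samples that grow by 100 bytes per instance yield m = 100.

// Samples taken every 5 instances, with used memory growing ~100 bytes per instance.
const sampleXs = [0, 5, 10, 15, 20];
const sampleBytes = [1_000, 1_500, 2_000, 2_500, 3_000];
const {m, b} = linearRegression(sampleXs, sampleBytes);
console.log(m); // 100 -> reported as "100.0 bytes / instance"
console.log(b); // 1000 -> baseline memory before the first instance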