feat: cache pubkeys using base64 #7022

Draft
wants to merge 5 commits into base: unstable
17 changes: 11 additions & 6 deletions packages/beacon-node/test/memory/bytesHex.ts
@@ -2,13 +2,14 @@ import crypto from "node:crypto";
import {toHexString} from "@chainsafe/ssz";
import {testRunnerMemory} from "./testRunnerMemory.js";

// Results in Linux Dec 2021
// Results in Mac M1 Aug 2024
//
// Bytes32 toHexString() - 902.8 bytes / instance
// Bytes32 Buffer.toString(hex) - 86.9 bytes / instance
// Bytes32 Buffer.toString(hex) from Uint8Array - 87.6 bytes / instance
// Bytes32 Buffer.toString(hex) + 0x - 121.7 bytes / instance
// Bytes32 randomBytes32Template() - 924.7 bytes / instance
// Bytes32 toHexString() - 903.4 bytes / instance
// Bytes32 Buffer.toString(hex) - 90.2 bytes / instance
// Bytes32 Buffer.toString(hex) from Uint8Array - 89.1 bytes / instance
// Bytes32 Buffer.toString(base64) from Uint8Array - 72.0 bytes / instance
// Bytes32 Buffer.toString(hex) + 0x - 119.7 bytes / instance
// Bytes32 randomBytes32Template() - 924.9 bytes / instance

testRunnerMemoryBpi([
{
@@ -23,6 +24,10 @@ testRunnerMemoryBpi([
id: "Bytes32 Buffer.toString(hex) from Uint8Array",
getInstance: () => Buffer.from(randomBytesUint8Array(32)).toString("hex"),
},
{
id: "Bytes32 Buffer.toString(base64) from Uint8Array",
getInstance: () => Buffer.from(randomBytesUint8Array(32)).toString("base64"),
},
{
id: "Bytes32 Buffer.toString(hex) + 0x",
getInstance: () => "0x" + crypto.randomBytes(32).toString("hex"),
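For context on the new base64 row: the saving comes purely from encoded string length. A minimal sketch (not part of the diff) comparing encodings of the same random bytes:

import crypto from "node:crypto";

// 32 bytes encode to 64 hex chars vs 44 base64 chars; a 48-byte pubkey encodes to 96 vs 64.
// Shorter ASCII strings mean smaller V8 string objects and smaller Map keys.
const bytes = crypto.randomBytes(32);
console.log(bytes.toString("hex").length); // 64
console.log(bytes.toString("base64").length); // 44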
48 changes: 35 additions & 13 deletions packages/state-transition/src/cache/pubkeyCache.ts
@@ -4,29 +4,36 @@ import {ValidatorIndex, phase0} from "@lodestar/types";
export type Index2PubkeyCache = PublicKey[];

type PubkeyHex = string;
type PubkeyBase64 = string;
const PUBKEY_BYTE_LENGTH = 48;
const PUBKEY_HEX_CHAR_LENGTH = 96;

/**
* BLSPubkey is of type Bytes48, so a single shared buffer can be reused to compute the base64 key for all pubkeys
*/
const pubkeyBuf = Buffer.alloc(PUBKEY_BYTE_LENGTH);

/**
* toHexString() creates hex strings via string concatenation, which are very memory inefficient.
* Memory benchmarks show that Buffer.toString("hex") produces strings that use 10x less memory.
*
* Does not prefix to save memory, thus the prefix is removed from an already string representation.
* Aug 2024: using base64 is 33% more memory efficient than hex
*
* See https://github.com/ChainSafe/lodestar/issues/3446
*/
function toMemoryEfficientHexStr(hex: Uint8Array | string): string {
if (typeof hex === "string") {
if (hex.startsWith("0x")) {
hex = hex.slice(2);
}
return hex;
function toMemoryEfficientString(pubkey: Uint8Array): PubkeyBase64 {
if (pubkey.length === PUBKEY_BYTE_LENGTH) {
pubkeyBuf.set(pubkey);
return pubkeyBuf.toString("base64");
} else {
// only happens in unit tests
return Buffer.from(pubkey.buffer, pubkey.byteOffset, pubkey.byteLength).toString("base64");
}

return Buffer.from(hex.buffer, hex.byteOffset, hex.byteLength).toString("hex");
}

export class PubkeyIndexMap {
// We don't really need the full pubkey. We could just use the first 20 bytes like an Ethereum address
readonly map = new Map<PubkeyHex, ValidatorIndex>();
// TODO: We don't really need the full pubkey. We could just use the first 20 bytes like an Ethereum address
readonly map = new Map<PubkeyBase64, ValidatorIndex>();

get size(): number {
return this.map.size;
@@ -36,11 +43,26 @@ export class PubkeyIndexMap {
* Must support reading with string for API support where pubkeys are already strings
*/
get(key: Uint8Array | PubkeyHex): ValidatorIndex | undefined {
return this.map.get(toMemoryEfficientHexStr(key));
if (typeof key === "string") {
if (key.startsWith("0x")) {
key = key.slice(2);
}
if (key.length === PUBKEY_HEX_CHAR_LENGTH) {
// API requests are infrequent, so converting the hex key to a Buffer and then to base64 here is not a performance concern
pubkeyBuf.write(key, "hex");
return this.map.get(toMemoryEfficientString(pubkeyBuf));
} else {
// base64 is only for internal use, don't support it
return undefined;
}
}

// Uint8Array
return this.map.get(toMemoryEfficientString(key));
}

set(key: Uint8Array, value: ValidatorIndex): void {
this.map.set(toMemoryEfficientHexStr(key), value);
this.map.set(toMemoryEfficientString(key), value);
}
}

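Roughly how the new cache is keyed (an illustrative sketch, not part of the diff; the import path is assumed): set() stores the validator index under the base64 form of the 48-byte pubkey, while get() also accepts a 0x-prefixed hex string from the API and converts it to the same base64 key.

import crypto from "node:crypto";
import {PubkeyIndexMap} from "./cache/pubkeyCache.js"; // path assumed, relative to packages/state-transition/src

const pubkey = crypto.randomBytes(48); // stand-in for a serialized BLS pubkey
const cache = new PubkeyIndexMap();
cache.set(pubkey, 123);

cache.get(pubkey); // 123, raw bytes are converted to the base64 key
cache.get("0x" + pubkey.toString("hex")); // 123, hex API input is converted to the same key
cache.get("deadbeef"); // undefined, only 96-char hex strings are accepted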
@@ -1,6 +1,6 @@
import {MutableVector} from "@chainsafe/persistent-ts";
import {testRunnerMemory} from "@lodestar/beacon-node/test/memory/testRunnerMemory";
import {newZeroedArray} from "../../src/index.js";
import {testRunnerMemoryBpi} from "./testRunnerMemory.js";

// Results in Linux Feb 2022
//
@@ -42,21 +42,3 @@ testRunnerMemoryBpi([
},
},
]);

/**
* Test bytes per instance in different representations of raw binary data
*/
function testRunnerMemoryBpi(testCases: {getInstance: (bytes: number) => unknown; id: string}[]): void {
const longestId = Math.max(...testCases.map(({id}) => id.length));

for (const {id, getInstance} of testCases) {
const bpi = testRunnerMemory({
getInstance,
convergeFactor: 1 / 100,
sampleEvery: 5,
});

// eslint-disable-next-line no-console
console.log(`${id.padEnd(longestId)} - ${bpi.toFixed(1)} bytes / instance`);
}
}
21 changes: 21 additions & 0 deletions packages/state-transition/test/memory/pubkeyCache.ts
@@ -0,0 +1,21 @@
import crypto from "node:crypto";
import {PubkeyIndexMap} from "../../src/cache/pubkeyCache.js";
import {testRunnerMemoryBpi} from "./testRunnerMemory.js";

const vcArr = [500_000, 2_000_000];

// Results in Mac M1 Aug 2024 using `node --expose-gc --loader=ts-node/esm pubkeyCache.ts`
// PubkeyIndexMap PubkeyIndexMap 500000 - 54672689.8 bytes / instance
// PubkeyIndexMap PubkeyIndexMap 2000000 - 218719267.4 bytes / instance
testRunnerMemoryBpi(
vcArr.map((vc) => ({
id: `PubkeyIndexMap PubkeyIndexMap ${vc}`,
getInstance: () => {
const pubkeyCache = new PubkeyIndexMap();
for (let i = 0; i < vc; i++) {
pubkeyCache.set(crypto.randomBytes(48), i);
}
return pubkeyCache;
},
}))
);
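Dividing the reported totals by the validator count puts the per-entry cost at roughly 109 bytes per cached pubkey in both runs; a quick check of the numbers above (illustrative only, not part of the diff):

const reported = [
  {vc: 500_000, bytes: 54_672_689.8},
  {vc: 2_000_000, bytes: 218_719_267.4},
];
for (const {vc, bytes} of reported) {
  // ~109.3 bytes per entry: the 64-char base64 key string plus Map and index overhead
  console.log(`${vc}: ${(bytes / vc).toFixed(1)} bytes / entry`);
}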
237 changes: 237 additions & 0 deletions packages/state-transition/test/memory/testRunnerMemory.ts
@@ -0,0 +1,237 @@
export type TestRunnerMemoryOpts<T> = {
getInstance: (i: number) => T;
sampleEvery?: number;
maxRssBytes?: number;
maxInstances?: number;
computeUsedMemory?: (memoryUsage: NodeJS.MemoryUsage) => number;
logEachSample?: boolean;
convergeFactor?: number;
};

if (global.gc === undefined) {
throw Error("Must enable global.gc");
}

/**
* Test bytes per instance in different representations of raw binary data
*/
export function testRunnerMemoryBpi(testCases: {getInstance: (bytes: number) => unknown; id: string}[]): void {
const longestId = Math.max(...testCases.map(({id}) => id.length));

for (const {id, getInstance} of testCases) {
const bpi = testRunnerMemory({
getInstance,
convergeFactor: 1 / 100,
sampleEvery: 5,
});

// eslint-disable-next-line no-console
console.log(`${id.padEnd(longestId)} - ${bpi.toFixed(1)} bytes / instance`);
}
}

export async function testRunnerMemoryGc<T>(opts: TestRunnerMemoryOpts<T>): Promise<void> {
const {
getInstance,
/**
* How to compute the total memory usage.
* Defaults to `heapUsed + external`.
* https://nodejs.org/api/process.html#processmemoryusage
*/
computeUsedMemory = (memoryUsage) => memoryUsage.heapUsed + memoryUsage.external,
} = opts;

const rounds = 10;
const instancesPerRound = 1000;
const xs: number[] = [];
const usedMemoryArr: number[] = [];

for (let n = 0; n < rounds; n++) {
global.gc?.();
global.gc?.();
await new Promise((r) => setTimeout(r, 100));
global.gc?.();
global.gc?.();

const totalUsedMemoryPrev = computeUsedMemory(process.memoryUsage());

const refs: T[] = [];
for (let i = 0; i < instancesPerRound; i++) {
refs.push(getInstance(i));
}

global.gc?.();
global.gc?.();
await new Promise((r) => setTimeout(r, 100));
global.gc?.();
global.gc?.();

const totalUsedMemory = computeUsedMemory(process.memoryUsage());

const totalUsedMemoryDiff = totalUsedMemory - totalUsedMemoryPrev;
refs.push(null as any);

xs.push(n);
usedMemoryArr.push(totalUsedMemoryDiff);

const usedMemoryReg = linearRegression(xs, usedMemoryArr);
// eslint-disable-next-line no-console
console.log("totalUsedMemoryDiff", totalUsedMemoryDiff, usedMemoryReg);
}
}

export function testRunnerMemory<T>(opts: TestRunnerMemoryOpts<T>): number {
const {
getInstance,
/**
* Sample memory usage every `sampleEvery` instances
*/
sampleEvery = 1000,
/**
* Stop when `process.memoryUsage().rss > maxRssBytes`.
*/
maxRssBytes = 2e9,
/**
* Stop after creating `maxInstances` instances.
*/
maxInstances = Infinity,
/**
* How to compute the total memory usage.
* Defaults to `heapUsed + external`.
* https://nodejs.org/api/process.html#processmemoryusage
*/
computeUsedMemory = (memoryUsage) => memoryUsage.heapUsed + memoryUsage.external,
logEachSample,
convergeFactor = 0.2 / 100, // 0.2%
} = opts;

const refs: T[] = [];
const xs: number[] = [];
const usedMemoryArr: number[] = [];

let prevM0 = 0;
let prevM1 = 0;

for (let i = 0; i < maxInstances; i++) {
refs.push(getInstance(i));

// Every `sampleEvery` pushes to refs we store a couple of numbers for the regression.
// The added memory should be negligible against refs, and linearRegression
// local vars will get garbage collected and won't show up in the .m result

if (i % sampleEvery === 0) {
global.gc?.();
global.gc?.();

const memoryUsage = process.memoryUsage();
const usedMemory = computeUsedMemory(memoryUsage);

xs.push(i);
usedMemoryArr.push(usedMemory);

if (usedMemoryArr.length > 1) {
// When is a good time to stop a benchmark? A naive answer is after N milliseconds or M runs.
// This code aims to stop the benchmark when the measured bytes-per-instance slope has converged
// at a value within a given convergence factor. To avoid doing expensive math too often,
// it only takes samples every `sampleEvery` instances, and stores two past slope values to
// compute a very rough linear and quadratic convergence.
const m = linearRegression(xs, usedMemoryArr).m;

// Compute convergence (1st order + 2nd order)
const a = prevM0;
const b = prevM1;
const c = m;

// Approx linear convergence
const convergence1 = Math.abs(c - a);
// Approx quadratic convergence
const convergence2 = Math.abs(b - (a + c) / 2);
// Take the greater of both to enforce linear and quadratic are below convergeFactor
const convergence = Math.max(convergence1, convergence2) / a;

// Okay to stop + has converged, stop now
if (convergence < convergeFactor) {
return m;
}

if (logEachSample) {
// eslint-disable-next-line no-console
console.log(i, memoryUsage.rss / maxRssBytes, {m});
}

prevM0 = prevM1;
prevM1 = m;
}
}
}

return linearRegression(xs, usedMemoryArr).m;
}

/**
* From https://github.com/simple-statistics/simple-statistics/blob/d0d177baf74976a2421638bce98ab028c5afb537/src/linear_regression.js
*
* [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression)
* is a simple way to find a fitted line between a set of coordinates.
* This algorithm finds the slope and y-intercept of a regression line
* using the least sum of squares.
*
* @param xs array of x values
* @param ys array of y values
* @returns object containing slope and intercept of regression line
* @example
* linearRegression([0, 1], [0, 1]); // => { m: 1, b: 0 }
*/
export function linearRegression(xs: number[], ys: number[]): {m: number; b: number} {
let m: number, b: number;

// Store data length in a local variable to reduce
// repeated object property lookups
const dataLength = xs.length;

//if there's only one point, arbitrarily choose a slope of 0
//and a y-intercept of whatever the y of the initial point is
if (dataLength === 1) {
m = 0;
b = ys[0];
} else {
// Initialize our sums and scope the `m` and `b`
// variables that define the line.
let sumX = 0,
sumY = 0,
sumXX = 0,
sumXY = 0;

// Use local variables to grab point values
// with minimal object property lookups
let x: number, y: number;

// Gather the sum of all x values, the sum of all
// y values, and the sum of x^2 and (x*y) for each
// value.
//
// In math notation, these would be SS_x, SS_y, SS_xx, and SS_xy
for (let i = 0; i < dataLength; i++) {
x = xs[i];
y = ys[i];

sumX += x;
sumY += y;

sumXX += x * x;
sumXY += x * y;
}

// `m` is the slope of the regression line
m = (dataLength * sumXY - sumX * sumY) / (dataLength * sumXX - sumX * sumX);

// `b` is the y-intercept of the line.
b = sumY / dataLength - (m * sumX) / dataLength;
}

// Return both values as an object.
return {
m: m,
b: b,
};
}
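As a quick sanity check on the regression (illustrative only, not part of the diff): the slope m returned by linearRegression is exactly what testRunnerMemory reports as bytes per instance, so memory samples that grow by 100 bytes per instance yield m = 100.

// Samples taken every 5 instances, with used memory growing ~100 bytes per instance.
const sampleXs = [0, 5, 10, 15, 20];
const sampleBytes = [1_000, 1_500, 2_000, 2_500, 3_000];
const {m, b} = linearRegression(sampleXs, sampleBytes);
console.log(m); // 100 -> reported as "100.0 bytes / instance"
console.log(b); // 1000 -> baseline memory before the first instance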