diff --git a/perf/efps/helpers/aggregateLatencies.ts b/perf/efps/helpers/aggregateLatencies.ts
index 9611060cae6..c058d36785b 100644
--- a/perf/efps/helpers/aggregateLatencies.ts
+++ b/perf/efps/helpers/aggregateLatencies.ts
@@ -22,7 +22,7 @@ function calculatePercentile(numbers: number[], percentile: number): number {
   return lowerValue + (upperValue - lowerValue) * fraction
 }
 
-function calculateError(numbers: number[]) {
+function calculateSpread(numbers: number[]) {
   const mean = numbers.reduce((sum, num) => sum + num, 0) / numbers.length
 
   // calculate the sum of squared differences from the mean
@@ -40,7 +40,7 @@ function calculateError(numbers: number[]) {
 export function aggregateLatencies(values: number[]): EfpsResult['latency'] {
   return {
     median: calculatePercentile(values, 0.5),
-    error: calculateError(values),
+    spread: calculateSpread(values),
     p75: calculatePercentile(values, 0.75),
     p90: calculatePercentile(values, 0.9),
     p99: calculatePercentile(values, 0.99),
diff --git a/perf/efps/index.ts b/perf/efps/index.ts
index 65b5f46142f..57e68b03acc 100644
--- a/perf/efps/index.ts
+++ b/perf/efps/index.ts
@@ -1,4 +1,3 @@
-/* eslint-disable max-depth */
 /* eslint-disable no-console */
 // eslint-disable-next-line import/no-unassigned-import
 import 'dotenv/config'
@@ -21,11 +20,12 @@ import recipe from './tests/recipe/recipe'
 import synthetic from './tests/synthetic/synthetic'
 import {type EfpsAbResult, type EfpsResult, type EfpsTest} from './types'
 
-const warningThreshold = 0.2
-const testAttemptCount = process.env.CI ? 3 : 1
+const WARNING_THRESHOLD = 0.2
+const TEST_ATTEMPTS = process.env.CI ? 3 : 1
 
-const headless = true
-const tests = [article, recipe, synthetic]
+const HEADLESS = true
+const REFERENCE_TAG = 'latest'
+const TESTS = [article, recipe, synthetic]
 
 const projectId = process.env.VITE_PERF_EFPS_PROJECT_ID!
 const dataset = process.env.VITE_PERF_EFPS_DATASET!
@@ -77,14 +77,6 @@ const getSanityPkgPathForTag = async (tag: string) => {
   return path.join(tmpDir, 'node_modules', 'sanity')
 }
 
-const formatFps = (fps: number) => {
-  const rounded = fps.toFixed(1)
-  if (fps >= 100) return chalk.green('99.9+')
-  if (fps >= 60) return chalk.green(rounded)
-  if (fps >= 20) return chalk.yellow(rounded)
-  return chalk.red(rounded)
-}
-
 const formatEfps = (latencyMs: number) => {
   const efps = 1000 / latencyMs
   const rounded = efps.toFixed(1)
@@ -105,14 +97,6 @@ const formatPercentageChange = (experiment: number, reference: number): string =
   return `${sign}${rounded}%`
 }
 
-const formatPercentage = (delta: number): string => {
-  const percentage = delta * 100
-  const rounded = percentage.toFixed(1)
-  const sign = delta >= 0 ? '+' : ''
-  if (delta >= -warningThreshold) return `${sign}${rounded}%`
-  return chalk.red(`${sign}${rounded}%`)
-}
-
 // For markdown formatting without colors
 const formatEfpsPlain = (latencyMs: number) => {
   const efps = 1000 / latencyMs
@@ -122,18 +106,9 @@ const formatEfpsPlain = (latencyMs: number) => {
   return rounded
 }
 
-const formatPercentagePlain = (delta: number): string => {
-  const percentage = delta * 100
-  const rounded = percentage.toFixed(1)
-  const sign = delta >= 0 ? '+' : ''
-  return `${sign}${rounded}%`
-}
-
-// START
-
 const spinner = Ora()
 
-spinner.info(`Running ${tests.length} tests: ${tests.map((t) => `'${t.name}'`).join(', ')}`)
+spinner.info(`Running ${TESTS.length} tests: ${TESTS.map((t) => `'${t.name}'`).join(', ')}`)
 
 await exec({
   text: ['Building the monorepo…', 'Built monorepo'],
@@ -150,8 +125,8 @@
 
 const localSanityPkgPath = path.dirname(fileURLToPath(import.meta.resolve('sanity/package.json')))
 
-const referenceSanityPkgPath = await getSanityPkgPathForTag('v3.57.4')
-const experimentSanityPkgPath = await getSanityPkgPathForTag('v3.58.0')
+const referenceSanityPkgPath = await getSanityPkgPathForTag(REFERENCE_TAG)
+const experimentSanityPkgPath = localSanityPkgPath
 
 function mergeResults(baseResults: EfpsResult[] | undefined, incomingResults: EfpsResult[]) {
   if (!baseResults) return incomingResults
@@ -178,15 +153,16 @@ async function runAbTest(test: EfpsTest) {
   let referenceResults: EfpsResult[] | undefined
   let experimentResults: EfpsResult[] | undefined
 
-  for (let attempt = 0; attempt < testAttemptCount; attempt++) {
+  for (let attempt = 0; attempt < TEST_ATTEMPTS; attempt++) {
     referenceResults = mergeResults(
       referenceResults,
       await runTest({
         prefix: `running ${referenceSanityPkgPath}`,
+        key: 'reference',
         test,
         resultsDir,
         client,
-        headless,
+        headless: HEADLESS,
         projectId,
         sanityPkgPath: referenceSanityPkgPath,
         log: () => {},
@@ -197,10 +173,11 @@
       experimentResults,
       await runTest({
         prefix: `running ${experimentSanityPkgPath}`,
+        key: 'experiment',
         test,
         resultsDir,
         client,
-        headless,
+        headless: HEADLESS,
         projectId,
         sanityPkgPath: experimentSanityPkgPath,
         log: () => {},
@@ -216,25 +193,36 @@
   )
 }
 
-for (let i = 0; i < tests.length; i++) {
-  const test = tests[i]
+for (let i = 0; i < TESTS.length; i++) {
+  const test = TESTS[i]
 
   testResults.push({
     name: test.name,
     results: await runAbTest(test),
   })
 }
 
-const table = new Table({
-  head: [chalk.bold('Benchmark'), 'reference', 'experiment', 'Δ (%)', ''].map((cell) =>
-    chalk.cyan(cell),
-  ),
+const comparisonTableCli = new Table({
+  head: ['Benchmark', 'reference', 'experiment', 'Δ (%)', ''].map((cell) => chalk.cyan(cell)),
 })
 
+const detailedInformationCliHead = [
+  'Benchmark',
+  'latency',
+  'p75',
+  'p90',
+  'p99',
+  'blocking time',
+  'test duration',
+].map((i) => chalk.cyan(i))
+
+const referenceTableCli = new Table({head: detailedInformationCliHead})
+const experimentTableCli = new Table({head: detailedInformationCliHead})
+
 function isSignificantlyDifferent(experiment: number, reference: number) {
   // values are too small to matter and are already performing well
   if (experiment < 16 && reference < 16) return false
   const delta = (experiment - reference) / reference
-  return delta >= warningThreshold
+  return delta >= WARNING_THRESHOLD
 }
 
 for (const {name, results} of testResults) {
@@ -251,21 +239,51 @@
       reference.latency.median,
     )
 
-    table.push([
-      `${name} (${experiment.label})`,
+    const benchmarkName = `${name} (${experiment.label})`
+
+    comparisonTableCli.push([
+      benchmarkName,
       `${formatEfps(reference.latency.median)} efps (${reference.latency.median.toFixed(0)}ms)`,
       `${formatEfps(experiment.latency.median)} efps (${experiment.latency.median.toFixed(0)}ms)`,
       `${significantlyDifferent ? chalk.red(msDifference) : msDifference} (${percentageChange})`,
       significantlyDifferent ? '🔴' : '✅',
     ])
+
+    referenceTableCli.push([
+      benchmarkName,
+      `${reference.latency.median.toFixed(0)}±${reference.latency.spread.toFixed(0)}ms`,
+      `${reference.latency.p75.toFixed(0)}ms`,
+      `${reference.latency.p90.toFixed(0)}ms`,
+      `${reference.latency.p99.toFixed(0)}ms`,
+      `${reference.blockingTime.toFixed(0)}ms`,
+      `${(reference.runDuration / 1000).toFixed(1)}s`,
+    ])
+
+    experimentTableCli.push([
+      benchmarkName,
+      `${experiment.latency.median.toFixed(0)}±${experiment.latency.spread.toFixed(0)}ms`,
+      `${experiment.latency.p75.toFixed(0)}ms`,
+      `${experiment.latency.p90.toFixed(0)}ms`,
+      `${experiment.latency.p99.toFixed(0)}ms`,
+      `${experiment.blockingTime.toFixed(0)}ms`,
+      `${(experiment.runDuration / 1000).toFixed(1)}s`,
+    ])
   }
 }
 
-console.log(table.toString())
+console.log()
+console.log('Reference vs experiment')
+console.log(comparisonTableCli.toString())
+console.log()
+console.log('Reference result')
+console.log(referenceTableCli.toString())
+console.log()
+console.log('Experiment result')
+console.log(experimentTableCli.toString())
 
 let comparisonTable = `
-| | Benchmark | reference<br/>input latency | experiment<br/>input latency | Δ (%)<br/>latency difference |
-| --- | :-- | :-- | :-- | :-- |
+| Benchmark | reference<br/>input latency | experiment<br/>input latency | Δ (%)<br/>latency difference | |
+| :-- | :-- | :-- | :-- | --- |
 `
 
 const detailedInformationHeader = `
@@ -293,8 +311,6 @@ for (const {name, results} of testResults) {
     const benchmarkName = `${name} (${experiment.label})`
 
     comparisonTable +=
-      // status
-      `| ${significantlyDifferent ? '🔴' : '✅'} ` +
       // benchmark name
       `| ${benchmarkName} ` +
       // reference latency
@@ -302,13 +318,16 @@
       // experiment latency
       `| ${formatEfpsPlain(experiment.latency.median)} efps (${experiment.latency.median.toFixed(0)}ms) ` +
       // difference
-      `| ${msDifference} (${percentageChange}) |\n`
+      `| ${msDifference} (${percentageChange}) ` +
+      // status
+      `| ${significantlyDifferent ? '🔴' : '✅'} ` +
+      `|\n`
 
     referenceTable +=
       // benchmark name
       `| ${benchmarkName} ` +
       // latency
-      `| ${reference.latency.median.toFixed(0)}±${reference.latency.error.toFixed(0)}ms ` +
+      `| ${reference.latency.median.toFixed(0)}±${reference.latency.spread.toFixed(0)}ms ` +
       // p75
       `| ${reference.latency.p75.toFixed(0)}ms ` +
       // p90
@@ -325,7 +344,7 @@
       // benchmark name
       `| ${benchmarkName} ` +
       // latency
-      `| ${experiment.latency.median.toFixed(0)}±${experiment.latency.error.toFixed(0)}ms ` +
+      `| ${experiment.latency.median.toFixed(0)}±${experiment.latency.spread.toFixed(0)}ms ` +
       // p75
       `| ${experiment.latency.p75.toFixed(0)}ms ` +
       // p90
@@ -346,7 +365,7 @@ Updated ${new Date().toUTCString()}
 
 ${comparisonTable}
 
-> **efps** — editor "frames per second". The number of updates assumed to be possible within a second. 
+> **efps** — editor "frames per second". The number of updates assumed to be possible within a second.
 >
 > Derived from input latency. `efps = 1000 / input_latency`
@@ -356,7 +375,7 @@ ${comparisonTable}
 
 ### 🏠 Reference result
 
-The performance result of `sanity@latest`
+The performance result of `sanity@${REFERENCE_TAG}`
 
 ${referenceTable}
 
@@ -372,7 +391,7 @@ ${experimentTable}
 > #### column definitions
 >
 > - **benchmark** — the name of the test, e.g. "article", followed by the label of the field being measured, e.g. "(title)".
-> - **latency** — the time between when a key was pressed and when it was rendered. derived from a set of samples. the median (p50) is shown along with a margin of error.
+> - **latency** — the time between when a key was pressed and when it was rendered. derived from a set of samples. the median (p50) is shown along with its spread.
 > - **p75** — the 75th percentile of the input latency in the test run. 75% of the sampled inputs in this benchmark were processed faster than this value. this provides insight into the upper range of typical performance.
 > - **p90** — the 90th percentile of the input latency in the test run. 90% of the sampled inputs were faster than this. this metric helps identify slower interactions that occurred less frequently during the benchmark.
 > - **p99** — the 99th percentile of the input latency in the test run. only 1% of sampled inputs were slower than this. this represents the worst-case scenarios encountered during the benchmark, useful for identifying potential performance outliers.
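For reviewers skimming the report changes above: the efps figure and the 🔴/✅ flag are plain arithmetic on the median latencies. A worked example with made-up numbers — only `WARNING_THRESHOLD` and the formulas come from the diff; the sample medians are hypothetical:

```ts
// Hypothetical medians, in milliseconds — for illustration only.
const referenceMedianMs = 21
const experimentMedianMs = 28

// efps = 1000 / input_latency, the same formula used by formatEfps/formatEfpsPlain.
const referenceEfps = 1000 / referenceMedianMs // ≈ 47.6 efps
const experimentEfps = 1000 / experimentMedianMs // ≈ 35.7 efps

// isSignificantlyDifferent: the check is skipped only when BOTH medians are
// under 16ms; otherwise a row is flagged when the relative regression reaches
// WARNING_THRESHOLD (0.2).
const delta = (experimentMedianMs - referenceMedianMs) / referenceMedianMs // ≈ 0.33
const flagged = delta >= 0.2 // true → this row gets 🔴 in the comparison table
```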
diff --git a/perf/efps/runTest.ts b/perf/efps/runTest.ts
index 722c53516ed..89b272c5132 100644
--- a/perf/efps/runTest.ts
+++ b/perf/efps/runTest.ts
@@ -10,7 +10,6 @@ import sourcemaps from 'rollup-plugin-sourcemaps'
 import handler from 'serve-handler'
 import * as vite from 'vite'
 
-// TODO: add test duration to metrics
 import {type EfpsResult, type EfpsTest, type EfpsTestRunnerContext} from './types'
 
 const workspaceDir = path.dirname(fileURLToPath(import.meta.url))
@@ -24,6 +23,7 @@ interface RunTestOptions {
   headless: boolean
   client: SanityClient
   sanityPkgPath: string
+  key: string
   log: (text: string) => void
 }
 
@@ -36,6 +36,7 @@ export async function runTest({
   headless,
   client,
   sanityPkgPath,
+  key,
   log,
 }: RunTestOptions): Promise<EfpsResult[]> {
   console.log(prefix)
@@ -45,9 +46,8 @@
 
   // spinner.start(prefix)
 
-  const versionLabel = sanityPkgPath ? 'latest' : 'local'
-  const outDir = path.join(workspaceDir, 'builds', test.name, versionLabel)
-  const testResultsDir = path.join(resultsDir, test.name, versionLabel)
+  const outDir = path.join(workspaceDir, 'builds', test.name, key)
+  const testResultsDir = path.join(resultsDir, test.name, key)
 
   await fs.promises.mkdir(outDir, {recursive: true})
   log('Building…')
diff --git a/perf/efps/types.ts b/perf/efps/types.ts
index c4d71c8b61c..ad9df28fb3d 100644
--- a/perf/efps/types.ts
+++ b/perf/efps/types.ts
@@ -21,7 +21,7 @@ export interface EfpsResult {
   blockingTime: number
   latency: {
     median: number
-    error: number
+    spread: number
     p75: number
     p90: number
     p99: number
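The `error` → `spread` rename is only partially visible in the first file's hunks, but the surviving context lines (the mean computed via `reduce`, the "sum of squared differences from the mean" comment, and the `lowerValue`/`upperValue` interpolation) suggest helpers along these lines. A sketch assuming `spread` is a plain standard deviation — not the verbatim contents of `aggregateLatencies.ts`:

```ts
// Percentile by linear interpolation between the two nearest samples.
// Sorts a copy first, in case the caller passes unsorted latencies.
function calculatePercentile(numbers: number[], percentile: number): number {
  const sorted = [...numbers].sort((a, b) => a - b)
  const index = percentile * (sorted.length - 1)
  const lowerIndex = Math.floor(index)
  const upperIndex = Math.ceil(index)
  if (lowerIndex === upperIndex) return sorted[lowerIndex]
  const fraction = index - lowerIndex
  const lowerValue = sorted[lowerIndex]
  const upperValue = sorted[upperIndex]
  return lowerValue + (upperValue - lowerValue) * fraction
}

// "Spread" as a population standard deviation: the square root of the
// mean of the squared differences from the mean.
function calculateSpread(numbers: number[]): number {
  const mean = numbers.reduce((sum, num) => sum + num, 0) / numbers.length
  const sumOfSquaredDifferences = numbers.reduce((sum, num) => sum + (num - mean) ** 2, 0)
  return Math.sqrt(sumOfSquaredDifferences / numbers.length)
}
```

If that reading is right, the rename is apt: a standard deviation describes how dispersed the latency samples are, not a margin of error around the median — which is exactly how the report now presents it (`median±spread`).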