diff --git a/src/__tests__/__snapshots__/index.test.ts.snap b/src/__tests__/__snapshots__/index.test.ts.snap index bbef2f6a..e3706d61 100644 --- a/src/__tests__/__snapshots__/index.test.ts.snap +++ b/src/__tests__/__snapshots__/index.test.ts.snap @@ -105,6 +105,7 @@ exports[`test existence of exported functions 1`] = ` "xyRealMaxYPoint", "xyRealMinYPoint", "xyReduce", + "xyReduceNonContinuous", "xyRolling", "xySetYValue", "xySortX", diff --git a/src/xy/__tests__/xyJoinX.test.ts b/src/xy/__tests__/xyJoinX.test.ts index 19dc6371..17196d75 100644 --- a/src/xy/__tests__/xyJoinX.test.ts +++ b/src/xy/__tests__/xyJoinX.test.ts @@ -29,6 +29,14 @@ test('no join', () => { }); }); +test('join 0.1', () => { + const data = { x: [0.2, 0.25, 1], y: [1, 1, 1] }; + expect(xyJoinX(data, { delta: 0.2 })).toStrictEqual({ + x: [0.225, 1], + y: [2, 1], + }); +}); + test('full join', () => { const data = { x: [2, 4, 6], y: [1, 1, 1] }; expect(xyJoinX(data, { delta: 5 })).toStrictEqual({ diff --git a/src/xy/__tests__/xyReduceNonContinuous.test.ts b/src/xy/__tests__/xyReduceNonContinuous.test.ts new file mode 100644 index 00000000..cc57f5c6 --- /dev/null +++ b/src/xy/__tests__/xyReduceNonContinuous.test.ts @@ -0,0 +1,229 @@ +import { expect, test } from 'vitest'; + +import { xyReduceNonContinuous } from '../xyReduceNonContinuous'; + +const x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; +const y = [0, 1, 2, 3, 4, 5, 4, 3, 2, 1, 0]; +test('All', () => { + const result = xyReduceNonContinuous( + { x, y }, + { maxApproximateNbPoints: 20 }, + ); + expect(result).toStrictEqual({ + x: Float64Array.from(x), + y: Float64Array.from(y), + }); +}); + +test('Over sized', () => { + const x2 = [1, 2]; + const y2 = [2, 3]; + const result = xyReduceNonContinuous( + { x: x2, y: y2 }, + { maxApproximateNbPoints: 10 }, + ); + expect(result).toStrictEqual({ + x: Float64Array.from([1, 2]), + y: Float64Array.from([2, 3]), + }); +}); + +test('Too large', () => { + const result = { + x: new Float64Array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + y: new Float64Array([0, 1, 2, 3, 4, 5, 4, 3, 2, 1, 0]), + }; + expect( + xyReduceNonContinuous( + { x, y }, + { maxApproximateNbPoints: 20, from: -10, to: 20 }, + ), + ).toStrictEqual(result); +}); + +test('Part exact', () => { + const result = { + x: new Float64Array([3, 4, 5]), + y: new Float64Array([3, 4, 5]), + }; + expect( + xyReduceNonContinuous( + { x, y }, + { from: 3, to: 5, maxApproximateNbPoints: 20 }, + ), + ).toStrictEqual(result); +}); + +test('Part rounded close', () => { + const result = { + x: new Float64Array([3, 4, 5]), + y: new Float64Array([3, 4, 5]), + }; + expect( + xyReduceNonContinuous( + { x, y }, + { from: 3.1, to: 4.9, maxApproximateNbPoints: 20 }, + ), + ).toStrictEqual(result); +}); + +test('Part rounded far', () => { + const result = { + x: new Float64Array([3, 4, 5]), + y: new Float64Array([3, 4, 5]), + }; + expect( + xyReduceNonContinuous( + { x, y }, + { from: 3.6, to: 4.4, maxApproximateNbPoints: 20 }, + ), + ).toStrictEqual(result); +}); + +test('Part rounded far 2', () => { + const result = xyReduceNonContinuous({ x, y }, { maxApproximateNbPoints: 5 }); + expect(result).toStrictEqual({ x: [0, 3, 6, 8], y: [2, 5, 4, 2] }); +}); + +test('Part rounded big data', () => { + const x2 = []; + const y2 = []; + for (let i = 0; i < 5000000; i++) { + x2.push(i); + y2.push(i); + } + const result = xyReduceNonContinuous( + { x: x2, y: y2 }, + { maxApproximateNbPoints: 4000 }, + ); + expect(result.x).toHaveLength(4000); + expect(result.y).toHaveLength(4000); +}); + +test('Part rounded big data 2', () => { + const x2 = []; + const y2 = []; + for (let i = 0; i < 5000000; i++) { + x2.push(i); + y2.push(i); + } + const result = xyReduceNonContinuous( + { x: x2, y: y2 }, + { maxApproximateNbPoints: 4000, from: 10, to: 20 }, + ); + expect(result.x).toStrictEqual( + Float64Array.from([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + ); + expect(result.y).toStrictEqual( + Float64Array.from([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + ); +}); + +test('xyCheck non-linear x', () => { + const xs = []; + const ys = []; + for (let i = 0; i < 11; i++) { + xs.push(i * 1.2 ** i); + ys.push(i); + } + const result = xyReduceNonContinuous( + { x: xs, y: ys }, + { maxApproximateNbPoints: 5 }, + ); + expect(result.y).toStrictEqual([5, 7, 8, 10]); +}); + +test('xyCheck extreme non-linear x', () => { + const xs = []; + const ys = []; + for (let i = 0; i < 11; i++) { + xs.push(i * 2 ** i); + ys.push(i); + } + const result = xyReduceNonContinuous( + { x: xs, y: ys }, + { maxApproximateNbPoints: 5 }, + ); + expect(result).toStrictEqual({ x: [0, 4608, 10240], y: [8, 9, 10] }); +}); + +test('xyReduceNonContinuous with zones enough points', () => { + const result = xyReduceNonContinuous( + { x, y }, + { + maxApproximateNbPoints: 5, + zones: [ + { from: 0, to: 1 }, + { from: 5, to: 7 }, + ], + }, + ); + + expect(result).toStrictEqual({ + x: new Float64Array([0, 1, 5, 6, 7]), + y: new Float64Array([0, 1, 5, 4, 3]), + }); +}); + +test('xyReduceNonContinuous with zones not enough points edge cases', () => { + const result = xyReduceNonContinuous( + { x, y }, + { + maxApproximateNbPoints: 3, + zones: [ + { from: 0, to: 1 }, + { from: 5, to: 8 }, + ], + }, + ); + expect(result).toStrictEqual({ x: [0, 1, 5], y: [0, 1, 5] }); +}); + +test('xyReduceNonContinuous with zones not enough points', () => { + const result = xyReduceNonContinuous( + { x, y }, + { + maxApproximateNbPoints: 4, + zones: [ + { from: 0, to: 1 }, + { from: 5, to: 8 }, + ], + }, + ); + // the second zone will have only one point because deltaX is 3.3333 + expect(result).toStrictEqual({ x: [0, 1, 5], y: [0, 1, 5] }); +}); + +test('xyReduceNonContinuous with one zone not enough points', () => { + const result = xyReduceNonContinuous( + { x, y }, + { + maxApproximateNbPoints: 4, + zones: [ + { from: -1, to: -1 }, + { from: 3, to: 8 }, + ], + }, + ); + expect(result).toStrictEqual({ x: [3, 7], y: [5, 3] }); +}); + +test('Large data with zones', () => { + const x2 = []; + const y2 = []; + for (let i = 0; i < 5000001; i++) { + x2.push(i); + y2.push(i); + } + const result = xyReduceNonContinuous( + { x: x2, y: y2 }, + { + maxApproximateNbPoints: 6, + zones: [ + { from: 0, to: 1000 }, + { from: 1000000, to: 1001000 }, + ], + }, + ); + expect(result).toStrictEqual({ x: [0, 1000000], y: [1000, 1001000] }); +}); diff --git a/src/xy/index.ts b/src/xy/index.ts index d3499c8b..171f10d2 100644 --- a/src/xy/index.ts +++ b/src/xy/index.ts @@ -32,7 +32,8 @@ export * from './xyMinYPoint'; export * from './xyPeakInfo'; export * from './xyRealMaxYPoint'; export * from './xyRealMinYPoint'; -export * from './xyReduce'; +export { xyReduce } from './xyReduce'; +export * from './xyReduceNonContinuous'; export * from './xyRolling'; export * from './xySetYValue'; export * from './xySortX'; diff --git a/src/xy/xyReduce.ts b/src/xy/xyReduce.ts index 98f1c433..e04fd79c 100644 --- a/src/xy/xyReduce.ts +++ b/src/xy/xyReduce.ts @@ -8,9 +8,9 @@ import { xyCheck } from './xyCheck'; interface InternalZone { from: number; to: number; - fromIndex?: number; - toIndex?: number; - nbPoints?: number; + fromIndex: number; + toIndex: number; + nbPoints: number; } export interface XYReduceOptions { @@ -52,12 +52,14 @@ export interface XYReduceOptions { * You should rather use ml-xy-equally-spaced to make further processing * @param data - Object that contains property x (an ordered increasing array) and y (an array) * @param options - options + * @returns Object with x and y arrays */ export function xyReduce( data: DataXY, options: XYReduceOptions = {}, ): DataXY { xyCheck(data); + // todo we should check that the single point is really in the range and the zones if (data.x.length < 2) { return { x: Float64Array.from(data.x), @@ -76,22 +78,8 @@ export function xyReduce( zones = zonesNormalize(zones, { from, to }); if (zones.length === 0) zones = [{ from, to }]; // we take everything - // for each zone we should know the first index, the last index and the number of points - const internalZones: InternalZone[] = zones; - let totalPoints = 0; - for (const zone of internalZones) { - zone.fromIndex = xFindClosestIndex(x, zone.from); - zone.toIndex = xFindClosestIndex(x, zone.to); - if (zone.fromIndex > 0 && x[zone.fromIndex] > zone.from) { - zone.fromIndex--; - } - if (zone.toIndex < x.length - 1 && x[zone.toIndex] < zone.to) { - zone.toIndex++; - } + const { internalZones, totalPoints } = getInternalZones(zones, x); - zone.nbPoints = zone.toIndex - zone.fromIndex + 1; - totalPoints += zone.nbPoints; - } // we calculate the number of points per zone that we should keep if (totalPoints <= nbPoints) { return notEnoughPoints(x, y, internalZones, totalPoints); @@ -101,7 +89,7 @@ export function xyReduce( let currentTotal = 0; for (let i = 0; i < internalZones.length - 1; i++) { const zone = internalZones[i]; - zone.nbPoints = Math.round((zone.nbPoints as number) * ratio); + zone.nbPoints = Math.round(zone.nbPoints * ratio); currentTotal += zone.nbPoints; } (internalZones.at(-1) as InternalZone).nbPoints = nbPoints - currentTotal; @@ -110,11 +98,7 @@ export function xyReduce( const newY: number[] = []; for (const zone of internalZones) { if (!zone.nbPoints) continue; - appendFromTo( - zone.fromIndex as number, - zone.toIndex as number, - zone.nbPoints, - ); + appendFromTo(zone.fromIndex, zone.toIndex, zone.nbPoints); } return { x: newX, y: newY }; @@ -189,7 +173,7 @@ export function xyReduce( } } -function notEnoughPoints( +export function notEnoughPoints( x: NumberArray, y: NumberArray, internalZones: InternalZone[], @@ -199,11 +183,7 @@ function notEnoughPoints( const newY = new Float64Array(totalPoints); let index = 0; for (const zone of internalZones) { - for ( - let i = zone.fromIndex as number; - i < (zone.toIndex as number) + 1; - i++ - ) { + for (let i = zone.fromIndex; i < zone.toIndex + 1; i++) { newX[index] = x[i]; newY[index] = y[i]; index++; @@ -214,3 +194,29 @@ function notEnoughPoints( y: newY, }; } + +export function getInternalZones(zones: FromTo[], x: NumberArray) { + // for each zone we should know the first index, the last index and the number of points + const internalZones: InternalZone[] = []; + let totalPoints = 0; + for (const zone of zones) { + let fromIndex = xFindClosestIndex(x, zone.from); + let toIndex = xFindClosestIndex(x, zone.to); + if (fromIndex > 0 && x[fromIndex] > zone.from) { + fromIndex--; + } + if (toIndex < x.length - 1 && x[toIndex] < zone.to) { + toIndex++; + } + const nbPoints = toIndex - fromIndex + 1; + internalZones.push({ + from: zone.from, + to: zone.to, + fromIndex, + toIndex, + nbPoints, + }); + totalPoints += nbPoints; + } + return { internalZones, totalPoints }; +} diff --git a/src/xy/xyReduceNonContinuous.ts b/src/xy/xyReduceNonContinuous.ts new file mode 100644 index 00000000..883f1ad8 --- /dev/null +++ b/src/xy/xyReduceNonContinuous.ts @@ -0,0 +1,113 @@ +import { DataXY, DoubleArray, FromTo } from 'cheminfo-types'; + +import { zonesNormalize } from '../zones'; + +import { xyCheck } from './xyCheck'; +import { getInternalZones, notEnoughPoints } from './xyReduce'; + +export interface XYReduceOptions { + /** + * @default x[0] + */ + from?: number; + + /** + * @default x[x.length-1] + */ + to?: number; + + /** + * Number of points but we could have couple more + * @default 4001 + */ + maxApproximateNbPoints?: number; + + /** + * Array of zones to keep (from/to object) + * @default [] + */ + zones?: FromTo[]; +} + +/** + * Reduce the number of points while keeping visually the same noise. Practical to + * display many spectra as SVG. This algorithm is designed for non-continuous data. + * We are expecting peaks to be only positive and the x values to be ordered. + * SHOULD NOT BE USED FOR DATA PROCESSING !!! + * @param data - Object that contains property x (an ordered increasing array) and y (an array) + * @param options - options + * @returns Object with x and y arrays + */ +export function xyReduceNonContinuous( + data: DataXY, + options: XYReduceOptions = {}, +): DataXY { + xyCheck(data); + if (data.x.length < 2) { + // todo we should check that the single point is really in the range and the zones + return { + x: Float64Array.from(data.x), + y: Float64Array.from(data.y), + }; + } + const { x, y } = data; + const { + from = x[0], + to = x.at(-1) as number, + maxApproximateNbPoints = 4001, + } = options; + let { zones = [] } = options; + + zones = zonesNormalize(zones, { from, to }); + if (zones.length === 0) zones = [{ from, to }]; // we take everything + + const { internalZones, totalPoints } = getInternalZones(zones, x); + + // we calculate the number of points per zone that we should keep + if (totalPoints <= maxApproximateNbPoints) { + return notEnoughPoints(x, y, internalZones, totalPoints); + } + + const deltaX = (to - from) / (maxApproximateNbPoints - 1); + const newX: number[] = []; + const newY: number[] = []; + for (const internalZone of internalZones) { + const maxNbPoints = + Math.ceil((internalZone.to - internalZone.from) / deltaX) + 1; + const fromIndex = internalZone.fromIndex; + const toIndex = internalZone.toIndex; + + if (toIndex - fromIndex + 1 <= maxNbPoints) { + // we keep all the points + for (let i = fromIndex; i <= toIndex; i++) { + newX.push(x[i]); + newY.push(y[i]); + } + } else { + // we need to reduce the number of points + let currentX = x[fromIndex]; + let currentY = y[fromIndex]; + let lastX = currentX + deltaX; + newX.push(currentX); + newY.push(currentY); + for (let i = fromIndex; i <= toIndex; i++) { + if (x[i] > lastX) { + // next slot + currentX = x[i]; + currentY = y[i]; + newX.push(currentX); + newY.push(currentY); + lastX += deltaX; + } + if (y[i] > currentY) { + currentY = y[i]; + newY[newY.length - 1] = currentY; + } + } + } + } + return { + x: newX, + y: newY, + }; +}