Skip to content

Commit

Permalink
Merge pull request #116 from nomic-ai/smoother-lazy-manifests
Browse files Browse the repository at this point in the history
keep strict manifests on root tile; typing improvements
  • Loading branch information
bmschmidt authored Jun 10, 2024
2 parents 3da7761 + 2cc4bad commit ac1b3a0
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 52 deletions.
7 changes: 4 additions & 3 deletions src/Deeptable.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,10 @@ export class Deeptable {
this.promise = preProcessRootTile.then(async () => {
const batch = await this.root_tile.get_arrow(null);
const schema = batch.schema;
this.root_tile.manifest =
await this.root_tile.deriveManifestInfoFromTileMetadata();

if (!tileManifest) {
this.root_tile.manifest =
await this.root_tile.deriveManifestInfoFromTileMetadata();
}
if (schema.metadata.has('sidecars')) {
const cars = schema.metadata.get('sidecars');
if (typeof cars !== 'string')
Expand Down
54 changes: 29 additions & 25 deletions src/selection.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/* eslint-disable no-constant-condition */
import { Deeptable } from './Deeptable';
import { Scatterplot } from './scatterplot';
import { Tile } from './tile';
Expand All @@ -7,6 +8,7 @@ import {
DataType,
StructRowProxy,
Type,
Utf8,
Vector,
makeData,
} from 'apache-arrow';
Expand Down Expand Up @@ -81,9 +83,9 @@ export interface CompositeSelectParams extends SelectParams {
}

function isCompositeSelectParam(
params: Record<string, any>,
params: CompositeSelectParams | BooleanColumnParams | IdSelectParams,
): params is CompositeSelectParams {
return params.composition !== undefined;
return (params as CompositeSelectParams).composition !== undefined;
}

function isComposition(elems: unknown): elems is Composition {
Expand All @@ -99,9 +101,7 @@ async function extractBitmask(tile: Tile, arg: CompArgs): Promise<Bitmask> {
if (isComposition(arg)) {
return applyCompositeFunctionToTile(tile, arg);
} else {
const column = tile.get_column((arg as DataSelection).name) as Promise<
Vector<Bool>
>;
const column = tile.get_column(arg.name) as Promise<Vector<Bool>>;
return Bitmask.from_arrow(await column);
}
}
Expand Down Expand Up @@ -130,7 +130,7 @@ async function applyCompositeFunctionToTile(
} else if (isPluralSelectOperator(operator)) {
const op = args[0];
const bitmasks = await Promise.all(
args.slice(1).map((arg) => extractBitmask(tile, arg)),
args.slice(1).map((arg: CompArgs) => extractBitmask(tile, arg)),
);
const accumulated = bitmasks
.slice(1)
Expand Down Expand Up @@ -173,9 +173,13 @@ function isBinarySelectOperation(
}

function isFunctionSelectParam(
params: Record<string, any>,
params:
| CompositeSelectParams
| BooleanColumnParams
| IdSelectParams
| FunctionSelectParams,
): params is FunctionSelectParams {
return params.tileFunction !== undefined;
return (params as FunctionSelectParams).tileFunction !== undefined;
}

/**
Expand Down Expand Up @@ -364,7 +368,7 @@ export class DataSelection {
} else if (isCompositeSelectParam(params)) {
const { name, composition } = params;
this.composition = composition;
this.add_function_column(name, async (tile: Tile) => {
void this.add_function_column(name, async (tile: Tile) => {
const bitmask = await applyCompositeFunctionToTile(tile, composition);
return bitmask.to_arrow();
}).then(markReady);
Expand All @@ -377,14 +381,14 @@ export class DataSelection {
* @param listener a function to call back. It takes
* as an argument the `tile` that was just added.
*/
on(event: string, listener: (args: any) => void): void {
on(event: string, listener: (args: unknown) => void): void {
if (!this.events[event]) {
this.events[event] = [];
}
this.events[event].push(listener);
}

private dispatch(event: string, args: any): void {
private dispatch(event: string, args: unknown): void {
if (this.events[event]) {
this.events[event].forEach((listener) => listener(args));
}
Expand Down Expand Up @@ -513,8 +517,8 @@ export class DataSelection {
*
* @param fields A list of fields in the data to export.
*/
async export(fields: string[], format: 'json' = 'json') {
/*
// async export(fields: string[], format: 'json' = 'json') {
/*
This would have benefits, but might fetch data we don't actually need.
const preparation = []
Expand All @@ -525,14 +529,14 @@ export class DataSelection {
}
await Promise.all(preparation)
*/
const columns = Object.fromEntries(fields.map((field) => [field, []]));
for (let row of this) {
for (let field of fields) {
columns[field].push(row[field]);
}
}
return columns;
}
// const columns = Object.fromEntries(fields.map((field) => [field, []]));
// for (let row of this) {
// for (let field of fields) {
// columns[field].push(row[field]);
// }
// }
// return columns;
// }

public moveCursorToPoint(
point: StructRowProxy<{ ix: DataType<Type.Int64> }>,
Expand Down Expand Up @@ -857,7 +861,7 @@ function stringmatcher(field: string, matches: string[]) {
if (!node[byte]) {
node[byte] = [];
}
node = node[byte] as TrieArray;
node = node[byte];
}

// Mark the end of a Uint8Array with a special property
Expand All @@ -878,8 +882,8 @@ function stringmatcher(field: string, matches: string[]) {
* The Deepscatter transformation function.
*/
return async function (tile: Tile) {
const col = (await tile.get_column(field)).data[0];
const bytes = col.values as Uint8Array;
const col = ((await tile.get_column(field)) as Vector<Utf8>).data[0];
const bytes = col.values;
const offsets = col.valueOffsets;

// Initialize results as a Float32Array with the same
Expand All @@ -893,7 +897,7 @@ function stringmatcher(field: string, matches: string[]) {
let node = trie;
for (let i = 0; i < len; i++) {
const byte = bytes[start + i];
node = node[byte] as TrieArray;
node = node[byte];
// If the node for this byte doesn't exist, the slice doesn't exist in the trie
if (!node) {
return false;
Expand Down
12 changes: 10 additions & 2 deletions src/shared.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import { ZoomTransform } from 'd3-zoom';
import { TileBufferManager } from './regl_rendering';
import type { Tile } from './tile';
import type { Rectangle } from './tile';
import { ScaleLinear } from 'd3-scale';
export type { Renderer, Deeptable, ConcreteAesthetic };

export type BufferLocation = {
Expand Down Expand Up @@ -61,11 +60,20 @@ export type ScatterplotOptions = {
// allow certain optimizations.
export type TileStructure = 'quadtree' | 'other';

/**
 * Manifest describing a single tile whose children are listed only by
 * string (compare `TileManifest`, whose `children` are nested
 * `TileManifest` objects).
 */
export type LazyTileManifest = {
// String key for the tile this manifest describes.
key: string;
// The number of data points in that specific tile.
nPoints: number;
// Child tiles, listed as strings rather than nested manifests.
// NOTE(review): presumably these are the children's tile keys — confirm.
children: string[];
// NOTE(review): presumably the lowest and highest `ix` values among this
// tile's points — confirm against the code that builds the manifest.
min_ix: number;
max_ix: number;
// NOTE(review): presumably the bounding rectangle covered by the tile — confirm.
extent: Rectangle;
};
export type TileManifest = {
key: string;
// The number of data points in that specific tile.
nPoints: number;
children: TileManifest[] | string[];
children: TileManifest[];
min_ix: number;
max_ix: number;
extent: Rectangle;
Expand Down
48 changes: 28 additions & 20 deletions src/tile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export type Rectangle = {
// }

import type { TileBufferManager } from './regl_rendering';
import type { ArrowBuildable, TileManifest } from './shared';
import type { ArrowBuildable, LazyTileManifest, TileManifest } from './shared';
import { isCompleteManifest } from './typing';

export type RecordBatchCache =
Expand Down Expand Up @@ -59,10 +59,10 @@ export class Tile {
public _highest_known_ix?: number;
public deeptable: Deeptable;
public _transformations: Record<string, Promise<ArrowBuildable>> = {};
public _deriveManifestFromTileMetadata?: Promise<TileManifest>;
public _deriveManifestFromTileMetadata?: Promise<LazyTileManifest>;
//private _promiseOfChildren? = Promise<void>;
private _partialManifest: Partial<TileManifest>;
private _completeManifest?: TileManifest;
private _partialManifest: Partial<TileManifest> | Partial<LazyTileManifest>;
private _manifest?: TileManifest | LazyTileManifest;

// A cache of fetchCalls for downloaded arrow tables, including any table schema metadata.
// Tables may contain more than a single column, so this prevents multiple dispatch.
Expand All @@ -82,12 +82,17 @@ export class Tile {
* @param deeptable The full atlas deeptable of which this tile is a part.
*/
constructor(
key: string | (Partial<TileManifest> & { key: string }),
key:
| string
| (Partial<TileManifest> & { key: string })
| Partial<LazyTileManifest & { key: string }>,
parent: Tile | null,
deeptable: Deeptable,
) {
// If it's just initiated with a key, build that into a minimal manifest.
let manifest: Partial<TileManifest> & { key: string };
let manifest:
| (Partial<TileManifest> & { key: string })
| Partial<LazyTileManifest & { key: string }>;
if (typeof key === 'string') {
manifest = { key };
} else {
Expand All @@ -106,12 +111,12 @@ export class Tile {
if (deeptable === undefined) {
throw new Error('No deeptable provided');
}

// Grab the next identifier off the queue. This should be async safe with the current setup, but
// the logic might fall apart in truly parallel situations.
this.numeric_id = tile_identifier++;

if (isCompleteManifest(manifest)) this.manifest = manifest;

this._partialManifest = manifest;
}

Expand Down Expand Up @@ -256,14 +261,14 @@ export class Tile {
}
}

get manifest(): TileManifest {
if (!this._completeManifest)
get manifest(): TileManifest | LazyTileManifest {
if (!this._manifest)
throw new Error('Attempted to access manifest on partially loaded tile.');

return this._completeManifest;
return this._manifest;
}

set manifest(manifest: TileManifest) {
set manifest(manifest: TileManifest | LazyTileManifest) {
// Setting the manifest is the thing that spawns children.
if (!manifest.children) {
console.error({ manifest });
Expand All @@ -273,7 +278,7 @@ export class Tile {
return new Tile(k, this, this.deeptable);
});
this.highest_known_ix = manifest.max_ix;
this._completeManifest = manifest;
this._manifest = manifest;
}

set highest_known_ix(val) {
Expand Down Expand Up @@ -314,7 +319,7 @@ export class Tile {
}

get min_ix() {
if (this._completeManifest && this.manifest?.min_ix !== undefined) {
if (this._manifest && this.manifest?.min_ix !== undefined) {
return this.manifest.min_ix;
}
if (this.parent) {
Expand All @@ -332,7 +337,7 @@ export class Tile {
}

get extent(): Rectangle {
if (this._completeManifest && this.manifest?.extent) {
if (this._manifest && this.manifest?.extent) {
return this.manifest.extent;
}
return this.theoretical_extent;
Expand Down Expand Up @@ -437,7 +442,7 @@ export class Tile {
* @returns void
*/
async populateManifest(): Promise<void> {
if (this._completeManifest) {
if (this._manifest) {
return;
} else if (this._partialManifest.children) {
if (this._partialManifest.nPoints === undefined) {
Expand All @@ -446,7 +451,7 @@ export class Tile {
this.manifest = {
...this._partialManifest,
key: this.key,
children: this._partialManifest.children,
children: this._partialManifest.children as string[],
min_ix: this.min_ix,
max_ix: this.max_ix,
extent: this.extent,
Expand Down Expand Up @@ -482,13 +487,15 @@ export class Tile {
});
}

async deriveManifestInfoFromTileMetadata(): Promise<TileManifest> {
async deriveManifestInfoFromTileMetadata(): Promise<
TileManifest | LazyTileManifest
> {
// This should only be called once per tile.
if (this._deriveManifestFromTileMetadata !== undefined) {
return this._deriveManifestFromTileMetadata;
}

const manifest: Partial<TileManifest> = {};
const manifest: Partial<LazyTileManifest> = {};
this._deriveManifestFromTileMetadata = this.get_arrow(null).then(
async (batch) => {
// For every column in the root tile,
Expand Down Expand Up @@ -521,7 +528,8 @@ export class Tile {
const children = metadata.get('children');

if (children) {
manifest.children = JSON.parse(children) as TileManifest[] | string[];
const stringChildren = JSON.parse(children) as string[];
manifest.children = stringChildren;
}

// TODO: make ix optionally parsed from metadata, not column.
Expand All @@ -548,7 +556,7 @@ export class Tile {
max_ix: manifest.max_ix,
extent: manifest.extent,
nPoints: batch.numRows,
} as TileManifest;
} as const;
return fullManifest;
},
);
Expand Down
2 changes: 1 addition & 1 deletion src/typing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ export function isLabelset(labels: DS.Labelcall): labels is DS.Labelset {

// TODO: there is probably a more general way to write this type guard.
export function isCompleteManifest(
manifest: Partial<DS.TileManifest>,
manifest: Partial<DS.TileManifest> | Partial<DS.LazyTileManifest>,
): manifest is DS.TileManifest {
for (const k of [
'key',
Expand Down
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"outDir": "./dist",
"rootDir": "./src",
"lib": ["DOM"],
"noEmitOnError": false,
"noEmitOnError": true,
"emitDeclarationOnly": true,
},
"$schema": "https://json.schemastore.org/tsconfig",
Expand Down

0 comments on commit ac1b3a0

Please sign in to comment.