feat(shapefile): Typed Shapefile loaders
ibgreen committed Sep 5, 2023
1 parent b7c1ef5 commit 5f4f359
Showing 19 changed files with 243 additions and 202 deletions.
14 changes: 11 additions & 3 deletions modules/shapefile/src/dbf-loader.ts
@@ -1,14 +1,22 @@
-import type {Loader, LoaderWithParser} from '@loaders.gl/loader-utils';
+import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
+import type {ObjectRowTable} from '@loaders.gl/schema';
 // import type {DBFResult} from './lib/parsers/parse-dbf';
 import {parseDBF, parseDBFInBatches} from './lib/parsers/parse-dbf';
 
 // __VERSION__ is injected by babel-plugin-version-inline
 // @ts-ignore TS2304: Cannot find name '__VERSION__'.
 const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
 
+export type DBFLoaderOptions = LoaderOptions & {
+  dbf?: {
+    encoding?: string;
+  };
+};
+
 /**
  * DBFLoader - DBF files are used to contain non-geometry columns in Shapefiles
  */
-export const DBFWorkerLoader: Loader = {
+export const DBFWorkerLoader: Loader<ObjectRowTable, ObjectRowTable, DBFLoaderOptions> = {
   name: 'DBF',
   id: 'dbf',
   module: 'shapefile',
@@ -25,7 +33,7 @@ export const DBFWorkerLoader: Loader = {
 };
 
 /** DBF file loader */
-export const DBFLoader: LoaderWithParser = {
+export const DBFLoader: LoaderWithParser<ObjectRowTable, ObjectRowTable, DBFLoaderOptions> = {
   ...DBFWorkerLoader,
   parse: async (arrayBuffer, options) => parseDBF(arrayBuffer, options),
   parseSync: parseDBF,
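With the three type parameters in place, both the options bag and the parsed result are statically typed. A minimal usage sketch, not part of the commit (the file URL is hypothetical; `load` comes from `@loaders.gl/core`):

import {load} from '@loaders.gl/core';
import {DBFLoader} from '@loaders.gl/shapefile';

// `options.dbf` is type-checked against DBFLoaderOptions,
// and the result is typed as ObjectRowTable.
const table = await load('data/example.dbf', DBFLoader, {
  dbf: {encoding: 'latin1'}
});
console.log(table.shape); // 'object-row-table'
console.log(table.data.length); // number of rows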
140 changes: 92 additions & 48 deletions modules/shapefile/src/lib/parsers/parse-dbf.ts
@@ -1,13 +1,53 @@
-import {Field, ObjectRowTable} from '@loaders.gl/schema';
-import BinaryChunkReader from '../streaming/binary-chunk-reader';
-import {
-  DBFLoaderOptions,
-  DBFResult,
-  DBFTableOutput,
-  DBFHeader,
-  DBFRowsOutput,
-  DBFField
-} from './types';
+// loaders.gl, MIT license
+
+import type {Field, ObjectRowTable, ObjectRowTableBatch} from '@loaders.gl/schema';
+import {Schema} from '@loaders.gl/schema';
+import {BinaryChunkReader} from '../streaming/binary-chunk-reader';
+
+type DBFParserOptions = {
+  dbf?: {
+    encoding?: string;
+  };
+};
+
+export type DBFResult = {
+  data: {[key: string]: unknown[]}[];
+  schema?: Schema;
+  error?: string;
+  dbfHeader?: DBFHeader;
+  dbfFields?: DBFField[];
+  progress: {
+    bytesUsed: number;
+    rowsTotal: number;
+    rows: number;
+  };
+};
+
+/** Binary header stored in DBF file */
+export type DBFHeader = {
+  /** Last updated date - year */
+  year: number;
+  /** Last updated date - month */
+  month: number;
+  /** Last updated date - day */
+  day: number;
+  /** Number of records in data file */
+  nRecords: number;
+  /** Length of header in bytes */
+  headerLength: number;
+  /** Length of each record */
+  recordLength: number;
+  /** Not clear if this is usually set */
+  languageDriver: number;
+};
+
+/** Field descriptor */
+export type DBFField = {
+  name: string;
+  dataType: string;
+  fieldLength: number;
+  decimal: number;
+};
 
 const LITTLE_ENDIAN = true;
 const DBF_HEADER_SIZE = 32;
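For orientation, each `DBFField` descriptor drives how one fixed-width slice of a record is decoded into a row property. A hypothetical pair of descriptors and the row they would produce ('C' and 'N' are the standard DBF character and numeric type codes; the values are made up):

// Hypothetical field descriptors for a two-column table
const fields: DBFField[] = [
  {name: 'CITY', dataType: 'C', fieldLength: 20, decimal: 0}, // 20-byte character field
  {name: 'POP', dataType: 'N', fieldLength: 10, decimal: 0} // 10-byte numeric field
];
// Each record would then parse into a row such as:
// {CITY: 'Stockholm', POP: 975904}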
@@ -25,7 +65,12 @@ class DBFParser {
   textDecoder: TextDecoder;
   state = STATE.START;
   result: DBFResult = {
-    data: []
+    data: [],
+    progress: {
+      bytesUsed: 0,
+      rowsTotal: 0,
+      rows: 0
+    }
   };
 
   constructor(options: {encoding: string}) {
@@ -62,42 +107,30 @@ class DBFParser {
  * @param options
  * @returns DBFTable or rows
  */
-export function parseDBF(
-  arrayBuffer: ArrayBuffer,
-  options: DBFLoaderOptions = {}
-): DBFRowsOutput | DBFTableOutput | ObjectRowTable {
+export function parseDBF(arrayBuffer: ArrayBuffer, options: DBFParserOptions = {}): ObjectRowTable {
   const {encoding = 'latin1'} = options.dbf || {};
 
   const dbfParser = new DBFParser({encoding});
   dbfParser.write(arrayBuffer);
   dbfParser.end();
 
   const {data, schema} = dbfParser.result;
-  const shape = options?.tables?.format || options?.dbf?.shape;
-  switch (shape) {
-    case 'object-row-table': {
-      const table: ObjectRowTable = {
-        shape: 'object-row-table',
-        schema,
-        data
-      };
-      return table;
-    }
-    case 'table':
-      return {schema, rows: data};
-    case 'rows':
-    default:
-      return data;
-  }
+  const table: ObjectRowTable = {
+    shape: 'object-row-table',
+    schema,
+    data
+  };
+  return table;
 }

 /**
  * @param asyncIterator
  * @param options
  */
 export async function* parseDBFInBatches(
   asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
-  options: DBFLoaderOptions = {}
-): AsyncIterable<DBFHeader | DBFRowsOutput | DBFTableOutput> {
+  options: DBFParserOptions = {}
+): AsyncIterable<ObjectRowTableBatch> {
   const {encoding = 'latin1'} = options.dbf || {};
 
   const parser = new DBFParser({encoding});
@@ -106,26 +139,46 @@ export async function* parseDBFInBatches(
     parser.write(arrayBuffer);
     if (!headerReturned && parser.result.dbfHeader) {
       headerReturned = true;
-      yield parser.result.dbfHeader;
+      yield {
+        batchType: 'metadata',
+        shape: 'object-row-table',
+        data: [],
+        length: 0,
+        // Additional data
+        dbfHeader: parser.result.dbfHeader
+      };
     }
 
     if (parser.result.data.length > 0) {
-      yield parser.result.data;
+      const data = parser.result.data;
+      parser.result.data = [];
+      yield {
+        batchType: 'data',
+        shape: 'object-row-table',
+        data,
+        length: data.length
+      };
     }
   }
   parser.end();
   if (parser.result.data.length > 0) {
-    yield parser.result.data;
+    const data = parser.result.data;
+    yield {
+      batchType: 'data',
+      shape: 'object-row-table',
+      data,
+      length: data.length
+    };
   }
 }
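The batched parser now yields typed `ObjectRowTableBatch` objects rather than bare header objects and row arrays: first a `'metadata'` batch carrying `dbfHeader`, then `'data'` batches of rows. A consumption sketch, not part of the commit (the URL is hypothetical; `fetchFile` and `parseInBatches` come from `@loaders.gl/core`):

import {fetchFile, parseInBatches} from '@loaders.gl/core';
import {DBFLoader} from '@loaders.gl/shapefile';

const response = await fetchFile('data/example.dbf');
const batches = await parseInBatches(response, DBFLoader, {dbf: {encoding: 'latin1'}});

for await (const batch of batches) {
  if (batch.batchType === 'metadata') {
    // Header-only batch: data is empty, extra dbfHeader field attached
    console.log('header', (batch as any).dbfHeader);
  } else {
    console.log(`rows in batch: ${batch.length}`);
  }
}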
 /**
- * https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
+ * State machine for DBF parsing
  * @param state
  * @param result
  * @param binaryReader
  * @param textDecoder
  * @returns
+ * @see https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
  */
 /* eslint-disable complexity, max-depth */
 function parseState(
@@ -161,8 +214,7 @@ function parseState(
     case STATE.FIELD_DESCRIPTORS:
       // Parse DBF field descriptors (schema)
       const fieldDescriptorView = binaryReader.getDataView(
-        // @ts-ignore
-        result.dbfHeader.headerLength - DBF_HEADER_SIZE
+        result.dbfHeader!.headerLength - DBF_HEADER_SIZE
       );
       if (!fieldDescriptorView) {
         return state;
@@ -191,10 +243,8 @@ function parseState(
           // Note: Avoid actually reading the last byte, which may not be present
           binaryReader.skip(1);
 
-          // @ts-ignore
-          const row = parseRow(recordView, result.dbfFields, textDecoder);
+          const row = parseRow(recordView, result.dbfFields || [], textDecoder);
           result.data.push(row);
-          // @ts-ignore
           result.progress.rows = result.data.length;
         }
         state = STATE.END;
@@ -218,17 +268,12 @@ function parseState(
  */
 function parseDBFHeader(headerView: DataView): DBFHeader {
   return {
-    // Last updated date
     year: headerView.getUint8(1) + 1900,
     month: headerView.getUint8(2),
     day: headerView.getUint8(3),
-    // Number of records in data file
     nRecords: headerView.getUint32(4, LITTLE_ENDIAN),
-    // Length of header in bytes
     headerLength: headerView.getUint16(8, LITTLE_ENDIAN),
-    // Length of each record
     recordLength: headerView.getUint16(10, LITTLE_ENDIAN),
-    // Not sure if this is usually set
     languageDriver: headerView.getUint8(29)
   };
 }
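The byte offsets above follow the dBASE header layout (see the @see link). A test-style sketch that builds a synthetic 32-byte header and reads it back with the same offsets (all values are made up):

// Build a synthetic DBF header preamble and decode it manually
const headerBuffer = new ArrayBuffer(32);
const header = new DataView(headerBuffer);
header.setUint8(1, 123); // year stored as offset from 1900 => 2023
header.setUint8(2, 9); // month
header.setUint8(3, 5); // day
header.setUint32(4, 1000, true); // nRecords (little-endian)
header.setUint16(8, 97, true); // headerLength: 32 + 2 fields * 32 + 1 terminator byte
header.setUint16(10, 31, true); // recordLength: 1 flag byte + 30 field bytes

console.log(header.getUint8(1) + 1900); // 2023
console.log(header.getUint32(4, true)); // 1000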
@@ -266,7 +311,6 @@ function parseRows(binaryReader, fields, nRecords, recordLength, textDecoder) {
   for (let i = 0; i < nRecords; i++) {
     const recordView = binaryReader.getDataView(recordLength - 1);
     binaryReader.skip(1);
-    // @ts-ignore
     rows.push(parseRow(recordView, fields, textDecoder));
   }
   return rows;
(Diff not loaded for the remaining 17 changed files.)
