diff --git a/.gitignore b/.gitignore
index f5d6fe310..473aed741 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,7 @@
 /.emscripten_cache
 .DS_Store
 compile_commands.json
+*.map
 /target
diff --git a/examples/esbuild-browser/index.ts b/examples/esbuild-browser/index.ts
index a940390d9..a1257879d 100644
--- a/examples/esbuild-browser/index.ts
+++ b/examples/esbuild-browser/index.ts
@@ -24,9 +24,23 @@ import * as arrow from 'apache-arrow';
     const db = new duckdb.AsyncDuckDB(logger, worker);
     await db.instantiate(DUCKDB_CONFIG.mainModule, DUCKDB_CONFIG.pthreadWorker);
 
+    // in-memory
     const conn = await db.connect();
     await conn.query<{ v: arrow.Int }>(`SELECT count(*)::INTEGER as v FROM generate_series(0, 100) t(v)`);
 
+    // opfs
+    // const opfsRoot = await navigator.storage.getDirectory();
+    // await opfsRoot.removeEntry('test.db').catch(() => {});
+    // await db.open({
+    //     path: 'opfs://test.db',
+    //     accessMode: duckdb.DuckDBAccessMode.READ_WRITE,
+    // });
+    // const conn = await db.connect();
+    // await conn.send(`CREATE TABLE integers(i INTEGER, j INTEGER);`);
+    // await conn.send(`INSERT INTO integers VALUES (3, 4), (5, 6);`);
+    // await conn.send(`CHECKPOINT;`);
+    // console.log(await conn.query(`SELECT * FROM integers;`));
+
     await conn.close();
     await db.terminate();
     await worker.terminate();
diff --git a/lib/include/duckdb/web/config.h b/lib/include/duckdb/web/config.h
index feceecce3..11935fb02 100644
--- a/lib/include/duckdb/web/config.h
+++ b/lib/include/duckdb/web/config.h
@@ -79,6 +79,8 @@ struct WebDBConfig {
    std::optional access_mode = std::nullopt;
    /// The thread count
    uint32_t maximum_threads = 1;
+   /// The direct I/O flag
+   bool use_direct_io = false;
    /// The query config
    QueryConfig query = {
        .cast_bigint_to_double = std::nullopt,
diff --git a/lib/src/config.cc b/lib/src/config.cc
index 229413e52..13630ca0e 100644
--- a/lib/src/config.cc
+++ b/lib/src/config.cc
@@ -69,6 +69,9 @@ WebDBConfig WebDBConfig::ReadFrom(std::string_view args_json) {
    if (doc.HasMember("maximumThreads") && doc["maximumThreads"].IsNumber()) {
        config.maximum_threads = doc["maximumThreads"].GetInt();
    }
+   if (doc.HasMember("useDirectIO") && doc["useDirectIO"].IsBool()) {
+       config.use_direct_io = doc["useDirectIO"].GetBool();
+   }
    if (doc.HasMember("query") && doc["query"].IsObject()) {
        auto q = doc["query"].GetObject();
        if (q.HasMember("queryPollingInterval") && q["queryPollingInterval"].IsNumber()) {
diff --git a/lib/src/io/web_filesystem.cc b/lib/src/io/web_filesystem.cc
index f9c9b3096..c0ce7013a 100644
--- a/lib/src/io/web_filesystem.cc
+++ b/lib/src/io/web_filesystem.cc
@@ -226,6 +226,8 @@ WebFileSystem::DataProtocol WebFileSystem::inferDataProtocol(std::string_view ur
        proto = WebFileSystem::DataProtocol::HTTP;
    } else if (hasPrefix(url, "s3://")) {
        proto = WebFileSystem::DataProtocol::S3;
+   } else if (hasPrefix(url, "opfs://")) {
+       proto = WebFileSystem::DataProtocol::BROWSER_FSACCESS;
    } else if (hasPrefix(url, "file://")) {
        data_url = std::string_view{url}.substr(7);
        proto = default_data_protocol_;
@@ -778,7 +780,7 @@ void WebFileSystem::Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_b
    auto file_size = file_hdl.file_->file_size_;
    auto writer = static_cast<char *>(buffer);
    file_hdl.position_ = location;
-   while (nr_bytes > 0 && location < file_size) {
+   while (nr_bytes > 0) {
        auto n = Write(handle, writer, nr_bytes);
        writer += n;
        nr_bytes -= n;
diff --git a/lib/src/webdb.cc b/lib/src/webdb.cc
index 096db4bc3..110c49f8c 100644
--- a/lib/src/webdb.cc
+++ b/lib/src/webdb.cc
@@ -818,6 +818,7 @@ arrow::Status WebDB::Open(std::string_view args_json) {
    db_config.options.maximum_threads = config_->maximum_threads;
    db_config.options.use_temporary_directory = false;
    db_config.options.access_mode = access_mode;
+   db_config.options.use_direct_io = config_->use_direct_io;
    auto db = std::make_shared<duckdb::DuckDB>(config_->path, &db_config);
 #ifndef WASM_LOADABLE_EXTENSIONS
    duckdb_web_parquet_init(db.get());
diff --git a/packages/duckdb-wasm/src/bindings/bindings_base.ts b/packages/duckdb-wasm/src/bindings/bindings_base.ts
index dc82dedef..4f4afd36d 100644
--- a/packages/duckdb-wasm/src/bindings/bindings_base.ts
+++ b/packages/duckdb-wasm/src/bindings/bindings_base.ts
@@ -444,13 +444,32 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings {
         }
         dropResponseBuffers(this.mod);
     }
+    /** Prepare a file handle that can only be acquired asynchronously */
+    public async prepareDBFileHandle(path: string, protocol: DuckDBDataProtocol): Promise<void> {
+        if (protocol === DuckDBDataProtocol.BROWSER_FSACCESS && this._runtime.prepareDBFileHandle) {
+            const list = await this._runtime.prepareDBFileHandle(path, DuckDBDataProtocol.BROWSER_FSACCESS);
+            for (const item of list) {
+                const { handle, path: filePath, fromCached } = item;
+                if (!fromCached && handle.getSize()) {
+                    await this.registerFileHandle(filePath, handle, DuckDBDataProtocol.BROWSER_FSACCESS, true);
+                }
+            }
+            return;
+        }
+        throw new Error(`prepareDBFileHandle: unsupported protocol ${protocol}`);
+    }
     /** Register a file object URL */
-    public registerFileHandle<HandleType>(
+    public async registerFileHandle<HandleType>(
         name: string,
         handle: HandleType,
         protocol: DuckDBDataProtocol,
         directIO: boolean,
-    ): void {
+    ): Promise<void> {
+        if (protocol === DuckDBDataProtocol.BROWSER_FSACCESS && handle instanceof FileSystemFileHandle) {
+            // the handle is an async handle; convert it to a sync access handle
+            const fileHandle: FileSystemFileHandle = handle as any;
+            handle = (await fileHandle.createSyncAccessHandle()) as any;
+        }
         const [s, d, n] = callSRet(
             this.mod,
             'duckdb_web_fs_register_file_url',
@@ -462,6 +481,9 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings {
         }
         dropResponseBuffers(this.mod);
         globalThis.DUCKDB_RUNTIME._files = (globalThis.DUCKDB_RUNTIME._files || new Map()).set(name, handle);
+        if (globalThis.DUCKDB_RUNTIME._preparedHandles?.[name]) {
+            delete globalThis.DUCKDB_RUNTIME._preparedHandles[name];
+        }
         if (this.pthread) {
             for (const worker of this.pthread.runningWorkers) {
                 worker.postMessage({
diff --git a/packages/duckdb-wasm/src/bindings/bindings_interface.ts b/packages/duckdb-wasm/src/bindings/bindings_interface.ts
index 9dcf422db..b33d637d6 100644
--- a/packages/duckdb-wasm/src/bindings/bindings_interface.ts
+++ b/packages/duckdb-wasm/src/bindings/bindings_interface.ts
@@ -41,7 +41,8 @@ export interface DuckDBBindings {
         handle: HandleType,
         protocol: DuckDBDataProtocol,
         directIO: boolean,
-    ): void;
+    ): Promise<void>;
+    prepareDBFileHandle(path: string, protocol: DuckDBDataProtocol): Promise<void>;
     globFiles(path: string): WebFile[];
     dropFile(name: string): void;
     dropFiles(): void;
diff --git a/packages/duckdb-wasm/src/bindings/config.ts b/packages/duckdb-wasm/src/bindings/config.ts
index e0cc9da4b..e2f4107e9 100644
--- a/packages/duckdb-wasm/src/bindings/config.ts
+++ b/packages/duckdb-wasm/src/bindings/config.ts
@@ -49,6 +49,10 @@ export interface DuckDBConfig {
      * Note that this will only work with cross-origin isolated sites since it requires SharedArrayBuffers.
      */
     maximumThreads?: number;
+    /**
+     * The direct I/O flag
+     */
+    useDirectIO?: boolean;
     /**
      * The query config
      */
diff --git a/packages/duckdb-wasm/src/bindings/runtime.ts b/packages/duckdb-wasm/src/bindings/runtime.ts
index 2b137c7a0..a28b4cf93 100644
--- a/packages/duckdb-wasm/src/bindings/runtime.ts
+++ b/packages/duckdb-wasm/src/bindings/runtime.ts
@@ -87,6 +87,12 @@ export interface DuckDBGlobalFileInfo {
     s3Config?: S3Config;
 }
 
+export interface PreparedDBFileHandle {
+    path: string;
+    handle: any;
+    fromCached: boolean;
+}
+
 /** Call a function with packed response buffer */
 export function callSRet(
     mod: DuckDBModule,
@@ -147,6 +153,8 @@ export interface DuckDBRuntime {
     checkFile(mod: DuckDBModule, pathPtr: number, pathLen: number): boolean;
     removeFile(mod: DuckDBModule, pathPtr: number, pathLen: number): void;
 
+    prepareDBFileHandle?: (path: string, protocol: DuckDBDataProtocol) => Promise<PreparedDBFileHandle[]>;
+
     // Call a scalar UDF function
     callScalarUDF(
         mod: DuckDBModule,
diff --git a/packages/duckdb-wasm/src/bindings/runtime_browser.ts b/packages/duckdb-wasm/src/bindings/runtime_browser.ts
index 74948bae7..74797f73a 100644
--- a/packages/duckdb-wasm/src/bindings/runtime_browser.ts
+++ b/packages/duckdb-wasm/src/bindings/runtime_browser.ts
@@ -11,13 +11,19 @@ import {
     failWith,
     FileFlags,
     readString,
+    PreparedDBFileHandle,
 } from './runtime';
 import { DuckDBModule } from './duckdb_module';
 import * as udf from './udf_runtime';
 
+const OPFS_PREFIX_LEN = 'opfs://'.length;
+const PATH_SEP_REGEX = /\/|\\/;
+
 export const BROWSER_RUNTIME: DuckDBRuntime & {
+    _files: Map<string, any>;
     _fileInfoCache: Map<number, DuckDBFileInfo>;
     _globalFileInfo: DuckDBGlobalFileInfo | null;
+    _preparedHandles: Record<string, any>;
 
     getFileInfo(mod: DuckDBModule, fileId: number): DuckDBFileInfo | null;
     getGlobalFileInfo(mod: DuckDBModule): DuckDBGlobalFileInfo | null;
@@ -26,6 +32,7 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
     _fileInfoCache: new Map(),
     _udfFunctions: new Map(),
     _globalFileInfo: null,
+    _preparedHandles: {} as any,
 
     getFileInfo(mod: DuckDBModule, fileId: number): DuckDBFileInfo | null {
         try {
@@ -50,6 +57,10 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
             }
             const file = { ...info, blob: null } as DuckDBFileInfo;
             BROWSER_RUNTIME._fileInfoCache.set(fileId, file);
+            if (!BROWSER_RUNTIME._files.has(file.fileName) && BROWSER_RUNTIME._preparedHandles[file.fileName]) {
+                BROWSER_RUNTIME._files.set(file.fileName, BROWSER_RUNTIME._preparedHandles[file.fileName]);
+                delete BROWSER_RUNTIME._preparedHandles[file.fileName];
+            }
             return file;
         } catch (e: any) {
             console.log(e);
@@ -86,6 +97,59 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
         }
     },
 
+    /** Prepare a file handle that can only be acquired asynchronously */
+    async prepareDBFileHandle(dbPath: string, protocol: DuckDBDataProtocol): Promise<PreparedDBFileHandle[]> {
+        if (protocol === DuckDBDataProtocol.BROWSER_FSACCESS) {
+            const filePaths = [dbPath, `${dbPath}.wal`];
+            const prepare = async (path: string): Promise<PreparedDBFileHandle> => {
+                if (BROWSER_RUNTIME._files.has(path)) {
+                    return {
+                        path,
+                        handle: BROWSER_RUNTIME._files.get(path),
+                        fromCached: true,
+                    };
+                }
+                const opfsRoot = await navigator.storage.getDirectory();
+                let dirHandle: FileSystemDirectoryHandle = opfsRoot;
+                // check if mkdir -p is needed
+                const opfsPath = path.slice(OPFS_PREFIX_LEN);
+                let fileName = opfsPath;
+                if (PATH_SEP_REGEX.test(opfsPath)) {
+                    const folders = opfsPath.split(PATH_SEP_REGEX);
+                    fileName = folders.pop()!;
+                    if (!fileName) {
+                        throw new Error(`Invalid path ${path}`);
+                    }
+                    // mkdir -p
+                    for (const folder of folders) {
+                        dirHandle = await dirHandle.getDirectoryHandle(folder, { create: true });
+                    }
+                }
+                const fileHandle = await dirHandle.getFileHandle(fileName, { create: false }).catch(e => {
+                    if (e?.name === 'NotFoundError') {
+                        console.log(`File ${path} does not exist yet, creating`);
+                        return dirHandle.getFileHandle(fileName, { create: true });
+                    }
+                    throw e;
+                });
+                const handle = await fileHandle.createSyncAccessHandle();
+                BROWSER_RUNTIME._preparedHandles[path] = handle;
+                return {
+                    path,
+                    handle,
+                    fromCached: false,
+                };
+            };
+            const result: PreparedDBFileHandle[] = [];
+            for (const filePath of filePaths) {
+                const res = await prepare(filePath);
+                result.push(res);
+            }
+            return result;
+        }
+        throw new Error(`Unsupported protocol ${protocol} for path ${dbPath}`);
+    },
+
     testPlatformFeature: (_mod: DuckDBModule, feature: number): boolean => {
         switch (feature) {
             case 1:
@@ -182,7 +246,7 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
 
                     // Try to fallback to full read?
                     if (file.allowFullHttpReads) {
-                        if ((contentLength !== null) && (+contentLength > 1)) {
+                        if (contentLength !== null && +contentLength > 1) {
                             // 2. Send a dummy GET range request querying the first byte of the file
                             // -> good IFF status is 206 and contentLenght2 is 1
                             // -> otherwise, iff 200 and contentLenght2 == contentLenght
@@ -264,6 +328,21 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
                     mod.HEAPF64[(result >> 3) + 1] = buffer;
                     return result;
                 }
+                case DuckDBDataProtocol.BROWSER_FSACCESS: {
+                    const handle: FileSystemSyncAccessHandle = BROWSER_RUNTIME._files?.get(file.fileName);
+                    if (!handle) {
+                        throw new Error(`No OPFS access handle registered with name: ${file.fileName}`);
+                    }
+                    if (flags & FileFlags.FILE_FLAGS_FILE_CREATE_NEW) {
+                        handle.truncate(0);
+                    }
+                    const result = mod._malloc(2 * 8);
+                    const fileSize = handle.getSize();
+                    console.log(`[BROWSER_RUNTIME] opening ${file.fileName} with size ${fileSize}`);
+                    mod.HEAPF64[(result >> 3) + 0] = fileSize;
+                    mod.HEAPF64[(result >> 3) + 1] = 0;
+                    return result;
+                }
             }
         } catch (e: any) {
             // TODO (samansmink): this path causes the WASM code to hang
@@ -311,11 +390,19 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
                         return;
                     }
                     const contentLength = xhr2.getResponseHeader('Content-Length');
-                    if (contentLength && (+contentLength > 1)) {
-                        console.warn(`Range request for ${path} did not return a partial response: ${xhr2.status} "${xhr2.statusText}"`);
+                    if (contentLength && +contentLength > 1) {
+                        console.warn(
+                            `Range request for ${path} did not return a partial response: ${xhr2.status} "${xhr2.statusText}"`,
+                        );
                     }
                 }
                 mod.ccall('duckdb_web_fs_glob_add_path', null, ['string'], [path]);
+            } else {
+                for (const [filePath] of BROWSER_RUNTIME._files.entries()) {
+                    if (filePath.startsWith(path)) {
+                        mod.ccall('duckdb_web_fs_glob_add_path', null, ['string'], [filePath]);
+                    }
+                }
             }
         } catch (e: any) {
             console.log(e);
@@ -340,6 +427,8 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
             }
             xhr.send(null);
             return xhr.status == 206 || xhr.status == 200;
+        } else {
+            return BROWSER_RUNTIME._files.has(path);
         }
     } catch (e: any) {
         console.log(e);
@@ -361,11 +450,13 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
                 // XXX Remove from registry
                 return;
             case DuckDBDataProtocol.BROWSER_FSACCESS: {
-                const handle = BROWSER_RUNTIME._files?.get(file.fileName);
+                const handle: FileSystemSyncAccessHandle = BROWSER_RUNTIME._files?.get(file.fileName);
                 if (!handle) {
                     throw new Error(`No OPFS access handle registered with name: ${file.fileName}`);
                 }
-                return handle.flush();
+                handle.flush();
+                handle.close();
+                BROWSER_RUNTIME._files.delete(file.fileName);
             }
         }
     },
@@ -429,8 +520,14 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
             } else if (xhr.status == 200) {
                 // TODO: here we are actually throwing away all non-relevant bytes, but this is still better than failing
                 // proper solution would require notifying duckdb-wasm cache, while we are piggybackign on browser cache
-                console.warn(`Range request for ${file.dataUrl} did not return a partial response: ${xhr.status} "${xhr.statusText}"`);
-                const src = new Uint8Array(xhr.response, location, Math.min(xhr.response.byteLength-location, bytes));
+                console.warn(
+                    `Range request for ${file.dataUrl} did not return a partial response: ${xhr.status} "${xhr.statusText}"`,
+                );
+                const src = new Uint8Array(
+                    xhr.response,
+                    location,
+                    Math.min(xhr.response.byteLength - location, bytes),
+                );
                 mod.HEAPU8.set(src, buf);
                 return src.byteLength;
             } else {
@@ -454,7 +551,7 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
                 return data.byteLength;
             }
             case DuckDBDataProtocol.BROWSER_FSACCESS: {
-                const handle = BROWSER_RUNTIME._files?.get(file.fileName);
+                const handle: FileSystemSyncAccessHandle = BROWSER_RUNTIME._files.get(file.fileName);
                 if (!handle) {
                     throw new Error(`No OPFS access handle registered with name: ${file.fileName}`);
                 }
@@ -491,7 +588,7 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
                 failWith(mod, 'cannot write using the html5 file reader api');
                 return 0;
             case DuckDBDataProtocol.BROWSER_FSACCESS: {
-                const handle = BROWSER_RUNTIME._files?.get(file.fileName);
+                const handle: FileSystemSyncAccessHandle = BROWSER_RUNTIME._files?.get(file.fileName);
                 if (!handle) {
                     throw new Error(`No OPFS access handle registered with name: ${file.fileName}`);
                 }
diff --git a/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts b/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts
index 5d1ea6b1c..188ab9973 100644
--- a/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts
+++ b/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts
@@ -1,4 +1,4 @@
-import { DuckDBBindings } from '../bindings';
+import { DuckDBBindings, DuckDBDataProtocol } from '../bindings';
 import { WorkerResponseVariant, WorkerRequestVariant, WorkerRequestType, WorkerResponseType } from './worker_request';
 import { Logger, LogEntryVariant } from '../log';
 import { InstantiationProgress } from '../bindings/progress';
@@ -134,10 +134,16 @@ export abstract class AsyncDuckDBDispatcher implements Logger {
                 this.sendOK(request);
                 break;
 
-            case WorkerRequestType.OPEN:
+            case WorkerRequestType.OPEN: {
+                const path = request.data.path;
+                if (path?.startsWith('opfs://')) {
+                    await this._bindings.prepareDBFileHandle(path, DuckDBDataProtocol.BROWSER_FSACCESS);
+                    request.data.useDirectIO = true;
+                }
                 this._bindings.open(request.data);
                 this.sendOK(request);
                 break;
+            }
             case WorkerRequestType.DROP_FILE:
                 this._bindings.dropFile(request.data);
                 this.sendOK(request);
@@ -322,7 +328,7 @@ export abstract class AsyncDuckDBDispatcher implements Logger {
                 break;
 
             case WorkerRequestType.REGISTER_FILE_HANDLE:
-                this._bindings.registerFileHandle(
+                await this._bindings.registerFileHandle(
                     request.data[0],
                     request.data[1],
                     request.data[2],
diff --git a/packages/duckdb-wasm/test/index_browser.ts b/packages/duckdb-wasm/test/index_browser.ts
index 0668b555d..7dfd19668 100644
--- a/packages/duckdb-wasm/test/index_browser.ts
+++ b/packages/duckdb-wasm/test/index_browser.ts
@@ -100,6 +100,7 @@ import { testBindings, testAsyncBindings } from './bindings.test';
 import { testBatchStream } from './batch_stream.test';
 import { testAsyncBatchStream } from './batch_stream_async.test';
 import { testFilesystem } from './filesystem.test';
+import { testOPFS } from './opfs.test';
 import { testArrowInsert, testArrowInsertAsync } from './insert_arrow.test';
 import { testJSONInsert, testJSONInsertAsync } from './insert_json.test';
 import { testCSVInsert, testCSVInsertAsync } from './insert_csv.test';
@@ -126,6 +127,7 @@ testAsyncBindings(() => adb!, dataURL, duckdb.DuckDBDataProtocol.HTTP);
 testBatchStream(() => db!);
 testAsyncBatchStream(() => adb!);
 testFilesystem(() => adb!, resolveData, dataURL, duckdb.DuckDBDataProtocol.HTTP);
+testOPFS(dataURL, () => DUCKDB_BUNDLE!);
 testArrowInsert(() => db!);
 testArrowInsertAsync(() => adb!);
 testJSONInsert(() => db!);
diff --git a/packages/duckdb-wasm/test/opfs.test.ts b/packages/duckdb-wasm/test/opfs.test.ts
new file mode 100644
index 000000000..6c42937de
--- /dev/null
+++ b/packages/duckdb-wasm/test/opfs.test.ts
@@ -0,0 +1,139 @@
+import * as duckdb from '../src/';
+import * as arrow from 'apache-arrow';
+
+export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): void {
+    // we use a separate db instance here because karma runs all tests in parallel;
+    // a separate db avoids conflicts, since we call db.open here
+    let db: duckdb.AsyncDuckDB;
+    let conn: duckdb.AsyncDuckDBConnection;
+
+    beforeAll(async () => {
+        const opfsRoot = await navigator.storage.getDirectory();
+        await opfsRoot.removeEntry('test.db').catch(() => {});
+        const logger = new duckdb.ConsoleLogger();
+        const worker = new Worker(bundle().mainWorker!);
+        db = new duckdb.AsyncDuckDB(logger, worker);
+        await db.instantiate(bundle().mainModule, bundle().pthreadWorker);
+    });
+
+    afterAll(async () => {
+        await conn.close();
+        await db.terminate();
+    });
+
+    beforeEach(async () => {
+        const opfsRoot = await navigator.storage.getDirectory();
+        await opfsRoot.removeEntry('test.db').catch(() => {});
+        await db.open({
+            path: 'opfs://test.db',
+            accessMode: duckdb.DuckDBAccessMode.READ_WRITE,
+        });
+        conn = await db.connect();
+    });
+
+    afterEach(async () => {
+        await conn.close();
+        // switching from the OPFS db to an in-memory db closes and releases the file
+        await db.open({});
+    });
+
+    describe('Load Data', () => {
+        it('Import Small Parquet file', async () => {
+            await conn.send(`CREATE TABLE stu AS SELECT * FROM "${baseDir}/uni/studenten.parquet"`);
+            const result = await conn.send(`SELECT matrnr FROM stu;`);
+            const batches = [];
+            for await (const batch of result) {
+                batches.push(batch);
+            }
+            const table = new arrow.Table<{ matrnr: arrow.Int }>(batches);
+            expect(table.getChildAt(0)?.toArray()).toEqual(
+                new Int32Array([24002, 25403, 26120, 26830, 27550, 28106, 29120, 29555]),
+            );
+        });
+
+        it('Import Large Parquet file', async () => {
+            await conn.send(`CREATE TABLE lineitem AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`);
+            const result = await conn.send(`SELECT count(*)::INTEGER as cnt FROM lineitem;`);
+            const batches = [];
+            for await (const batch of result) {
+                batches.push(batch);
+            }
+            const table = new arrow.Table<{ cnt: arrow.Int }>(batches);
+            expect(table.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+        });
+
+        it('Load Existing DB File in OPFS', async () => {
+            // first create a db file with data
+            await conn.send(`CREATE TABLE tmp AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`);
+            await conn.send(`CHECKPOINT;`);
+            // exit, reopen and load the persisted db file
+            await conn.close();
+            await db.open({});
+            await db.open({
+                path: 'opfs://test.db',
+                accessMode: duckdb.DuckDBAccessMode.READ_WRITE,
+            });
+            conn = await db.connect();
+
+            const result = await conn.send(`SELECT count(*)::INTEGER as cnt FROM tmp;`);
+            const batches = [];
+            for await (const batch of result) {
+                batches.push(batch);
+            }
+            const table = new arrow.Table<{ cnt: arrow.Int }>(batches);
+            expect(table.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+        });
+
+        it('Export as CSV to OPFS + Load CSV that is already in OPFS', async () => {
+            // 1. register an OPFS file handle so CSV data can be exported from DuckDB to OPFS
+            const opfsRoot = await navigator.storage.getDirectory();
+            await opfsRoot.removeEntry('test.csv').catch(() => {});
+            const testHandle = await opfsRoot.getFileHandle('test.csv', { create: true });
+            await db.registerFileHandle('test.csv', testHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
+            // 2. export CSV data to OPFS
+            await conn.send(`COPY (SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet") TO 'test.csv'`);
+            // 3. exit, reopen and load the csv file
+            await db.dropFile('test.csv');
+            await conn.close();
+            await db.open({});
+            conn = await db.connect();
+            await db.registerFileHandle('test.csv', testHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
+
+            const result = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'test.csv';`);
+            const batches = [];
+            for await (const batch of result) {
+                batches.push(batch);
+            }
+            const table = new arrow.Table<{ cnt: arrow.Int }>(batches);
+            expect(table.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+        });
+
+        it('Load Parquet file that is already in OPFS', async () => {
+            // download parquet
+            const parquetBuffer = await fetch(`${baseDir}/tpch/0_01/parquet/lineitem.parquet`).then(res =>
+                res.arrayBuffer(),
+            );
+
+            // write parquet to OPFS
+            const opfsRoot = await navigator.storage.getDirectory();
+            await opfsRoot.removeEntry('test.parquet').catch(() => {});
+            const fileHandle = await opfsRoot.getFileHandle('test.parquet', { create: true });
+            const writable = await fileHandle.createWritable();
+            await writable.write(parquetBuffer);
+            await writable.close();
+
+            // load parquet from OPFS using the read_parquet function
+            // Note: a plain scan of 'test.parquet' would work as well; we use read_parquet here to exercise the function
+            await db.registerFileHandle('test.parquet', fileHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
+            await conn.send(`CREATE TABLE lineitem AS SELECT * FROM read_parquet('test.parquet')`);
+
+            const result = await conn.send(`SELECT count(*)::INTEGER as cnt FROM lineitem;`);
+            const batches = [];
+            for await (const batch of result) {
+                batches.push(batch);
+            }
+            const table = new arrow.Table<{ cnt: arrow.Int }>(batches);
+            expect(table.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+        });
+    });
+}
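
A minimal usage sketch for the OPFS support added in this patch, based on the commented example in examples/esbuild-browser/index.ts and the tests in opfs.test.ts. The function name persistToOpfs and the path 'opfs://test.db' are illustrative; the calls themselves mirror the APIs this diff introduces.

import * as duckdb from '@duckdb/duckdb-wasm';

// Open a database backed by OPFS. For an opfs:// path, the worker dispatcher
// calls prepareDBFileHandle() to create sync access handles for 'test.db' and
// 'test.db.wal' (mkdir -p style for nested paths) and forces direct I/O.
async function persistToOpfs(db: duckdb.AsyncDuckDB): Promise<void> {
    await db.open({
        path: 'opfs://test.db',
        accessMode: duckdb.DuckDBAccessMode.READ_WRITE,
    });
    const conn = await db.connect();
    await conn.send(`CREATE TABLE integers(i INTEGER, j INTEGER);`);
    await conn.send(`INSERT INTO integers VALUES (3, 4), (5, 6);`);
    // CHECKPOINT flushes the WAL so the data is persisted in OPFS
    await conn.send(`CHECKPOINT;`);
    await conn.close();
    // switching back to an in-memory database closes and releases the OPFS handles
    await db.open({});
}

Standalone OPFS files (CSV, Parquet) instead go through registerFileHandle(name, handle, DuckDBDataProtocol.BROWSER_FSACCESS, true), which is now async because FileSystemFileHandle instances are converted to sync access handles before registration.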