Skip to content

Commit

Permalink
Add OPFS (Origin Private File System) Support (#1856)
Browse files Browse the repository at this point in the history
* add opfs feature

* add test for url with long query string

* update s3rver cors settings

* update httpfs test.

* update httpfs test.

* update httpfs test for eslint

* Fixup patch, now allowing installing from other repositories via 'INSTALL x FROM community'

* fix dropfile

* Update packages/duckdb-wasm/test/opfs.test.ts

Co-authored-by: asrar <[email protected]>

* Improve README

* Update README.md

* Add npm_tags.yml

* Perform checkout

* Fix registerFileHandle.

* update comment

* add test for using file in dirrectory

---------

Co-authored-by: Carlo Piovesan <[email protected]>
Co-authored-by: asrar <[email protected]>
  • Loading branch information
3 people authored Jan 15, 2025
1 parent b42a8e7 commit c1365cf
Show file tree
Hide file tree
Showing 24 changed files with 721 additions and 106 deletions.
2 changes: 1 addition & 1 deletion examples/esbuild-node/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import * as duckdb from '@duckdb/duckdb-wasm';
import * as arrow from 'apache-arrow';
import path from 'path';
import Worker from 'web-worker';
import { createRequire } from 'module';

const require = createRequire(import.meta.url);
const DUCKDB_DIST = path.dirname(require.resolve('@duckdb/duckdb-wasm'));
const Worker = require('web-worker');

(async () => {
try {
Expand Down
2 changes: 2 additions & 0 deletions lib/include/duckdb/web/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ struct WebDBConfig {
std::optional<int8_t> access_mode = std::nullopt;
/// The thread count
uint32_t maximum_threads = (STATIC_WEBDB_FEATURES & (1 << WebDBFeature::THREADS)) ? 4 : 1;
/// The direct io flag
bool use_direct_io = false;
/// The query config
QueryConfig query = {
.cast_bigint_to_double = std::nullopt,
Expand Down
2 changes: 2 additions & 0 deletions lib/include/duckdb/web/io/web_filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ class WebFileSystem : public duckdb::FileSystem {
DataBuffer file_buffer);
/// Try to drop a specific file
bool TryDropFile(std::string_view file_name);
/// drop a specific file
void DropFile(std::string_view file_name);
/// Drop all files without references (including buffers)
void DropDanglingFiles();
/// Configure file statistics
Expand Down
4 changes: 4 additions & 0 deletions lib/js-stubs.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ addToLibrary({
duckdb_web_fs_file_sync: function (fileId) {
return globalThis.DUCKDB_RUNTIME.syncFile(Module, fileId);
},
duckdb_web_fs_file_drop_file__sig: 'vpi',
duckdb_web_fs_file_drop_file: function (fileName, fileNameLen) {
return globalThis.DUCKDB_RUNTIME.dropFile(Module, fileName, fileNameLen);
},
duckdb_web_fs_file_close__sig: 'vi',
duckdb_web_fs_file_close: function (fileId) {
return globalThis.DUCKDB_RUNTIME.closeFile(Module, fileId);
Expand Down
4 changes: 4 additions & 0 deletions lib/src/arrow_type_mapping.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ arrow::Result<duckdb::LogicalType> mapArrowTypeToDuckDB(const arrow::DataType& t
case arrow::Type::type::EXTENSION:
case arrow::Type::type::SPARSE_UNION:
case arrow::Type::type::DENSE_UNION:
case arrow::Type::type::STRING_VIEW:
case arrow::Type::type::BINARY_VIEW:
case arrow::Type::type::LIST_VIEW:
case arrow::Type::type::LARGE_LIST_VIEW:
return arrow::Status::NotImplemented("DuckDB type mapping for: ", type.ToString());
}
return duckdb::LogicalTypeId::INVALID;
Expand Down
3 changes: 3 additions & 0 deletions lib/src/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ WebDBConfig WebDBConfig::ReadFrom(std::string_view args_json) {
if (doc.HasMember("allowUnsignedExtensions") && doc["allowUnsignedExtensions"].IsBool()) {
config.allow_unsigned_extensions = doc["allowUnsignedExtensions"].GetBool();
}
if (doc.HasMember("useDirectIO") && doc["useDirectIO"].IsBool()) {
config.use_direct_io = doc["useDirectIO"].GetBool();
}
if (doc.HasMember("query") && doc["query"].IsObject()) {
auto q = doc["query"].GetObject();
if (q.HasMember("queryPollingInterval") && q["queryPollingInterval"].IsNumber()) {
Expand Down
23 changes: 18 additions & 5 deletions lib/src/io/web_filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ RT_FN(void duckdb_web_fs_file_close(size_t file_id), {
auto &infos = GetLocalState();
infos.handles.erase(file_id);
});
RT_FN(void duckdb_web_fs_file_drop_file(const char *fileName, size_t pathLen), {});
RT_FN(void duckdb_web_fs_file_truncate(size_t file_id, double new_size), { GetOrOpen(file_id).Truncate(new_size); });
RT_FN(time_t duckdb_web_fs_file_get_last_modified_time(size_t file_id), {
auto &file = GetOrOpen(file_id);
Expand Down Expand Up @@ -226,6 +227,8 @@ WebFileSystem::DataProtocol WebFileSystem::inferDataProtocol(std::string_view ur
proto = WebFileSystem::DataProtocol::HTTP;
} else if (hasPrefix(url, "s3://")) {
proto = WebFileSystem::DataProtocol::S3;
} else if (hasPrefix(url, "opfs://")) {
proto = WebFileSystem::DataProtocol::BROWSER_FSACCESS;
} else if (hasPrefix(url, "file://")) {
data_url = std::string_view{url}.substr(7);
proto = default_data_protocol_;
Expand Down Expand Up @@ -453,6 +456,7 @@ void WebFileSystem::DropDanglingFiles() {
for (auto &[file_id, file] : files_by_id_) {
if (file->handle_count_ == 0) {
files_by_name_.erase(file->file_name_);
DropFile(file->file_name_);
if (file->data_url_.has_value()) {
files_by_url_.erase(file->data_url_.value());
}
Expand Down Expand Up @@ -481,6 +485,13 @@ bool WebFileSystem::TryDropFile(std::string_view file_name) {
return false;
}

/// drop a file
void WebFileSystem::DropFile(std::string_view file_name) {
DEBUG_TRACE();
std::string fileNameS = std::string{file_name};
duckdb_web_fs_file_drop_file(fileNameS.c_str(), fileNameS.size());
}

/// Write the global filesystem info
rapidjson::Value WebFileSystem::WriteGlobalFileInfo(rapidjson::Document &doc, uint32_t cache_epoch) {
DEBUG_TRACE();
Expand Down Expand Up @@ -793,7 +804,7 @@ void WebFileSystem::Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_b
auto file_size = file_hdl.file_->file_size_;
auto writer = static_cast<char *>(buffer);
file_hdl.position_ = location;
while (nr_bytes > 0 && location < file_size) {
while (nr_bytes > 0) {
auto n = Write(handle, writer, nr_bytes);
writer += n;
nr_bytes -= n;
Expand Down Expand Up @@ -1006,10 +1017,12 @@ void WebFileSystem::FileSync(duckdb::FileHandle &handle) {
vector<std::string> WebFileSystem::Glob(const std::string &path, FileOpener *opener) {
std::unique_lock<LightMutex> fs_guard{fs_mutex_};
std::vector<std::string> results;
auto glob = glob_to_regex(path);
for (auto [name, file] : files_by_name_) {
if (std::regex_match(file->file_name_, glob)) {
results.push_back(std::string{name});
if (!FileSystem::IsRemoteFile(path)) {
auto glob = glob_to_regex(path);
for (auto [name, file] : files_by_name_) {
if (std::regex_match(file->file_name_, glob)) {
results.push_back(std::string{name});
}
}
}
auto &state = GetLocalState();
Expand Down
6 changes: 6 additions & 0 deletions lib/src/json_typedef.cc
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,12 @@ arrow::Result<rapidjson::Value> WriteSQLType(rapidjson::Document& doc, const duc
case duckdb::LogicalTypeId::AGGREGATE_STATE:
case duckdb::LogicalTypeId::BIT:
case duckdb::LogicalTypeId::LAMBDA:
case duckdb::LogicalTypeId::STRING_LITERAL:
case duckdb::LogicalTypeId::INTEGER_LITERAL:
case duckdb::LogicalTypeId::UHUGEINT:
case duckdb::LogicalTypeId::UNION:
case duckdb::LogicalTypeId::ARRAY:
case duckdb::LogicalTypeId::VARINT:
break;
}
return out;
Expand Down
22 changes: 17 additions & 5 deletions lib/src/webdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,7 @@ arrow::Status WebDB::Open(std::string_view args_json) {
db_config.options.access_mode = access_mode;
db_config.options.duckdb_api = "wasm";
db_config.options.custom_user_agent = config_->custom_user_agent;
db_config.options.use_direct_io = config_->use_direct_io;
auto db = make_shared_ptr<duckdb::DuckDB>(config_->path, &db_config);
#ifndef WASM_LOADABLE_EXTENSIONS
duckdb_web_parquet_init(db.get());
Expand Down Expand Up @@ -912,18 +913,29 @@ arrow::Status WebDB::RegisterFileBuffer(std::string_view file_name, std::unique_
/// Drop all files
arrow::Status WebDB::DropFiles() {
file_page_buffer_->DropDanglingFiles();
pinned_web_files_.clear();
std::vector<std::string> files_to_drop;
for (const auto& [key, handle] : pinned_web_files_) {
files_to_drop.push_back(handle->GetName());
}
for (const auto& fileName : files_to_drop) {
arrow::Status status = DropFile(fileName);
if (!status.ok()) {
return arrow::Status::Invalid("Failed to drop file: " + fileName);
}
}
if (auto fs = io::WebFileSystem::Get()) {
fs->DropDanglingFiles();
}
return arrow::Status::OK();
}
/// Drop a file
arrow::Status WebDB::DropFile(std::string_view file_name) {
file_page_buffer_->TryDropFile(file_name);
pinned_web_files_.erase(file_name);
arrow::Status WebDB::DropFile(std::string_view fileName) {
file_page_buffer_->TryDropFile(fileName);
pinned_web_files_.erase(fileName);
if (auto fs = io::WebFileSystem::Get()) {
if (!fs->TryDropFile(file_name)) {
if (fs->TryDropFile(fileName)) {
fs->DropFile(fileName);
} else {
return arrow::Status::Invalid("file is in use");
}
}
Expand Down
Loading

0 comments on commit c1365cf

Please sign in to comment.