From 199970fb5a3c386eeea18fcac53fc35eb93d2bca Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Wed, 6 Mar 2024 09:54:31 -0500 Subject: [PATCH] update zarr functionality --- paper/joss/paper.md | 2 +- python/neurosift/cli.py | 10 ++++++++-- python/neurosift/local-file-access-js/src/index.js | 14 ++++++++++---- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/paper/joss/paper.md b/paper/joss/paper.md index 3a450173..5d767f58 100644 --- a/paper/joss/paper.md +++ b/paper/joss/paper.md @@ -42,7 +42,7 @@ Neurodata Without Borders files are structured hierarchically and encapsulate va # Architecture and technical innovation -Neurosift is a *static* React/TypeScript website, meaning that it is delivered to the user's browser exactly as stored, without the need for dynamic server-side processing of requests. This approach simplifies deployment and maintenance; it is currently hosted on GitHub Pages, but could easily be deployed to any other static hosting service. +Neurosift is a *static* React/TypeScript website, meaning that it is delivered to the user's browser exactly as stored, without the need for dynamic server-side processing of requests. This approach simplifies deployment and maintenance; it can be deployed to any static hosting service. The main technical challenge in developing Neurosift was the requirement to lazy-load data objects from remote NWB files, which are built on the complex HDF5 format. While HDF5's efficient data organization is ideal for the large, multidimensional datasets typical in neurophysiology, its primary implementations are in the C language. This necessitates a creative solution to enable efficient web-based access to these files. To bridge this gap, Neurosift leverages WebAssembly to run compiled C code in the browser, specifically utilizing a modified version of the h5wasm [@h5wasm] library. Unlike the unmodified h5wasm, which primarily handles fully downloaded files, Neurosift's fork introduces an innovative approach to efficiently read data chunks from remote files. This allows for synchronous data reads without the need for a prior download of the entire file. This solution not only makes Neurosift a powerful tool for neuroscience research but also showcases the potential of WebAssembly in overcoming challenges associated with web-based data analysis tools. diff --git a/python/neurosift/cli.py b/python/neurosift/cli.py index 2a4e6356..8eea3a51 100644 --- a/python/neurosift/cli.py +++ b/python/neurosift/cli.py @@ -18,7 +18,7 @@ def view_nwb(file): abs_fname = os.path.abspath(file) base_fname = os.path.basename(abs_fname) with TemporaryDirectory(prefix="view_nwb") as tmpdir: - # create a symbolic link to the file + # create a symbolic link to the file (or zarr folder) os.symlink(abs_fname, f'{tmpdir}/{base_fname}') # this directory @@ -59,8 +59,14 @@ def view_nwb(file): # run the service process = subprocess.Popen(['npm', 'run', 'start', tmpdir], cwd=f'{this_directory}/local-file-access-js', shell=shell, env=dict(os.environ, PORT=str(port))) + zarr_param = '' + if os.path.isdir(abs_fname): + if not os.path.exists(f'{abs_fname}/.zmetadata'): + raise Exception(f'{abs_fname} is a directory but does not contain a .zmetadata file.') + zarr_param = '&zarr=1' + # open the browser - url = f"https://flatironinstitute.github.io/neurosift/?p=/nwb&url=http://localhost:{port}/files/{base_fname}" + url = f"https://flatironinstitute.github.io/neurosift/?p=/nwb&url=http://localhost:{port}/files/{base_fname}{zarr_param}" print(f'Opening {url}') webbrowser.open(url) diff --git a/python/neurosift/local-file-access-js/src/index.js b/python/neurosift/local-file-access-js/src/index.js index 53ebc0d5..4f397eff 100644 --- a/python/neurosift/local-file-access-js/src/index.js +++ b/python/neurosift/local-file-access-js/src/index.js @@ -6,9 +6,10 @@ if (!dir) { console.error('Please specify a directory.') process.exit(-1) } +console.info('Serving files in', dir) // Allow CORS from flatironinstitute.github.io and localhost:3000 -const allowedOrigins = ['https://flatironinstitute.github.io', 'http://localhost:3000'] +const allowedOrigins = ['https://flatironinstitute.github.io', 'http://localhost:3000', 'http://localhost:4200'] app.use((req, resp, next) => { const origin = req.get('origin') const allowedOrigin = allowedOrigins.includes(origin) ? origin : undefined @@ -26,19 +27,24 @@ app.options('*', (req, resp) => { // Serve files app.get('/files/:fileName(*)', async (req, resp) => { const fileName = req.params.fileName - // Check if the file is shareable if (!isShareable(fileName)) { + console.warn('Access to this file is forbidden.', fileName) resp.send(500).send('Access to this file is forbidden.') return } // Send the file const options = { - root: dir + root: dir, + // let's allow dot files for now + dotfiles: 'allow' } resp.sendFile(fileName, options, function (err) { // I think it's important to have an error handler even if it's just this. (not sure though) + if (err) { + console.warn('Error sending file:', err) + } }) }) @@ -54,7 +60,7 @@ function isShareable(f) { } const fileName = bb[bb.length - 1] if (fileName.startsWith('.')) { - if (!['.zattrs'].includes(fileName)) { + if (!['.zattrs', '.zgroup', '.zarray', '.zmetadata'].includes(fileName)) { // don't show hidden files (with some exceptions) return false }