From dc535ee42bc85c653f75da91b4dee8ace0a1e120 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Thu, 28 Nov 2024 13:23:53 -0500 Subject: [PATCH] feat: Add --prune option to prevent walking subtrees --- src/files/deno.test.ts | 18 ++++++++++++++++-- src/files/deno.ts | 18 ++++++++++++++---- src/files/ignore.ts | 9 +++++++-- src/main.ts | 6 +++++- src/setup/options.ts | 5 +++++ src/tests/regression.test.ts | 3 +-- 6 files changed, 48 insertions(+), 11 deletions(-) diff --git a/src/files/deno.test.ts b/src/files/deno.test.ts index 330f99f2..5182b271 100644 --- a/src/files/deno.test.ts +++ b/src/files/deno.test.ts @@ -12,9 +12,11 @@ await requestReadPermission() // Use this file for testing file behavior const testUrl = import.meta.url const testPath = fromFileUrl(testUrl) -const testDir = dirname(testPath) +const testDir = dirname(testPath) // $REPO/src/files const testFilename = basename(testPath) +const repoRoot = dirname(dirname(dirname(testPath))) const ignore = new FileIgnoreRules([]) +const prune = new FileIgnoreRules(['derivatives'], false) Deno.test('Deno implementation of BIDSFile', async (t) => { await t.step('implements basic file properties', () => { @@ -53,7 +55,7 @@ Deno.test('Deno implementation of BIDSFile', async (t) => { 'strips BOM characters when reading UTF-8 via .text()', async () => { // BOM is invalid in JSON but shows up often from certain tools, so abstract handling it - const bomDir = join(testPath, '..', '..', 'tests') + const bomDir = join(repoRoot, 'src', 'tests') const bomFilename = 'bom-utf8.json' const file = new BIDSFileDeno(bomDir, bomFilename, ignore) const text = await file.text() @@ -75,4 +77,16 @@ Deno.test('Deno implementation of FileTree', async (t) => { assert(testObj !== undefined) assertEquals(testObj.path, `/${parent}/${testFilename}`) }) + + await t.step('implements pruning', async () => { + const dsDir = join(repoRoot, 'tests', 'data', 'valid_dataset') + const derivFile = + 
'derivatives/fmriprep/sub-01/ses-01/func/sub-01_ses-01_task-rest_confounds.tsv.gz' + + const fullTree = await readFileTree(dsDir) + assert(fullTree.get(derivFile)) + + const prunedTree = await readFileTree(dsDir, prune) + assert(!prunedTree.get(derivFile)) + }) }) diff --git a/src/files/deno.ts b/src/files/deno.ts index 3895a0cd..b7f8db61 100644 --- a/src/files/deno.ts +++ b/src/files/deno.ts @@ -117,6 +117,7 @@ async function _readFileTree( rootPath: string, relativePath: string, ignore: FileIgnoreRules, + prune: FileIgnoreRules, parent?: FileTree, ): Promise { await requestReadPermission() @@ -124,10 +125,14 @@ async function _readFileTree( const tree = new FileTree(relativePath, name, parent, ignore) for await (const dirEntry of Deno.readDir(join(rootPath, relativePath))) { + const thisPath = posix.join(relativePath, dirEntry.name) + if (prune.test(thisPath)) { + continue + } if (dirEntry.isFile || dirEntry.isSymlink) { const file = new BIDSFileDeno( rootPath, - posix.join(relativePath, dirEntry.name), + thisPath, ignore, ) file.parent = tree @@ -136,8 +141,9 @@ async function _readFileTree( if (dirEntry.isDirectory) { const dirTree = await _readFileTree( rootPath, - posix.join(relativePath, dirEntry.name), + thisPath, ignore, + prune, tree, ) tree.directories.push(dirTree) @@ -149,9 +155,13 @@ async function _readFileTree( /** * Read in the target directory structure and return a FileTree */ -export async function readFileTree(rootPath: string): Promise { +export async function readFileTree( + rootPath: string, + prune?: FileIgnoreRules, +): Promise { + prune ??= new FileIgnoreRules([], false) const ignore = new FileIgnoreRules([]) - const tree = await _readFileTree(rootPath, '/', ignore) + const tree = await _readFileTree(rootPath, '/', ignore, prune) const bidsignore = tree.get('.bidsignore') if (bidsignore) { try { diff --git a/src/files/ignore.ts b/src/files/ignore.ts index 6cb1cb35..3f362c08 100644 --- a/src/files/ignore.ts +++ b/src/files/ignore.ts @@ 
-27,10 +27,15 @@ const defaultIgnores = [ export class FileIgnoreRules { #ignore: Ignore - constructor(config: string[]) { + constructor( + config: string[], + addDefaults: boolean = true, + ) { // @ts-expect-error this.#ignore = ignore() - this.#ignore.add(defaultIgnores) + if (addDefaults) { + this.#ignore.add(defaultIgnores) + } this.#ignore.add(config) } diff --git a/src/main.ts b/src/main.ts index 56b28fff..35c127c6 100644 --- a/src/main.ts +++ b/src/main.ts @@ -3,6 +3,7 @@ import type { Config } from './setup/options.ts' import * as colors from '@std/fmt/colors' import { readFileTree } from './files/deno.ts' import { fileListToTree } from './files/browser.ts' +import { FileIgnoreRules } from './files/ignore.ts' import { resolve } from '@std/path' import { validate } from './validators/bids.ts' import { consoleFormat, resultToJSONStr } from './utils/output.ts' @@ -21,7 +22,10 @@ export async function main(): Promise { setupLogging(options.debug) const absolutePath = resolve(options.datasetPath) - const tree = await readFileTree(absolutePath) + const prune = options.prune + ? new FileIgnoreRules(['derivatives', 'sourcedata', 'code'], false) + : undefined + const tree = await readFileTree(absolutePath, prune) const config = options.config ? 
JSON.parse(Deno.readTextFileSync(options.config)) as Config : {} diff --git a/src/setup/options.ts b/src/setup/options.ts index 891fcfe3..9caa0fe3 100644 --- a/src/setup/options.ts +++ b/src/setup/options.ts @@ -29,6 +29,7 @@ export type ValidatorOptions = { recursive?: boolean outfile?: string blacklistModalities: string[] + prune?: boolean } const modalityType = new EnumType( @@ -72,6 +73,10 @@ export const validateCommand: Command = new Com '-r, --recursive', 'Validate datasets found in derivatives directories in addition to root dataset', ) + .option( + '-p, --prune', + 'Prune derivatives, sourcedata and code directories on load (disables -r and will underestimate dataset size)', + ) .option( '-o, --outfile ', 'File to write validation results to.', diff --git a/src/tests/regression.test.ts b/src/tests/regression.test.ts index 3d245416..e5aa0664 100644 --- a/src/tests/regression.test.ts +++ b/src/tests/regression.test.ts @@ -3,13 +3,12 @@ import { pathsToTree } from '../files/filetree.ts' import { validate } from '../validators/bids.ts' import type { BIDSFile } from '../types/filetree.ts' - Deno.test('Regression tests', async (t) => { await t.step('Verify ignored files in scans.tsv do not trigger error', async () => { const paths = [ '/dataset_description.json', '/sub-01/anat/sub-01_T1w.nii.gz', - '/sub-01/anat/sub-01_CT.nii.gz', // unknown file + '/sub-01/anat/sub-01_CT.nii.gz', // unknown file '/sub-01/sub-01_scans.tsv', ] const ignore = ['*_CT.nii.gz']