Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow a mix of dir and files arguments (refs #294, #296)

Merged
merged 14 commits into from
Mar 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 41 additions & 7 deletions src/imars3d/backend/dataio/data.py
KedoKudo marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,14 @@

Notes
-----
There are two main signatures to load the data:
There are three main signatures to load the data:
1. load_data(ct_files=ctfs, ob_files=obfs, dc_files=dcfs)
2. load_data(ct_dir=ctdir, ob_dir=obdir, dc_dir=dcdir)
3. load_data(ct_dir=ctdir, ob_files=obfs, dc_files=dcfs)

The two signatures are mutually exclusive, and dc_files and dc_dir are optional
in both cases as some experiments do not have dark current measurements.
In all signatures, dc_files and dc_dir are optional, as some experiments do not have dark current measurements.

The fnmatch selectors are applicable in both signature, which help to down-select
The fnmatch selectors are applicable in all signatures, which helps to down-select
files if needed. Default is set to "*", which selects everything.
Also, if ob_fnmatch and dc_fnmatch are set to "None" in the second signature call, the
data loader will attempt to read the metadata embedded in the first ct file to find obs
Expand Down Expand Up @@ -157,9 +157,43 @@
# use set to simplify call signature checking
sigs = set([k.split("_")[-1] for k in params.keys() if "fnmatch" not in k])
ref = {"files", "dir"}
if sigs.intersection(ref) == {"files", "dir"}:
logger.error("Files and dir cannot be used at the same time")
raise ValueError("Mix usage of allowed signature.")

if ("ct_dir" in params.keys()) and ("ob_files" in params.keys()):
logger.debug("Load ct by directory, ob and dc (if any) by files")
ct_dir = params.get("ct_dir")
if not Path(ct_dir).exists():
logger.error(f"ct_dir {ct_dir} does not exist.")
raise ValueError("ct_dir does not exist.")

Check warning on line 166 in src/imars3d/backend/dataio/data.py

View check run for this annotation

Codecov / codecov/patch

src/imars3d/backend/dataio/data.py#L165-L166

Added lines #L165 - L166 were not covered by tests
else:
ct_dir = Path(ct_dir)

# gather the ct_files
ct_fnmatch = params.get("ct_fnmatch", "*")
ct_files = ct_dir.glob(ct_fnmatch)
ct_files = list(map(str, ct_files))
ct_files.sort()

ob_files = (params.get("ob_files"),)
dc_files = (params.get("dc_files", []),) # it is okay to skip dc

ob_files = ob_files[0]
dc_files = dc_files[0]

ct, ob, dc = _load_by_file_list(
ct_files=ct_files,
ob_files=ob_files,
dc_files=dc_files, # it is okay to skip dc
ct_fnmatch=params.get("ct_fnmatch", "*"), # incase None got leaked here
ob_fnmatch=params.get("ob_fnmatch", "*"),
dc_fnmatch=params.get("dc_fnmatch", "*"),
max_workers=self.max_workers,
tqdm_class=params.tqdm_class,
)

elif ("ct_files" in params.keys()) and ("ob_dir" in params.keys()):
logger.error("ct_files and ob_dir mixed not allowed!")
raise ValueError("Mix signatures (ct_files, ob_dir) not allowed!")

Check warning on line 195 in src/imars3d/backend/dataio/data.py

View check run for this annotation

Codecov / codecov/patch

src/imars3d/backend/dataio/data.py#L194-L195

Added lines #L194 - L195 were not covered by tests

elif sigs.intersection(ref) == {"files"}:
logger.debug("Load by file list")
ct, ob, dc = _load_by_file_list(
Expand Down
12 changes: 7 additions & 5 deletions tests/unit/backend/dataio/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def test_load_data(
# error_0: incorrect input argument types
with pytest.raises(ValueError):
load_data(ct_files=1, ob_files=[], dc_files=[])
load_data(ct_dir=1, ob_files=[])
load_data(ct_files=[], ob_dir="/tmp")
load_data(ct_files=[], ob_files=[], dc_files=[], ct_fnmatch=1)
load_data(ct_files=[], ob_files=[], dc_files=[], ob_fnmatch=1)
load_data(ct_files=[], ob_files=[], dc_files=[], dc_fnmatch=1)
Expand All @@ -106,16 +108,16 @@ def test_load_data(
# error_1: out of bounds value
with pytest.raises(ValueError):
load_data(ct_files=[], ob_files=[], dc_files=[], max_workers=-1)
# error_2: mix usage of function signature 1 and 2
with pytest.raises(ValueError):
load_data(ct_files=[], ob_files=[], dc_files=[], ct_dir="/tmp", ob_dir="/tmp")
# error_3: no valid signature found
with pytest.raises(ValueError):
load_data(ct_fnmatch=1)
# case_0: load data from file list
# case_0: load ct from directory, ob and dc from files
rst = load_data(ct_dir="/tmp", ob_files=["3", "4"], dc_files=["5", "6"])
np.testing.assert_almost_equal(np.array(rst).flatten(), np.arange(1, 5, dtype=float))
# case_1: load data from file list
rst = load_data(ct_files=["1", "2"], ob_files=["3", "4"], dc_files=["5", "6"])
np.testing.assert_almost_equal(np.array(rst).flatten(), np.arange(1, 5, dtype=float))
# case_1: load data from given directory
# case_2: load data from given directory
rst = load_data(ct_dir="/tmp", ob_dir="/tmp", dc_dir="/tmp")
np.testing.assert_almost_equal(np.array(rst).flatten(), np.arange(1, 5, dtype=float))

Expand Down
Loading