@export
def dry_load_files(dirname, chunk_number=None):
    """Load data directly from a strax data folder.

    Only files inside ``dirname`` are used: the ``<prefix>-metadata.json`` file
    supplies the dtype, the compressor and the list of chunks, and the chunk
    files ``<prefix>-<chunk number, zero-padded to 6 digits>`` supply the data.

    :param dirname: Path of the folder holding the metadata and chunk files.
    :param chunk_number: Position of a single chunk in the metadata's chunk
        list. If None (default), all non-empty chunks are loaded and
        concatenated in metadata order.
    :return: Array with the loaded data; an empty array of the stored dtype if
        there is nothing to load.
    :raises ValueError: If ``chunk_number`` is negative or not smaller than the
        number of chunks listed in the metadata.

    """
    prefix = strax.storage.files.dirname_to_prefix(dirname)
    md_path = os.path.join(dirname, f"{prefix}-metadata.json")

    with open(md_path, mode="r") as f:
        metadata = json.load(f)

    # The dtype is stored as text; literal_eval only accepts Python literals,
    # so nothing from the metadata file gets executed.
    dtype = literal_eval(metadata["dtype"])
    chunks = metadata["chunks"]

    def _load_chunk(chunk_i):
        """Read one chunk file with the compressor recorded in the metadata."""
        return load_file(
            os.path.join(dirname, f"{prefix}-{chunk_i:06d}"),
            metadata["compressor"],
            dtype,
        )

    if chunk_number is None:
        # Chunks with n == 0 are skipped, mirroring the per-chunk branch below.
        loaded = [_load_chunk(info["chunk_i"]) for info in chunks if info["n"] != 0]
        # np.hstack raises ValueError on an empty sequence, so only stack when
        # at least one chunk was actually read.
        results = np.hstack(loaded) if loaded else []
    else:
        # Reject negative numbers too: they would index the chunk list from the
        # end while pointing at a chunk file name that cannot exist.
        if not 0 <= chunk_number < len(chunks):
            raise ValueError(f"Chunk {chunk_number:06d} does not exist in {dirname}.")
        results = _load_chunk(chunk_number) if chunks[chunk_number]["n"] != 0 else []

    return results if len(results) else np.empty(0, dtype)
def test_dry_load_files(self):
    """Check that dry_load_files reads a stored folder, whole and per chunk.

    Requesting chunk 99 is expected to raise ValueError.

    """
    context, _ = self.get_st_and_fill_frontends()
    for frontend in context.storage:
        data_key = context.key_for(self.run_id, self.target)
        folder = os.path.join(frontend.path, str(data_key))
        # Whole folder first, then only chunk 0.
        strax.io.dry_load_files(folder)
        strax.io.dry_load_files(folder, 0)
        # Chunk 99 must be rejected.
        self.assertRaises(ValueError, strax.io.dry_load_files, folder, 99)