Skip to content

Commit 918abdd

Browse files
authored
Merge pull request #23 from ssl-hep/feat/get-metadata
Add metadata information to file peeking
2 parents f286db0 + bf7291b commit 918abdd

File tree

5 files changed

+151
-9
lines changed

5 files changed

+151
-9
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ servicex.yaml
1313

1414
#Testing
1515
samples_structure.txt
16-
16+
tmp_test.py

servicex_analysis_utils/file_peeking.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
from servicex.dataset_identifier import DataSetIdentifier
3636

3737

38-
def run_query(input_filenames):
38+
def run_query(
39+
input_filenames,
40+
):
3941
import uproot
4042
import awkward as ak
4143
import json
@@ -59,13 +61,24 @@ def is_tree(obj):
5961
tree_dict = {}
6062

6163
with uproot.open(input_filenames) as file:
64+
6265
for tree_name in file.keys():
6366
tree_name_clean = tree_name.rstrip(";1")
6467
tree = file[tree_name]
6568

6669
if not is_tree(tree):
6770
continue
6871

72+
if tree_name_clean == "MetaData":
73+
fm_branches = [
74+
b for b in tree.keys() if b.startswith("FileMetaDataAuxDyn.")
75+
]
76+
# strip the 19-character "FileMetaDataAuxDyn." prefix from the keys
77+
meta_dict = {
78+
p[19:]: str(tree[p].array(library="ak")[0]) for p in fm_branches
79+
}
80+
tree_dict["FileMetaData"] = meta_dict
81+
6982
branch_dict = {}
7083
for branch_name, branch in tree.items():
7184
branch_type = str(branch.interpretation)
@@ -174,9 +187,6 @@ def print_structure_from_str(
174187
import json
175188

176189
output_lines = []
177-
output_lines.append(
178-
f"\nFile structure of all samples with branch filter '{filter_branch}':"
179-
)
180190

181191
for sample_name, path in deliver_dict.items():
182192
structure_str = open_delivered_file(sample_name, path)
@@ -186,9 +196,25 @@ def print_structure_from_str(
186196
structure_dict = json.loads(structure_str)
187197

188198
output_lines.append(
189-
f"\n---------------------------\n"
199+
"\n---------------------------\n"
190200
f"\U0001f4c1 Sample: {sample_name}\n"
191-
f"---------------------------"
201+
"---------------------------"
202+
)
203+
204+
# Get the metadata first
205+
output_lines.append("\nFile Metadata \u2139\ufe0f :\n")
206+
if "FileMetaData" not in structure_dict:
207+
output_lines.append("No FileMetaData found in dataset.")
208+
else:
209+
for key, value in structure_dict.get("FileMetaData", {}).items():
210+
output_lines.append(f"── {key}: {value}")
211+
output_lines.append("\n---------------------------")
212+
213+
# drop the File metadata from the trees
214+
structure_dict.pop("FileMetaData", {})
215+
216+
output_lines.append(
217+
f"\nFile structure with branch filter \U0001f33f '{filter_branch}':\n"
192218
)
193219

194220
for tree_name, branches in structure_dict.items():
@@ -259,6 +285,8 @@ def str_to_array(encoded_json_str):
259285
"""
260286
reconstructed_data = {}
261287
structure_dict = json.loads(encoded_json_str)
288+
# drop the "FileMetaData" entry so the loop below only iterates over trees
289+
structure_dict.pop("FileMetaData", {})
262290

263291
for treename, branch_dict in structure_dict.items():
264292
branches = {}

tests/data/expected_metadata.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
2+
---------------------------
3+
📁 Sample: test_file
4+
---------------------------
5+
6+
File Metadata ℹ️ :
7+
8+
── test_100: 100
9+
── test_abc: abc
10+
11+
---------------------------
12+
13+
File structure with branch filter 🌿 '':
14+
15+
16+
🌳 Tree: MetaData
17+
├── Branches:
18+
│ ├── FileMetaDataAuxDyn.test_100 ; dtype: AsDtype('>i8')
19+
│ ├── FileMetaDataAuxDyn.test_abc ; dtype: AsStrings()

tests/data/expected_structure.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11

2-
File structure of all samples with branch filter '':
3-
42
---------------------------
53
📁 Sample: test_file
64
---------------------------
75

6+
File Metadata ℹ️ :
7+
8+
No FileMetaData found in dataset.
9+
10+
---------------------------
11+
12+
File structure with branch filter 🌿 '':
13+
14+
815
🌳 Tree: background
916
├── Branches:
1017
│ ├── branch1 ; dtype: AsDtype('>f8')
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright (c) 2025, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
import pytest
29+
import uproot
30+
import json
31+
import os
32+
from servicex_analysis_utils import file_peeking
33+
from pathlib import Path
34+
35+
36+
@pytest.fixture
def build_test_samples(tmp_path):
    """Write a ROOT file containing a ``MetaData`` tree and return its path.

    The tree holds two ``FileMetaDataAuxDyn.*`` branches with a single entry
    each: one integer value and one string value.

    Returns:
        The path of the created file, as a string.
    """
    # One entry per branch: an integer and a string
    metadata_branches = {
        "FileMetaDataAuxDyn.test_100": [100],
        "FileMetaDataAuxDyn.test_abc": ["abc"],
    }

    # Create the temporary .root file inside pytest's tmp_path
    root_file_path = str(tmp_path / "test_metadata.root")
    with uproot.create(root_file_path) as root_file:
        root_file["MetaData"] = metadata_branches

    return root_file_path
51+
52+
53+
# Test run_query and print_structure_from_str
54+
def test_metadata_retrieval(build_test_samples, tmp_path, capsys):
    """Check metadata extraction by ``run_query`` and its text rendering.

    First asserts that the JSON string in ``run_query(...)[0]`` contains a
    ``FileMetaData`` entry (prefix-stripped keys, stringified values) next to
    the regular ``MetaData`` tree description. Then stores the query output in
    a servicex-like ROOT file and compares what
    ``print_structure_from_str`` returns with the reference text file.

    Args:
        build_test_samples: Path of the ROOT file created by the fixture.
        tmp_path: pytest temporary directory used for the encoded file.
        capsys: pytest capture fixture; requested but not used in this test.
    """
    path = build_test_samples
    query_output = file_peeking.run_query(path)

    # Check result
    expected_result = {
        "FileMetaData": {"test_100": "100", "test_abc": "abc"},
        "MetaData": {
            "FileMetaDataAuxDyn.test_100": "AsDtype('>i8')",
            "FileMetaDataAuxDyn.test_abc": "AsStrings()",
        },
    }
    encoded_result = json.loads(query_output[0])

    assert encoded_result == expected_result

    # Produce a servicex.deliver()-like dict, i.e. {"Sample Name": ["Path"]}
    encoded_path = tmp_path / "encoded.root"
    with uproot.create(encoded_path) as file:
        file["servicex"] = {"branch": query_output}
    assert encoded_path.exists(), "servicex-like test file not found."
    deliver_dict = {"test_file": [str(encoded_path)]}

    # Test string formatting
    output_str = file_peeking.print_structure_from_str(deliver_dict)

    # Keep the repo-root-relative lookup, but do not depend on the current
    # working directory: fall back to a path relative to this module.
    # NOTE(review): the fallback assumes this module sits in tests/ next to
    # the data/ directory — confirm.
    expected_path = Path("tests/data/expected_metadata.txt")
    if not expected_path.exists():
        expected_path = (
            Path(__file__).resolve().parent / "data" / "expected_metadata.txt"
        )
    expected = expected_path.read_text(encoding="utf-8")

    assert (
        expected == output_str
    ), f"Output does not match expected.\n Output: {output_str}"

0 commit comments

Comments
 (0)