|
| 1 | +"""Lookup Jenkins stage names for integration tests and vice versa. |
| 2 | +
|
| 3 | +This helper parses ``jenkins/L0_Test.groovy`` and the YAML files under |
| 4 | +``tests/integration/test_lists/test-db`` to provide a bidirectional mapping |
| 5 | +between test names and Jenkins stage names. When ``--tests`` or ``--test-list`` |
| 6 | +options are used, each value is treated as a case-insensitive substring |
| 7 | +pattern; if a value contains several whitespace-separated words, all of them |
| 8 | +must appear in the test name. An exact test name therefore matches itself. |
| 9 | +
|
| 10 | +Example usage:: |
| 11 | +
|
| 12 | + python scripts/test_to_stage_mapping.py --tests \\ |
| 13 | + "triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]" |
| 14 | + python scripts/test_to_stage_mapping.py --tests gpt_ib_ptuning |
| 15 | + python scripts/test_to_stage_mapping.py --stages \\ |
| 16 | + A100X-Triton-Post-Merge-1 |
| 17 | +
|
| 18 | +Tests can also be provided via ``--test-list`` pointing to either a plain text |
| 19 | +file or a YAML list file. Quote individual test names on the command line so |
| 20 | +the shell does not interpret ``[`` and ``]`` characters. |
| 21 | +""" |
| 22 | + |
| 23 | +import argparse |
| 24 | +import os |
| 25 | +import re |
| 26 | +from collections import defaultdict |
| 27 | +from glob import glob |
| 28 | +from typing import List |
| 29 | + |
| 30 | +import yaml |
| 31 | + |
| 32 | + |
def _load_tests_file(path: str) -> List[str]:
    """Read test name patterns from a plain-text or YAML list file.

    Blank lines and lines starting with ``#`` are always skipped.  When
    ``path`` ends in ``.yml`` or ``.yaml`` only ``- item`` lines are used and
    every other line (keys, nested mappings) is ignored; otherwise each
    remaining line is one pattern.

    Args:
        path: File to read.

    Returns:
        The patterns in file order, stripped of surrounding whitespace.
    """
    yaml_mode = path.endswith(('.yml', '.yaml'))
    tests: List[str] = []
    with open(path, 'r', encoding='utf-8') as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith('#'):
                continue
            if yaml_mode:
                if not line.startswith('- '):
                    continue
                line = line[2:].strip()
                # Test names contain '[' and ']', so YAML lists usually quote
                # them; drop one pair of matching quotes so the pattern is
                # the bare test name.
                if len(line) >= 2 and line[0] == line[-1] and line[0] in '"\'':
                    line = line[1:-1]
                # An empty item would become a pattern that matches every
                # test, which is never what the list author meant.
                if not line:
                    continue
            tests.append(line)
    return tests
| 47 | + |
| 48 | + |
# Matches one Jenkins stage definition inside the Groovy file, for example:
#   "Stage-Name": ["platform", "yaml_file", split_id, split_count, gpu_count]
#
# Only the stage name (group 'stage') and the YAML file name without its
# extension (group 'yml') are captured; the platform string and the trailing
# integers are matched but discarded.
_STAGE_RE = re.compile(
    r'"(?P<stage>[^"]+)"'  # quoted stage name
    r'\s*:\s*\['  # colon, then the opening bracket of the value list
    r'"[^"]+",\s*'  # quoted platform string (ignored) and its comma
    r'"(?P<yml>[^"]+)"'  # quoted YAML file name
    r'(?:,\s*\d+)*'  # zero or more ", <int>" items
    r'\s*\]')  # closing bracket
| 63 | + |
| 64 | + |
def _extract_terms(entry):
    """Return the ``terms`` mapping of a test-db entry.

    ``entry['terms']`` is used when it is present and non-empty; otherwise
    the function falls back to ``entry['condition']['terms']``.  A key that
    is present but empty in YAML loads as ``None``; such values are treated
    like missing keys.

    Args:
        entry: One mapping from a test-db YAML list.

    Returns:
        The terms mapping, or an empty dict when none is found.
    """
    terms = entry.get('terms')
    if not terms:
        # `or {}` covers both a missing key and an explicit `condition: null`.
        condition = entry.get('condition') or {}
        terms = condition.get('terms')
    return terms or {}
| 71 | + |
| 72 | + |
class StageQuery:
    """Bidirectional lookup between test names and Jenkins stage names.

    Instance attributes, all built by ``__init__``:
        stage_to_yaml: stage name -> test-db file name (``<name>.yml``).
        yaml_to_stages: test-db file name -> list of stage names using it.
        test_map: test name -> list of ``(yml, stage_type, backend)`` tuples.
        yaml_stage_tests: test-db file name -> stage type -> list of tests.
    """

    def __init__(self, groovy_path: str, test_db_dir: str):
        """Parse the Groovy stage table and every test-db YAML file.

        Args:
            groovy_path: Path of the Jenkins Groovy file with stage configs.
            test_db_dir: Directory holding the test-db ``*.yml``/``*.yaml``.
        """
        self.stage_to_yaml, self.yaml_to_stages = self._parse_stage_mapping(
            groovy_path)
        self.test_map, self.yaml_stage_tests = self._parse_tests(test_db_dir)
        # Build dynamic backend mapping from discovered data
        self._backend_keywords = self._discover_backend_keywords()

    @staticmethod
    def _normalize_backend(backend):
        """Return ``backend`` stripped and lower-cased, or ``''`` if unset."""
        if not isinstance(backend, str):
            return ''
        return backend.strip().lower()

    @staticmethod
    def _parse_stage_mapping(path):
        """Collect stage <-> YAML file relations from the Groovy file.

        Every line is searched with ``_STAGE_RE``; the captured YAML name
        gets a ``.yml`` suffix appended.

        Returns:
            ``(stage_to_yaml, yaml_to_stages)``.
        """
        stage_to_yaml = {}
        yaml_to_stages = defaultdict(list)
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                m = _STAGE_RE.search(line)
                if m:
                    stage = m.group('stage')
                    yml = m.group('yml') + '.yml'
                    stage_to_yaml[stage] = yml
                    yaml_to_stages[yml].append(stage)
        return stage_to_yaml, yaml_to_stages

    def _parse_tests(self, db_dir):
        """Parse tests from YAML files, supporting both .yml and .yaml.

        Returns:
            ``(test_map, yaml_stage_tests)`` as described on the class.
        """
        test_map = defaultdict(list)
        yaml_stage_tests = defaultdict(lambda: defaultdict(list))

        # Sorted so the order of the collected lists does not depend on the
        # file system's directory order.
        yaml_files = sorted(
            glob(os.path.join(db_dir, '*.yml')) +
            glob(os.path.join(db_dir, '*.yaml')))

        for path in yaml_files:
            with open(path, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
            if not isinstance(data, dict):
                # Empty file (loads as None) or a non-mapping document.
                continue
            # The stage table always refers to '<name>.yml', so key '.yaml'
            # files the same way; otherwise they could never be matched.
            stem = os.path.splitext(os.path.basename(path))[0]
            yml = stem + '.yml'
            for key, entries in data.items():
                if key == 'version' or not isinstance(entries, list):
                    continue
                for entry in entries:
                    if not isinstance(entry, dict):
                        continue
                    terms = _extract_terms(entry) or {}

                    stage = terms.get('stage')
                    if stage is None:
                        continue

                    # `backend:` / `tests:` with no value load as None.
                    backend = terms.get('backend') or ''
                    tests = entry.get('tests') or []
                    for t in tests:
                        test_map[t].append((yml, stage, backend))
                        yaml_stage_tests[yml][stage].append(t)
        return test_map, yaml_stage_tests

    def _discover_backend_keywords(self):
        """Discover backend keywords from existing data dynamically.

        Returns:
            Mapping of normalised backend name -> list of upper-case
            keywords to look for in stage names.  Keys are inserted in
            sorted order so that first-match lookups are deterministic.
        """
        # Collect all backends from test data
        all_backends = set()
        for mappings in self.test_map.values():
            for _yml, _stage_type, backend in mappings:
                name = self._normalize_backend(backend)
                if name:
                    all_backends.add(name)

        # Common variations/aliases; any other backend maps to its own
        # upper-cased name.
        aliases = {
            'tensorrt': ['TENSORRT', 'TRT'],
            'pytorch': ['PYTORCH', 'TORCH'],
            'cpp': ['CPP', 'C++'],
            'triton': ['TRITON']
        }
        return {
            backend: aliases.get(backend, [backend.upper()])
            for backend in sorted(all_backends)
        }

    def _keywords_for(self, backend):
        """Return the stage-name keywords for ``backend`` ([] if unset)."""
        name = self._normalize_backend(backend)
        if not name:
            return []
        keywords = self._backend_keywords.get(name, [name.upper()])
        if isinstance(keywords, str):
            keywords = [keywords]
        return keywords

    def _backend_for_stage(self, stage):
        """Return the first known backend whose keyword occurs in ``stage``.

        Returns ``None`` when no keyword matches.
        """
        stage_upper = stage.upper()
        for backend, keywords in self._backend_keywords.items():
            if isinstance(keywords, str):
                keywords = [keywords]
            if any(keyword in stage_upper for keyword in keywords):
                return backend
        return None

    def search_tests(self, pattern: str) -> List[str]:
        """Return known tests whose name contains every word of ``pattern``.

        Matching is case-insensitive.  ``pattern`` is split on whitespace and
        all parts must occur somewhere in the test name.  A pattern with no
        words matches every test.
        """
        parts = [p.lower() for p in pattern.split()]
        result = []
        for test in self.test_map:
            name = test.lower()
            if all(p in name for p in parts):
                result.append(test)
        return result

    def tests_to_stages(self, tests) -> List[str]:
        """Return the sorted Jenkins stage names that run any of ``tests``.

        A stage qualifies when it uses the test's YAML file, its
        'Post-Merge' marker agrees with the test's stage type, and - if the
        test declares a backend - the stage name contains one of that
        backend's keywords.  Unknown test names are ignored.
        """
        result = set()
        for t in tests:
            for yml, stage_type, backend in self.test_map.get(t, []):
                keywords = self._keywords_for(backend)
                for s in self.yaml_to_stages.get(yml, []):
                    is_post_merge = 'Post-Merge' in s
                    if stage_type == 'post_merge' and not is_post_merge:
                        continue
                    if stage_type == 'pre_merge' and is_post_merge:
                        continue

                    # Filter by backend if specified
                    if keywords and not any(keyword in s.upper()
                                            for keyword in keywords):
                        continue

                    result.add(s)
        return sorted(result)

    def stages_to_tests(self, stages) -> List[str]:
        """Return the sorted test names that run in any of ``stages``.

        The stage type is 'post_merge' when the stage name contains
        'Post-Merge', else 'pre_merge'.  When the stage name contains a
        known backend keyword, tests declaring a different backend are
        dropped; tests without a backend are kept, mirroring
        ``tests_to_stages``.  Unknown stage names are ignored.
        """
        result = set()
        for s in stages:
            yml = self.stage_to_yaml.get(s)
            if not yml:
                continue
            stage_type = 'post_merge' if 'Post-Merge' in s else 'pre_merge'

            # Determine expected backend dynamically from stage name
            expected_backend = self._backend_for_stage(s)

            # Get all tests for yml/stage_type, then filter by backend
            all_tests = self.yaml_stage_tests.get(yml, {}).get(stage_type, [])
            for test in all_tests:
                for test_yml, test_stage, test_backend in self.test_map.get(
                        test, []):
                    if test_yml != yml or test_stage != stage_type:
                        continue
                    name = self._normalize_backend(test_backend)
                    if (expected_backend is None or not name
                            or name == expected_backend):
                        result.add(test)
                        break
        return sorted(result)
| 218 | + |
| 219 | + |
def main(argv=None):
    """Command-line entry point: print stages for tests, or tests for stages.

    Exactly one of ``--tests``, ``--test-list`` or ``--stages`` must be
    given.  For the first two, every pattern is resolved with
    ``StageQuery.search_tests`` and the matching stages are printed one per
    line; for ``--stages`` the tests of those stages are printed.

    Args:
        argv: Argument list to parse.  ``None`` (the default) lets argparse
            read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description='Map Jenkins stages to tests and vice versa.')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '--tests',
        nargs='+',
        help='One or more test name patterns to resolve to Jenkins stages')
    group.add_argument(
        '--test-list',
        help=('File with test name patterns, either newline separated '
              'or a YAML list'))
    group.add_argument('--stages',
                       nargs='+',
                       help='List of stage names to look up')
    # abspath first: a relative __file__ (script started from its own
    # directory) would otherwise collapse to '' after two dirname() calls.
    parser.add_argument('--repo-root',
                        default=os.path.dirname(
                            os.path.dirname(os.path.abspath(__file__))),
                        help='Path to repository root')
    args = parser.parse_args(argv)

    groovy = os.path.join(args.repo_root, 'jenkins', 'L0_Test.groovy')
    db_dir = os.path.join(args.repo_root, 'tests', 'integration', 'test_lists',
                          'test-db')
    query = StageQuery(groovy, db_dir)

    if args.stages is not None:
        for t in query.stages_to_tests(args.stages):
            print(t)
        return

    patterns = list(args.tests or [])
    if args.test_list:
        patterns.extend(_load_tests_file(args.test_list))

    matched = set()
    for pat in patterns:
        matched.update(query.search_tests(pat))
    for s in query.tests_to_stages(sorted(matched)):
        print(s)


if __name__ == '__main__':
    main()
0 commit comments