-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Filter external repos #396
base: main
Are you sure you want to change the base?
Changes from all commits
7c36999
b1b79bd
7e5aadb
c458de2
77774ee
a01827e
e3dbf12
fcd2970
8a46d1c
6a86865
388910b
0163e78
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
from enum import Enum | ||
from pathlib import Path | ||
from typing import Annotated, Optional | ||
from typing import Any | ||
from fnmatch import fnmatch | ||
|
||
import yaml | ||
from git import Repo | ||
|
@@ -11,29 +12,83 @@ | |
field_validator, | ||
model_validator, | ||
ConfigDict, | ||
BeforeValidator, | ||
) | ||
from nomenclature.code import Code | ||
|
||
|
||
class RepositoryWithFilter(BaseModel): | ||
name: str | ||
include: list[dict[str, Any]] = [{"name": "*"}] | ||
exclude: list[dict[str, Any]] = Field(default_factory=list) | ||
|
||
def filter_function(self, code: "Code", filter: dict[str, Any], keep: bool) -> bool:
    """Decide whether `code` passes a single attribute filter.

    Every ``attribute: value`` pair in `filter` has to match the attribute of
    the same name on `code` for the filter as a whole to match. An empty
    filter therefore matches every code.

    Matching rules, by type of the filter value:

    * ``None`` never matches.
    * A list matches if any of its items matches (applied recursively).
    * An ``int`` has to be equal to the value on the code.
    * A ``str`` is a glob pattern (see :func:`fnmatch.fnmatch`) and can only
      match string values.

    Parameters
    ----------
    code : Code
        The code whose attributes are inspected.
    filter : dict[str, Any]
        Mapping of attribute name to the value(s) or pattern(s) to match.
    keep : bool
        If True, return whether the filter matches (include semantics);
        if False, return whether it does *not* match (exclude semantics).

    Returns
    -------
    bool
        The match result, negated when `keep` is False.

    Raises
    ------
    ValueError
        If a filter value is of an unsupported type.
    """

    def attribute_matches(code_value, filter_value) -> bool:
        if filter_value is None:
            return False
        if isinstance(filter_value, list):
            return any(attribute_matches(code_value, item) for item in filter_value)
        if isinstance(filter_value, int):
            return code_value == filter_value
        if isinstance(filter_value, str):
            # `getattr(..., None)` below yields None for attributes the code
            # does not have; passing None (or any other non-string) to
            # fnmatch() raises TypeError, so such values simply do not match.
            # NOTE: fnmatch() case-normalizes both arguments on Windows;
            # fnmatchcase() would make matching platform-independent.
            return isinstance(code_value, str) and fnmatch(code_value, filter_value)
        raise ValueError("Something went wrong with the filtering")

    filter_match = all(
        attribute_matches(getattr(code, attribute, None), value)
        for attribute, value in filter.items()
    )
    return filter_match if keep else not filter_match
|
||
def filter_list_of_codes(self, list_of_codes: list[Code]) -> list[Code]: | ||
# include first | ||
filter_result = [ | ||
code | ||
for code in list_of_codes | ||
if any( | ||
self.filter_function( | ||
code, | ||
filter, | ||
keep=True, | ||
) | ||
for filter in self.include | ||
) | ||
] | ||
|
||
if self.exclude: | ||
filter_result = [ | ||
code | ||
for code in filter_result | ||
if any( | ||
self.filter_function(code, filter, keep=False) | ||
for filter in self.exclude | ||
) | ||
] | ||
|
||
|
||
def convert_to_set(v: str | list[str] | set[str]) -> set[str]: | ||
match v: | ||
case set(v): | ||
return v | ||
case list(v): | ||
return set(v) | ||
case str(v): | ||
return {v} | ||
case _: | ||
raise TypeError("`repositories` must be of type str, list or set.") | ||
return filter_result | ||
|
||
|
||
class CodeListConfig(BaseModel): | ||
dimension: str | None = None | ||
repositories: Annotated[set[str], BeforeValidator(convert_to_set)] = Field( | ||
default_factory=set, alias="repository" | ||
repositories: list[RepositoryWithFilter] = Field( | ||
default_factory=list, alias="repository" | ||
) | ||
model_config = ConfigDict(populate_by_name=True) | ||
|
||
@field_validator("repositories", mode="before")
@classmethod
def convert_to_set_of_repos(cls, v):
    """Normalize the raw `repository` input before field validation.

    The field is a list of repository models, but configurations may give a
    single repository and may refer to a repository by its bare name. The
    input is therefore normalized as follows:

    * a list or tuple is used as given, a set is unpacked in sorted order so
      that the result is deterministic, and anything else is treated as a
      single entry;
    * every entry that is a plain string is turned into ``{"name": entry}``,
      because a bare string cannot be validated as a repository model.

    Entries that are not strings (e.g. dictionaries with filters) are passed
    through unchanged for pydantic to validate.
    """
    if isinstance(v, (set, frozenset)):
        entries = sorted(v, key=str)
    elif isinstance(v, (list, tuple)):
        entries = list(v)
    else:
        entries = [v]
    return [{"name": entry} if isinstance(entry, str) else entry for entry in entries]
|
||
@property | ||
def repository_dimension_path(self) -> str: | ||
return f"definitions/{self.dimension}" | ||
|
@@ -109,8 +164,8 @@ class DataStructureConfig(BaseModel): | |
|
||
""" | ||
|
||
region: Optional[RegionCodeListConfig] = Field(default_factory=RegionCodeListConfig) | ||
variable: Optional[CodeListConfig] = Field(default_factory=CodeListConfig) | ||
region: RegionCodeListConfig = Field(default_factory=RegionCodeListConfig) | ||
variable: CodeListConfig = Field(default_factory=CodeListConfig) | ||
|
||
@field_validator("region", "variable", mode="before") | ||
@classmethod | ||
|
@@ -126,12 +181,22 @@ def repos(self) -> dict[str, str]: | |
} | ||
|
||
|
||
class MappingRepository(BaseModel): | ||
name: str | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The mapping will also have to inherit the region filters, right? Otherwise, a model could map to a region that is not included in the DataStructureDefinition. |
||
|
||
|
||
class RegionMappingConfig(BaseModel): | ||
repositories: Annotated[set[str], BeforeValidator(convert_to_set)] = Field( | ||
default_factory=set, alias="repository" | ||
repositories: list[MappingRepository] = Field( | ||
default_factory=list, alias="repository" | ||
) | ||
model_config = ConfigDict(populate_by_name=True) | ||
|
||
@field_validator("repositories", mode="before")
@classmethod
def convert_to_set_of_repos(cls, v):
    """Normalize the raw `repository` input before field validation.

    The field is a list of mapping-repository models, but configurations may
    give a single repository and may refer to a repository by its bare name.
    The input is therefore normalized as follows:

    * a list or tuple is used as given, a set is unpacked in sorted order so
      that the result is deterministic, and anything else is treated as a
      single entry;
    * every entry that is a plain string is turned into ``{"name": entry}``,
      because a bare string cannot be validated as a repository model.

    Entries that are not strings are passed through unchanged for pydantic
    to validate.
    """
    if isinstance(v, (set, frozenset)):
        entries = sorted(v, key=str)
    elif isinstance(v, (list, tuple)):
        entries = list(v)
    else:
        entries = [v]
    return [{"name": entry} if isinstance(entry, str) else entry for entry in entries]
|
||
|
||
class DimensionEnum(str, Enum): | ||
model = "model" | ||
|
@@ -157,8 +222,9 @@ def check_definitions_repository( | |
mapping_repos = {"mappings": v.mappings.repositories} if v.mappings else {} | ||
repos = {**v.definitions.repos, **mapping_repos} | ||
for use, repositories in repos.items(): | ||
if repositories - v.repositories.keys(): | ||
raise ValueError((f"Unknown repository {repositories} in '{use}'.")) | ||
repository_names = [repository.name for repository in repositories] | ||
if unknown_repos := repository_names - v.repositories.keys(): | ||
raise ValueError((f"Unknown repository {unknown_repos} in '{use}'.")) | ||
return v | ||
|
||
def fetch_repos(self, target_folder: Path): | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
repositories: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I suggest adding more structure to the validation test data by using subfolders. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, good idea. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. #399 implements a cleanup of the test data folder; once that PR is merged, I'll rebase this one. |
||
common-definitions: | ||
url: https://github.com/IAMconsortium/common-definitions.git/ | ||
legacy-definitions: | ||
url: https://github.com/IAMconsortium/legacy-definitions.git/ | ||
definitions: | ||
variable: | ||
repository: | ||
- name: common-definitions | ||
filters: | ||
- name: [Primary Energy*, Final Energy*] | ||
- name: "Population*" | ||
tier: 1 | ||
- name: legacy-definitions | ||
region: | ||
repository: common-definitions | ||
country: true |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a misleading example - it seems to show only level-2 exclusion when in fact it excludes all variables at level 2 or below. Better to use the level-argument explicitly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems that this is subjective then. For me it was totally clear that this excludes anything level 2 and beyond.
I can see your point though about this being ambiguous