From 5d37d0a81b2f2e2b1abeaac52241af66d6b3ee19 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Wed, 8 Nov 2023 23:19:19 +0100 Subject: [PATCH 1/8] Add `import_ecoinvent_release` utility --- bw2io/__init__.py | 12 +- bw2io/ecoinvent.py | 260 +++++++++++++++++++++++++ bw2io/extractors/excel.py | 17 +- bw2io/importers/ecoinvent_lcia.py | 7 +- bw2io/importers/ecospold2.py | 20 +- bw2io/importers/ecospold2_biosphere.py | 26 ++- bw2io/strategies/ecospold2.py | 4 +- 7 files changed, 310 insertions(+), 36 deletions(-) create mode 100644 bw2io/ecoinvent.py diff --git a/bw2io/__init__.py b/bw2io/__init__.py index ca05841c..11e77c1c 100644 --- a/bw2io/__init__.py +++ b/bw2io/__init__.py @@ -28,6 +28,7 @@ "exiobase_monetary", "get_csv_example_filepath", "get_xlsx_example_filepath", + "import_ecoinvent_release", "install_project", "lci_matrices_to_excel", "lci_matrices_to_matlab", @@ -41,9 +42,9 @@ "SimaProLCIACSVImporter", "SingleOutputEcospold1Importer", "SingleOutputEcospold2Importer", - "useeio11", "unlinked_data", "UnlinkedData", + "useeio11", ] from .version import version as __version__ @@ -94,6 +95,15 @@ from .utils import activity_hash, es2_activity_hash, load_json_data_file from .remote import install_project +try: + from .ecoinvent import import_ecoinvent_release +except ImportError: + import warnings + + def import_ecoinvent_release(*args, **kwargs): + warnings.warn("Please install `ecoinvent_interface` to use this function") + + from bw2data import config, databases config.metadata.extend( diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py new file mode 100644 index 00000000..2f560300 --- /dev/null +++ b/bw2io/ecoinvent.py @@ -0,0 +1,260 @@ +import re +import zipfile +from collections import defaultdict +from pathlib import Path + +import bw2data as bd +import ecoinvent_interface as ei +import openpyxl +from ecoinvent_interface.core import SYSTEM_MODELS +from ecoinvent_interface.string_distance import damerau_levenshtein + +from .extractors import ExcelExtractor 
+from .importers import ( + EcoinventLCIAImporter, + Ecospold2BiosphereImporter, + SingleOutputEcospold2Importer, +) + + +def get_excel_sheet_names(file_path: Path) -> list[str]: + """Read XML metadata file instead of using openpyxl, which loads the whole workbook. + + From https://stackoverflow.com/questions/12250024/how-to-obtain-sheet-names-from-xls-files-without-loading-the-whole-file. + """ + sheets = [] + with zipfile.ZipFile(file_path, "r") as zip_ref: + xml = zip_ref.read("xl/workbook.xml").decode("utf-8") + for s_tag in re.findall("<sheet [^>]*", xml): + sheets.append(re.search('name="[^"]*', s_tag).group(0)[6:]) + return sheets + + +def header_dict(array: list) -> list[dict]: + return [ + {header.lower(): value for header, value in zip(array[0], row)} + for row in array[1:] + ] + + +def drop_unspecified(a: str, b: str, c: str) -> tuple: + if c.lower() == "unspecified": + return (a, b) + else: + return (a, b, c) + + +def pick_a_unit_label_already(obj: dict) -> str: + candidates = ("indicator unit", "unit", "unitname", "impact score unit") + for candidate in candidates: + if candidate in obj: + return candidate + raise KeyError("Can't find suitable column label for LCIA units") + + +def import_ecoinvent_release( + version: str, + system_model: str, + username: str | None = None, + password: str | None = None, + lci: bool = True, + lcia: bool = False, + biosphere_name: str | None = None, +) -> None: + """Import an ecoinvent LCI and optionally LCIA database. + + Uses [ecoinvent_interface](https://github.com/brightway-lca/ecoinvent_interface). Auth credentials are optional as they can be set externally (see the `ecoinvent_interface` documentation), and such permanent storage is highly recommended. + + The biosphere database must not exist. It is too much work to do selective updates. + + Parameters + ---------- + version + The ecoinvent release version as a string, e.g. '3.9.1' + system_model + The system model as a string in short or long form, e.g. 
'apos' or 'Allocation cut-off by classification' + username + ecoinvent username + password + ecoinvent password + lci + Flag on whether to import the inventory database + lcia + Flag on whether to import the LCIA impact categories + biosphere_name + Name of database to store biosphere flows. They will be stored in the main LCI database if not specified. + + """ + from . import create_core_migrations, migrations + + if not len(migrations): + create_core_migrations() + + if username is None and password is None: + settings = ei.Settings() + else: + settings = ei.Settings(username=username, password=password) + if not settings.username or not settings.password: + raise ValueError("Can't determine ecoinvent username or password") + + release = ei.EcoinventRelease(settings) + if not version in release.list_versions(): + raise ValueError(f"Invalid version {version}") + + if system_model in SYSTEM_MODELS: + system_model = SYSTEM_MODELS[system_model] + if not system_model in release.list_system_models(version): + raise ValueError(f"Invalid system model {system_model}") + + if biosphere_name is None: + biosphere_name = f"ecoinvent-{version}-biosphere" + if lci: + if biosphere_name in bd.databases: + raise ValueError(f"Biosphere database {biosphere_name} already exists") + db_name = f"ecoinvent-{version}-{system_model}" + if db_name in bd.databases: + raise ValueError(f"Database {db_name} already exists") + + lci_path = release.get_release( + version=version, + system_model=system_model, + release_type=ei.ReleaseType.ecospold, + ) + + eb = Ecospold2BiosphereImporter( + name=biosphere_name, + filepath=lci_path / "MasterData" / "ElementaryExchanges.xml", + ) + eb.apply_strategies() + if not eb.all_linked: + raise ValueError( + f"Can't ingest biosphere database {biosphere_name} - unlinked flows." 
+ ) + eb.write_database(overwrite=False) + bd.preferences["biosphere_database"] = biosphere_name + + soup = SingleOutputEcospold2Importer(lci_path / "datasets", db_name) + soup.apply_strategies() + if not soup.all_linked: + raise ValueError( + f"Can't ingest inventory database {db_name} - unlinked flows." + ) + soup.write_database() + + if lcia: + if biosphere_name is None: + biosphere_name = bd.config.biosphere + if biosphere_name not in bd.databases or not len(bd.Database(biosphere_name)): + raise ValueError( + f"Can't find populated biosphere flow database {biosphere_name}" + ) + + lcia_file = ei.get_excel_lcia_file_for_version(release=release, version=version) + sheet_names = get_excel_sheet_names(lcia_file) + + if "units" in sheet_names: + units_sheetname = "units" + elif "Indicators" in sheet_names: + units_sheetname = "Indicators" + else: + raise ValueError( + f"Can't find worksheet for impact category units in {sheet_names}" + ) + + if "CFs" not in sheet_names: + raise ValueError( + f"Can't find worksheet for characterization factors; expected `CFs`, found {sheet_names}" + ) + + data = dict(ExcelExtractor.extract(lcia_file)) + units = header_dict(data[units_sheetname]) + + cfs = header_dict(data["CFs"]) + + CF_COLUMN_LABELS = { + "3.4": "cf 3.4", + "3.5": "cf 3.5", + "3.6": "cf 3.6", + } + cf_col_label = CF_COLUMN_LABELS.get(version, "cf") + units_col_label = pick_a_unit_label_already(units[0]) + units_mapping = { + (row["method"], row["category"], row["indicator"]): row[units_col_label] + for row in units + } + + biosphere_mapping = {} + for flow in bd.Database(biosphere_name): + biosphere_mapping[(flow["name"],) + tuple(flow["categories"])] = flow.id + if flow["name"].startswith("[Deleted]"): + biosphere_mapping[ + (flow["name"].replace("[Deleted]", ""),) + tuple(flow["categories"]) + ] = flow.id + + lcia_data_as_dict = defaultdict(list) + + unmatched = set() + substituted = set() + + for row in cfs: + impact_category = (row["method"], row["category"], 
row["indicator"]) + if row[cf_col_label] is None: + continue + try: + lcia_data_as_dict[impact_category].append( + ( + biosphere_mapping[ + drop_unspecified( + row["name"], row["compartment"], row["subcompartment"] + ) + ], + float(row[cf_col_label]), + ) + ) + except KeyError: + # How is this possible? We are matching ecoinvent data against + # ecoinvent data from the same release! And yet it moves... + category = ( + (row["compartment"], row["subcompartment"]) + if row["subcompartment"].lower() != "unspecified" + else (row["compartment"],) + ) + same_context = { + k[0]: v for k, v in biosphere_mapping.items() if k[1:] == category + } + candidates = sorted( + [ + (damerau_levenshtein(name, row["name"]), name) + for name in same_context + ] + ) + if candidates[0][0] < 3 and candidates[0][0] != candidates[1][0] and candidates[0][1][0].lower() == row['name'][0].lower(): + new_name = candidates[0][1] + pair = (new_name, row['name']) + if pair not in substituted: + print(f"Substituting {new_name} for {row['name']}") + substituted.add(pair) + lcia_data_as_dict[impact_category].append( + ( + same_context[new_name], + float(row[cf_col_label]), + ) + ) + else: + if row["name"] not in unmatched: + print( + "Skipping unmatched flow {}:({}, {})".format( + row["name"], row["compartment"], row["subcompartment"] + ) + ) + unmatched.add(row["name"]) + + for key in lcia_data_as_dict: + method = bd.Method(key) + method.register( + unit=units_mapping.get(key, "Unknown"), + filepath=str(lcia_file), + ecoinvent_version=version, + database=biosphere_name, + ) + method.write(lcia_data_as_dict[key]) diff --git a/bw2io/extractors/excel.py b/bw2io/extractors/excel.py index 98821a3b..c210e32f 100644 --- a/bw2io/extractors/excel.py +++ b/bw2io/extractors/excel.py @@ -1,9 +1,10 @@ +from pathlib import Path import os -from openpyxl import load_workbook +from openpyxl import load_workbook, cell, workbook -def get_cell_value_handle_error(cell): +def get_cell_value_handle_error(cell: 
cell.cell.Cell): """ Retrieve the value of a given cell and handle error types. @@ -34,7 +35,7 @@ def get_cell_value_handle_error(cell): return cell.value -class ExcelExtractor(object): +class ExcelExtractor: """ A class used to extract data from an Excel file. @@ -78,7 +79,7 @@ class ExcelExtractor(object): >>> data = extractor.extract(filepath) """ @classmethod - def extract(cls, filepath): + def extract(cls, filepath: Path): """ Extract data from an Excel file. @@ -97,14 +98,15 @@ def extract(cls, filepath): AssertionError If the file at 'filepath' does not exist. """ - assert os.path.exists(filepath), "Can't file file at path {}".format(filepath) + filepath = Path(filepath) + assert filepath.is_file(), "Can't file file at path {}".format(filepath) wb = load_workbook(filepath, data_only=True, read_only=True) data = [(name, cls.extract_sheet(wb, name)) for name in wb.sheetnames] wb.close() return data @classmethod - def extract_sheet(cls, wb, name, strip=True): + def extract_sheet(cls, wb: workbook.Workbook, name: str, strip: bool=True): """ Extract data from a single sheet in an Excel workbook. @@ -134,6 +136,7 @@ def extract_sheet(cls, wb, name, strip=True): """ ws = wb[name] _ = lambda x: x.strip() if (strip and hasattr(x, "strip")) else x - return [ + provisional = [ [_(get_cell_value_handle_error(cell)) for cell in row] for row in ws.rows ] + return [line for line in provisional if any(line)] diff --git a/bw2io/importers/ecoinvent_lcia.py b/bw2io/importers/ecoinvent_lcia.py index 144a586b..ffbccdce 100644 --- a/bw2io/importers/ecoinvent_lcia.py +++ b/bw2io/importers/ecoinvent_lcia.py @@ -22,7 +22,7 @@ class EcoinventLCIAImporter(LCIAImporter): """ - def __init__(self): + def __init__(self, biosphere_database: str | None = None): """Initialize an instance of EcoinventLCIAImporter. Defines strategies in ``__init__`` because ``config.biosphere`` is dynamic. 
@@ -33,7 +33,7 @@ def __init__(self): drop_unspecified_subcategories, functools.partial( link_iterable_by_fields, - other=Database(config.biosphere), + other=Database(biosphere_database or config.biosphere), fields=("name", "categories"), ), ] @@ -42,7 +42,6 @@ def __init__(self): self.separate_methods() def add_rationalize_method_names_strategy(self): - """Add the `rationalize_method_names` strategy to the list of strategies""" self.strategies.append(rationalize_method_names) def separate_methods(self): @@ -82,4 +81,4 @@ def separate_methods(self): ) self.data = list(self.data.values()) - \ No newline at end of file + diff --git a/bw2io/importers/ecospold2.py b/bw2io/importers/ecospold2.py index df4d176f..63c9c11e 100644 --- a/bw2io/importers/ecospold2.py +++ b/bw2io/importers/ecospold2.py @@ -1,3 +1,4 @@ +from typing import Any from functools import partial from pathlib import Path from time import time @@ -46,16 +47,17 @@ class SingleOutputEcospold2Importer(LCIImporter): """ - format = u"Ecospold2" + format = "Ecospold2" def __init__( self, - dirpath, - db_name, - extractor=Ecospold2DataExtractor, - use_mp=True, - signal=None, - reparametrize_lognormals=False, + dirpath: str, + db_name: str, + biosphere_database: str | None = None, + extractor: Any=Ecospold2DataExtractor, + use_mp: bool=True, + signal: Any=None, + reparametrize_lognormals: bool=False, ): """ @@ -67,6 +69,8 @@ def __init__( Path to the directory containing the ecospold2 file. db_name : str Name of the LCI database. + biosphere_database : str | None + Name of biosphere database to link to. Uses `config.biosphere` if not provided. extractor : class Class for extracting data from the ecospold2 file, by default Ecospold2DataExtractor. 
use_mp : bool @@ -98,7 +102,7 @@ def __init__( drop_unspecified_subcategories, fix_ecoinvent_flows_pre35, drop_temporary_outdated_biosphere_flows, - link_biosphere_by_flow_uuid, + partial(link_biosphere_by_flow_uuid, biosphere=biosphere_database or config.biosphere), link_internal_technosphere_by_composite_code, delete_exchanges_missing_activity, delete_ghost_exchanges, diff --git a/bw2io/importers/ecospold2_biosphere.py b/bw2io/importers/ecospold2_biosphere.py index 07708248..de796206 100644 --- a/bw2io/importers/ecospold2_biosphere.py +++ b/bw2io/importers/ecospold2_biosphere.py @@ -1,3 +1,4 @@ +from pathlib import Path import json import os @@ -42,7 +43,7 @@ class Ecospold2BiosphereImporter(LCIImporter): format = "Ecoinvent XML" - def __init__(self, name="biosphere3", version="3.9"): + def __init__(self, name: str ="biosphere3", version: str="3.9", filepath: Path | None = None): """ Initialize the importer. @@ -54,21 +55,23 @@ def __init__(self, name="biosphere3", version="3.9"): Version of the database, by default "3.9". """ self.db_name = name - self.data = self.extract(version) + self.data = self.extract(version, filepath) self.strategies = [ normalize_units, drop_unspecified_subcategories, ensure_categories_are_tuples, ] - def extract(self, version): + def extract(self, version: str | None, filepath: Path | None): """ Extract elementary flows from the xml file. Parameters ---------- - version : str - Version of the database. + version + Version of the database if using default data. 
+ filepath + File path of user-specified data file Returns ------- @@ -94,14 +97,9 @@ def extract_flow_data(o): ) return ds - lci_dirpath = os.path.join(os.path.dirname(__file__), "..", "data", "lci") + if not filepath: + filepath = Path(__file__).parent.parent.resolve() / "data" / "lci" / f"ecoinvent elementary flows {version}.xml" - fp = os.path.join(lci_dirpath, f"ecoinvent elementary flows {version}.xml") - root = objectify.parse(open(fp, encoding="utf-8")).getroot() - flow_data = recursive_str_to_unicode( - [extract_flow_data(ds) for ds in root.iterchildren()] - ) - - # previous = os.path.join(lci_dirpath, "previous elementary flows.json") - # return flow_data + json.load(open(previous)) + root = objectify.parse(open(filepath, encoding="utf-8")).getroot() + flow_data = [extract_flow_data(ds) for ds in root.iterchildren()] return flow_data diff --git a/bw2io/strategies/ecospold2.py b/bw2io/strategies/ecospold2.py index ed3fe758..bc6ceda3 100644 --- a/bw2io/strategies/ecospold2.py +++ b/bw2io/strategies/ecospold2.py @@ -9,7 +9,7 @@ from .migrations import migrate_exchanges, migrations -def link_biosphere_by_flow_uuid(db, biosphere="biosphere3"): +def link_biosphere_by_flow_uuid(db: list[dict], biosphere: str="biosphere3"): """ Link the exchanges in the given list of datasets to the specified biosphere database by flow UUID. @@ -1232,7 +1232,7 @@ def has_cpc(exc): def delete_none_synonyms(db): """ - Remove None values from the 'synonyms' list of each dataset. + Remove `None` values from the 'synonyms' list of each dataset. 
Parameters ---------- From 7d8474baa86197a5d998fa425b295d559bdb3a96 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Thu, 9 Nov 2023 21:43:31 +0100 Subject: [PATCH 2/8] Allow use of existing biosphere database --- bw2io/ecoinvent.py | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py index 2f560300..921d782f 100644 --- a/bw2io/ecoinvent.py +++ b/bw2io/ecoinvent.py @@ -58,8 +58,9 @@ def import_ecoinvent_release( username: str | None = None, password: str | None = None, lci: bool = True, - lcia: bool = False, + lcia: bool = True, biosphere_name: str | None = None, + use_existing_biosphere: bool = False ) -> None: """Import an ecoinvent LCI and optionally LCIA database. @@ -109,28 +110,35 @@ def import_ecoinvent_release( if biosphere_name is None: biosphere_name = f"ecoinvent-{version}-biosphere" if lci: - if biosphere_name in bd.databases: - raise ValueError(f"Biosphere database {biosphere_name} already exists") - db_name = f"ecoinvent-{version}-{system_model}" - if db_name in bd.databases: - raise ValueError(f"Database {db_name} already exists") - lci_path = release.get_release( version=version, system_model=system_model, release_type=ei.ReleaseType.ecospold, ) - eb = Ecospold2BiosphereImporter( - name=biosphere_name, - filepath=lci_path / "MasterData" / "ElementaryExchanges.xml", - ) - eb.apply_strategies() - if not eb.all_linked: - raise ValueError( - f"Can't ingest biosphere database {biosphere_name} - unlinked flows." 
+ db_name = f"ecoinvent-{version}-{system_model}" + if db_name in bd.databases: + raise ValueError(f"Database {db_name} already exists") + + if use_existing_biosphere: + if biosphere_name not in bd.databases: + raise ValueError(f"Biosphere database {biosphere_name} doesn't exist") + elif not len(bd.Database(biosphere_name)): + raise ValueError(f"Biosphere database {biosphere_name} is empty") + else: + if biosphere_name in bd.databases: + raise ValueError(f"Biosphere database {biosphere_name} already exists") + + eb = Ecospold2BiosphereImporter( + name=biosphere_name, + filepath=lci_path / "MasterData" / "ElementaryExchanges.xml", ) - eb.write_database(overwrite=False) + eb.apply_strategies() + if not eb.all_linked: + raise ValueError( + f"Can't ingest biosphere database {biosphere_name} - unlinked flows." + ) + eb.write_database(overwrite=False) bd.preferences["biosphere_database"] = biosphere_name soup = SingleOutputEcospold2Importer(lci_path / "datasets", db_name) From c7f87ef31d2a19b7948e32b7dda84c15d6826fe8 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Thu, 9 Nov 2023 21:43:48 +0100 Subject: [PATCH 3/8] Add documentation examples --- bw2io/ecoinvent.py | 78 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py index 921d782f..91c3d89d 100644 --- a/bw2io/ecoinvent.py +++ b/bw2io/ecoinvent.py @@ -62,11 +62,26 @@ def import_ecoinvent_release( biosphere_name: str | None = None, use_existing_biosphere: bool = False ) -> None: - """Import an ecoinvent LCI and optionally LCIA database. + """ + Import an ecoinvent LCI and/or LCIA release. Uses [ecoinvent_interface](https://github.com/brightway-lca/ecoinvent_interface). Auth credentials are optional as they can be set externally (see the `ecoinvent_interface` documentation), and such permanent storage is highly recommended. - The biosphere database must not exist. It is too much work to do selective updates. 
+ **DO NOT** run `bw2setup` before using this function - it isn't needed and will cause broken results. + + System model strings follow the ecoinvent unofficial API. They are given in a short or long form. The short forms: + + * cutoff + * consequential + * apos + * EN15804 + + And the long forms: + + * Allocation cut-off by classification + * Substitution, consequential, long-term + * Allocation at the Point of Substitution + * Allocation, cut-off, EN15804 Parameters ---------- @@ -84,6 +99,65 @@ def import_ecoinvent_release( Flag on whether to import the LCIA impact categories biosphere_name Name of database to store biosphere flows. They will be stored in the main LCI database if not specified. + use_existing_biosphere + Flag on whether to create a new biosphere database or use an existing one + + Examples + -------- + + Get ecoinvent 3.9.1 cutoff in a new project (**without** running `bw2setup` first): + + >>> my_ecoinvent_username = "XXX" + >>> my_ecoinvent_password = "XXX" + >>> import ecoinvent_interface as ei + >>> import bw2data as bd + >>> import bw2io as bi + >>> bd.projects.set_current("some new project") + >>> bi.import_ecoinvent_release( + ... version="3.9.1", + ... system_model="cutoff", + ... username=my_ecoinvent_username, + ... password=my_ecoinvent_password, + ... ) + >>> bd.databases + Databases dictionary with 2 object(s): + ecoinvent-3.9.1-biosphere + ecoinvent-3.9.1-cutoff + >>> len(bd.methods) + 762 + + Add ecoinvent 3.9.1 apos to the same project: + + >>> bi.import_ecoinvent_release( + ... version="3.9.1", + ... system_model="apos", + ... username=my_ecoinvent_username, + ... password=my_ecoinvent_password, + ... use_existing_biosphere=True + ... 
) + >>> bd.databases + Databases dictionary with 3 object(s): + ecoinvent-3.9.1-apos + ecoinvent-3.9.1-biosphere + ecoinvent-3.9.1-cutoff + + Create a new database but use `biosphere3` for the biosphere database name, and don't add LCIA methods: + + >>> bd.projects.set_current("some other project") + >>> bi.import_ecoinvent_release( + ... version="3.9.1", + ... system_model="cutoff", + ... username=my_ecoinvent_username, + ... password=my_ecoinvent_password, + ... biosphere_name="biosphere3", + ... lcia=False + ... ) + >>> bd.databases + Databases dictionary with 2 object(s): + biosphere3 + ecoinvent-3.9.1-cutoff + >>> len(bd.methods) + 0 """ from . import create_core_migrations, migrations From f301fb472a28a813939cad24663f9e4390a08f57 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Thu, 9 Nov 2023 21:51:08 +0100 Subject: [PATCH 4/8] Can't skip blank excel lines, they are used in our template --- bw2io/ecoinvent.py | 1 + bw2io/extractors/excel.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py index 91c3d89d..53c11417 100644 --- a/bw2io/ecoinvent.py +++ b/bw2io/ecoinvent.py @@ -34,6 +34,7 @@ def header_dict(array: list) -> list[dict]: return [ {header.lower(): value for header, value in zip(array[0], row)} for row in array[1:] + if any(row) ] diff --git a/bw2io/extractors/excel.py b/bw2io/extractors/excel.py index c210e32f..3c2d1b03 100644 --- a/bw2io/extractors/excel.py +++ b/bw2io/extractors/excel.py @@ -136,7 +136,6 @@ def extract_sheet(cls, wb: workbook.Workbook, name: str, strip: bool=True): """ ws = wb[name] _ = lambda x: x.strip() if (strip and hasattr(x, "strip")) else x - provisional = [ + return [ [_(get_cell_value_handle_error(cell)) for cell in row] for row in ws.rows ] - return [line for line in provisional if any(line)] From 7bc95d929ae9aab11814713bf1f48992ada2b5ba Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Thu, 9 Nov 2023 21:59:02 +0100 Subject: [PATCH 5/8] Only test on py3.10 or higher 
--- azure-pipelines.yml | 12 ++++++------ setup.py | 5 +++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index fe63d63d..1595ef93 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,8 +13,8 @@ jobs: matrix: Python310: python.version: '3.10' - Python37: - python.version: '3.9' + Python311: + python.version: '3.11' timeoutInMinutes: 20 steps: @@ -43,8 +43,8 @@ jobs: vmImage: 'macOS-latest' strategy: matrix: - Python310: - python.version: '3.10' + Python311: + python.version: '3.11' timeoutInMinutes: 20 steps: @@ -73,8 +73,8 @@ jobs: vmImage: 'windows-latest' strategy: matrix: - Python310: - python.version: '3.10' + Python311: + python.version: '3.11' timeoutInMinutes: 60 steps: diff --git a/setup.py b/setup.py index aa190f60..17fc9488 100644 --- a/setup.py +++ b/setup.py @@ -61,8 +61,9 @@ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX", "Programming Language :: Python", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Scientific/Engineering :: Mathematics", "Topic :: Scientific/Engineering :: Visualization", From 09fc8f667497904023878d9d8ad2c2e918a064a6 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Thu, 9 Nov 2023 21:59:44 +0100 Subject: [PATCH 6/8] FIx database name parameter --- bw2io/ecoinvent.py | 2 +- bw2io/importers/ecospold2.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py index 53c11417..a62f009c 100644 --- a/bw2io/ecoinvent.py +++ b/bw2io/ecoinvent.py @@ -216,7 +216,7 @@ def import_ecoinvent_release( eb.write_database(overwrite=False) bd.preferences["biosphere_database"] = biosphere_name - soup = SingleOutputEcospold2Importer(lci_path / "datasets", db_name) + 
soup = SingleOutputEcospold2Importer(dirpath=lci_path / "datasets", db_name=db_name, biosphere_database_name=biosphere_name) soup.apply_strategies() if not soup.all_linked: raise ValueError( diff --git a/bw2io/importers/ecospold2.py b/bw2io/importers/ecospold2.py index 63c9c11e..15d10e81 100644 --- a/bw2io/importers/ecospold2.py +++ b/bw2io/importers/ecospold2.py @@ -53,7 +53,7 @@ def __init__( self, dirpath: str, db_name: str, - biosphere_database: str | None = None, + biosphere_database_name: str | None = None, extractor: Any=Ecospold2DataExtractor, use_mp: bool=True, signal: Any=None, @@ -69,7 +69,7 @@ def __init__( Path to the directory containing the ecospold2 file. db_name : str Name of the LCI database. - biosphere_database : str | None + biosphere_database_name : str | None Name of biosphere database to link to. Uses `config.biosphere` if not provided. extractor : class Class for extracting data from the ecospold2 file, by default Ecospold2DataExtractor. @@ -102,7 +102,7 @@ def __init__( drop_unspecified_subcategories, fix_ecoinvent_flows_pre35, drop_temporary_outdated_biosphere_flows, - partial(link_biosphere_by_flow_uuid, biosphere=biosphere_database or config.biosphere), + partial(link_biosphere_by_flow_uuid, biosphere=biosphere_database_name or config.biosphere), link_internal_technosphere_by_composite_code, delete_exchanges_missing_activity, delete_ghost_exchanges, @@ -111,7 +111,7 @@ def __init__( convert_activity_parameters_to_list, add_cpc_classification_from_single_reference_product, delete_none_synonyms, - partial(update_social_flows_in_older_consequential, biosphere_db=Database(config.biosphere)), + partial(update_social_flows_in_older_consequential, biosphere_db=Database(biosphere_database_name or config.biosphere)), ] if reparametrize_lognormals: From 22994ebbbada2c293610593292a4f51add79e99c Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Thu, 9 Nov 2023 22:00:21 +0100 Subject: [PATCH 7/8] Reformatting --- bw2io/ecoinvent.py | 16 
++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py index a62f009c..5c0e2558 100644 --- a/bw2io/ecoinvent.py +++ b/bw2io/ecoinvent.py @@ -61,7 +61,7 @@ def import_ecoinvent_release( lci: bool = True, lcia: bool = True, biosphere_name: str | None = None, - use_existing_biosphere: bool = False + use_existing_biosphere: bool = False, ) -> None: """ Import an ecoinvent LCI and/or LCIA release. @@ -216,7 +216,11 @@ def import_ecoinvent_release( eb.write_database(overwrite=False) bd.preferences["biosphere_database"] = biosphere_name - soup = SingleOutputEcospold2Importer(dirpath=lci_path / "datasets", db_name=db_name, biosphere_database_name=biosphere_name) + soup = SingleOutputEcospold2Importer( + dirpath=lci_path / "datasets", + db_name=db_name, + biosphere_database_name=biosphere_name, + ) soup.apply_strategies() if not soup.all_linked: raise ValueError( @@ -311,9 +315,13 @@ def import_ecoinvent_release( for name in same_context ] ) - if candidates[0][0] < 3 and candidates[0][0] != candidates[1][0] and candidates[0][1][0].lower() == row['name'][0].lower(): + if ( + candidates[0][0] < 3 + and candidates[0][0] != candidates[1][0] + and candidates[0][1][0].lower() == row["name"][0].lower() + ): new_name = candidates[0][1] - pair = (new_name, row['name']) + pair = (new_name, row["name"]) if pair not in substituted: print(f"Substituting {new_name} for {row['name']}") substituted.add(pair) From b596c2124132deb70cf9779c1844c3be5659a9b4 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Thu, 9 Nov 2023 22:02:16 +0100 Subject: [PATCH 8/8] Add importer signal for AB love --- bw2io/ecoinvent.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py index 5c0e2558..0769bd8a 100644 --- a/bw2io/ecoinvent.py +++ b/bw2io/ecoinvent.py @@ -2,6 +2,7 @@ import zipfile from collections import defaultdict from pathlib import Path +from typing import Any import bw2data as bd import 
ecoinvent_interface as ei @@ -62,6 +63,7 @@ def import_ecoinvent_release( lcia: bool = True, biosphere_name: str | None = None, use_existing_biosphere: bool = False, + importer_signal: Any = None, ) -> None: """ Import an ecoinvent LCI and/or LCIA release. @@ -102,6 +104,8 @@ def import_ecoinvent_release( Name of database to store biosphere flows. They will be stored in the main LCI database if not specified. use_existing_biosphere Flag on whether to create a new biosphere database or use an existing one + importer_signal + Used by the Activity Browser to provide feedback during the import Examples -------- @@ -220,6 +224,7 @@ def import_ecoinvent_release( dirpath=lci_path / "datasets", db_name=db_name, biosphere_database_name=biosphere_name, + signal=importer_signal, ) soup.apply_strategies() if not soup.all_linked: