From 5d37d0a81b2f2e2b1abeaac52241af66d6b3ee19 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Wed, 8 Nov 2023 23:19:19 +0100
Subject: [PATCH 1/8] Add `import_ecoinvent_release` utility

---
 bw2io/__init__.py                      |  12 +-
 bw2io/ecoinvent.py                     | 260 +++++++++++++++++++++++++
 bw2io/extractors/excel.py              |  17 +-
 bw2io/importers/ecoinvent_lcia.py      |   7 +-
 bw2io/importers/ecospold2.py           |  20 +-
 bw2io/importers/ecospold2_biosphere.py |  26 ++-
 bw2io/strategies/ecospold2.py          |   4 +-
 7 files changed, 310 insertions(+), 36 deletions(-)
 create mode 100644 bw2io/ecoinvent.py
diff --git a/bw2io/__init__.py b/bw2io/__init__.py
index ca05841c..11e77c1c 100644
--- a/bw2io/__init__.py
+++ b/bw2io/__init__.py
@@ -28,6 +28,7 @@
     "exiobase_monetary",
     "get_csv_example_filepath",
     "get_xlsx_example_filepath",
+    "import_ecoinvent_release",
     "install_project",
     "lci_matrices_to_excel",
     "lci_matrices_to_matlab",
@@ -41,9 +42,9 @@
     "SimaProLCIACSVImporter",
     "SingleOutputEcospold1Importer",
     "SingleOutputEcospold2Importer",
-    "useeio11",
     "unlinked_data",
     "UnlinkedData",
+    "useeio11",
 ]
 
 from .version import version as __version__
@@ -94,6 +95,15 @@
 from .utils import activity_hash, es2_activity_hash, load_json_data_file
 from .remote import install_project
 
+try:
+    from .ecoinvent import import_ecoinvent_release
+except ImportError:
+    import warnings
+
+    def import_ecoinvent_release(*args, **kwargs):
+        warnings.warn("Please install `ecoinvent_interface` to use this function")
+
+
 from bw2data import config, databases
 
 config.metadata.extend(
diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py
new file mode 100644
index 00000000..2f560300
--- /dev/null
+++ b/bw2io/ecoinvent.py
@@ -0,0 +1,260 @@
+import re
+import zipfile
+from collections import defaultdict
+from pathlib import Path
+
+import bw2data as bd
+import ecoinvent_interface as ei
+import openpyxl
+from ecoinvent_interface.core import SYSTEM_MODELS
+from ecoinvent_interface.string_distance import damerau_levenshtein
+
+from .extractors import ExcelExtractor
+from .importers import (
+    EcoinventLCIAImporter,
+    Ecospold2BiosphereImporter,
+    SingleOutputEcospold2Importer,
+)
+
+
+def get_excel_sheet_names(file_path: Path) -> list[str]:
+    """Read XML metadata file instead of using openpyxl, which loads the whole workbook.
+
+    From https://stackoverflow.com/questions/12250024/how-to-obtain-sheet-names-from-xls-files-without-loading-the-whole-file.
+    """
+    sheets = []
+    with zipfile.ZipFile(file_path, "r") as zip_ref:
+        xml = zip_ref.read("xl/workbook.xml").decode("utf-8")
+        for s_tag in re.findall("<sheet [^>]*", xml):
+            sheets.append(re.search('name="[^"]*', s_tag).group(0)[6:])
+    return sheets
+
+
+def header_dict(array: list) -> list[dict]:
+    return [
+        {header.lower(): value for header, value in zip(array[0], row)}
+        for row in array[1:]
+    ]
+
+
+def drop_unspecified(a: str, b: str, c: str) -> tuple:
+    if c.lower() == "unspecified":
+        return (a, b)
+    else:
+        return (a, b, c)
+
+
+def pick_a_unit_label_already(obj: dict) -> str:
+    candidates = ("indicator unit", "unit", "unitname", "impact score unit")
+    for candidate in candidates:
+        if candidate in obj:
+            return candidate
+    raise KeyError("Can't find suitable column label for LCIA units")
+
+
+def import_ecoinvent_release(
+    version: str,
+    system_model: str,
+    username: str | None = None,
+    password: str | None = None,
+    lci: bool = True,
+    lcia: bool = False,
+    biosphere_name: str | None = None,
+) -> None:
+    """Import an ecoinvent LCI and optionally LCIA database.
+
+    Uses [ecoinvent_interface](https://github.com/brightway-lca/ecoinvent_interface). Auth credentials are optional as they can be set externally (see the `ecoinvent_interface` documentation), and such permanent storage is highly recommended.
+
+    The biosphere database must not exist. It is too much work to do selective updates.
+
+    Parameters
+    ----------
+    version
+        The ecoinvent release version as a string, e.g. '3.9.1'
+    system_model
+        The system model as a string in short or long form, e.g. 'apos' or 'Allocation cut-off by classification'
+    username
+        ecoinvent username
+    password
+        ecoinvent password
+    lci
+        Flag on whether to import the inventory database
+    lcia
+        Flag on whether to import the LCIA impact categories
+    biosphere_name
+        Name of database to store biosphere flows. They will be stored in the main LCI database if not specified.
+
+    """
+    from . import create_core_migrations, migrations
+
+    if not len(migrations):
+        create_core_migrations()
+
+    if username is None and password is None:
+        settings = ei.Settings()
+    else:
+        settings = ei.Settings(username=username, password=password)
+    if not settings.username or not settings.password:
+        raise ValueError("Can't determine ecoinvent username or password")
+
+    release = ei.EcoinventRelease(settings)
+    if not version in release.list_versions():
+        raise ValueError(f"Invalid version {version}")
+
+    if system_model in SYSTEM_MODELS:
+        system_model = SYSTEM_MODELS[system_model]
+    if not system_model in release.list_system_models(version):
+        raise ValueError(f"Invalid system model {system_model}")
+
+    if biosphere_name is None:
+        biosphere_name = f"ecoinvent-{version}-biosphere"
+    if lci:
+        if biosphere_name in bd.databases:
+            raise ValueError(f"Biosphere database {biosphere_name} already exists")
+        db_name = f"ecoinvent-{version}-{system_model}"
+        if db_name in bd.databases:
+            raise ValueError(f"Database {db_name} already exists")
+
+        lci_path = release.get_release(
+            version=version,
+            system_model=system_model,
+            release_type=ei.ReleaseType.ecospold,
+        )
+
+        eb = Ecospold2BiosphereImporter(
+            name=biosphere_name,
+            filepath=lci_path / "MasterData" / "ElementaryExchanges.xml",
+        )
+        eb.apply_strategies()
+        if not eb.all_linked:
+            raise ValueError(
+                f"Can't ingest biosphere database {biosphere_name} - unlinked flows."
+            )
+        eb.write_database(overwrite=False)
+        bd.preferences["biosphere_database"] = biosphere_name
+
+        soup = SingleOutputEcospold2Importer(lci_path / "datasets", db_name)
+        soup.apply_strategies()
+        if not soup.all_linked:
+            raise ValueError(
+                f"Can't ingest inventory database {db_name} - unlinked flows."
+            )
+        soup.write_database()
+
+    if lcia:
+        if biosphere_name is None:
+            biosphere_name = bd.config.biosphere
+        if biosphere_name not in bd.databases or not len(bd.Database(biosphere_name)):
+            raise ValueError(
+                f"Can't find populated biosphere flow database {biosphere_name}"
+            )
+
+        lcia_file = ei.get_excel_lcia_file_for_version(release=release, version=version)
+        sheet_names = get_excel_sheet_names(lcia_file)
+
+        if "units" in sheet_names:
+            units_sheetname = "units"
+        elif "Indicators" in sheet_names:
+            units_sheetname = "Indicators"
+        else:
+            raise ValueError(
+                f"Can't find worksheet for impact category units in {sheet_names}"
+            )
+
+        if "CFs" not in sheet_names:
+            raise ValueError(
+                f"Can't find worksheet for characterization factors; expected `CFs`, found {sheet_names}"
+            )
+
+        data = dict(ExcelExtractor.extract(lcia_file))
+        units = header_dict(data[units_sheetname])
+
+        cfs = header_dict(data["CFs"])
+
+        CF_COLUMN_LABELS = {
+            "3.4": "cf 3.4",
+            "3.5": "cf 3.5",
+            "3.6": "cf 3.6",
+        }
+        cf_col_label = CF_COLUMN_LABELS.get(version, "cf")
+        units_col_label = pick_a_unit_label_already(units[0])
+        units_mapping = {
+            (row["method"], row["category"], row["indicator"]): row[units_col_label]
+            for row in units
+        }
+
+        biosphere_mapping = {}
+        for flow in bd.Database(biosphere_name):
+            biosphere_mapping[(flow["name"],) + tuple(flow["categories"])] = flow.id
+            if flow["name"].startswith("[Deleted]"):
+                biosphere_mapping[
+                    (flow["name"].replace("[Deleted]", ""),) + tuple(flow["categories"])
+                ] = flow.id
+
+        lcia_data_as_dict = defaultdict(list)
+
+        unmatched = set()
+        substituted = set()
+
+        for row in cfs:
+            impact_category = (row["method"], row["category"], row["indicator"])
+            if row[cf_col_label] is None:
+                continue
+            try:
+                lcia_data_as_dict[impact_category].append(
+                    (
+                        biosphere_mapping[
+                            drop_unspecified(
+                                row["name"], row["compartment"], row["subcompartment"]
+                            )
+                        ],
+                        float(row[cf_col_label]),
+                    )
+                )
+            except KeyError:
+                # How is this possible? We are matching ecoinvent data against
+                # ecoinvent data from the same release! And yet it moves...
+                category = (
+                    (row["compartment"], row["subcompartment"])
+                    if row["subcompartment"].lower() != "unspecified"
+                    else (row["compartment"],)
+                )
+                same_context = {
+                    k[0]: v for k, v in biosphere_mapping.items() if k[1:] == category
+                }
+                candidates = sorted(
+                    [
+                        (damerau_levenshtein(name, row["name"]), name)
+                        for name in same_context
+                    ]
+                )
+                if candidates[0][0] < 3 and candidates[0][0] != candidates[1][0] and candidates[0][1][0].lower() == row['name'][0].lower():
+                    new_name = candidates[0][1]
+                    pair = (new_name, row['name'])
+                    if pair not in substituted:
+                        print(f"Substituting {new_name} for {row['name']}")
+                        substituted.add(pair)
+                    lcia_data_as_dict[impact_category].append(
+                        (
+                            same_context[new_name],
+                            float(row[cf_col_label]),
+                        )
+                    )
+                else:
+                    if row["name"] not in unmatched:
+                        print(
+                            "Skipping unmatched flow {}:({}, {})".format(
+                                row["name"], row["compartment"], row["subcompartment"]
+                            )
+                        )
+                        unmatched.add(row["name"])
+
+        for key in lcia_data_as_dict:
+            method = bd.Method(key)
+            method.register(
+                unit=units_mapping.get(key, "Unknown"),
+                filepath=str(lcia_file),
+                ecoinvent_version=version,
+                database=biosphere_name,
+            )
+            method.write(lcia_data_as_dict[key])
diff --git a/bw2io/extractors/excel.py b/bw2io/extractors/excel.py
index 98821a3b..c210e32f 100644
--- a/bw2io/extractors/excel.py
+++ b/bw2io/extractors/excel.py
@@ -1,9 +1,10 @@
+from pathlib import Path
 import os
 
-from openpyxl import load_workbook
+from openpyxl import load_workbook, cell, workbook
 
 
-def get_cell_value_handle_error(cell):
+def get_cell_value_handle_error(cell: cell.cell.Cell):
     """
     Retrieve the value of a given cell and handle error types.
 
@@ -34,7 +35,7 @@ def get_cell_value_handle_error(cell):
         return cell.value
 
 
-class ExcelExtractor(object):
+class ExcelExtractor:
     """
     A class used to extract data from an Excel file.
     
@@ -78,7 +79,7 @@ class ExcelExtractor(object):
     >>> data = extractor.extract(filepath)
     """
     @classmethod
-    def extract(cls, filepath):
+    def extract(cls, filepath: Path):
         """
         Extract data from an Excel file.
 
@@ -97,14 +98,15 @@ def extract(cls, filepath):
         AssertionError
             If the file at 'filepath' does not exist.
         """
-        assert os.path.exists(filepath), "Can't file file at path {}".format(filepath)
+        filepath = Path(filepath)
+        assert filepath.is_file(), "Can't file file at path {}".format(filepath)
         wb = load_workbook(filepath, data_only=True, read_only=True)
         data = [(name, cls.extract_sheet(wb, name)) for name in wb.sheetnames]
         wb.close()
         return data
 
     @classmethod
-    def extract_sheet(cls, wb, name, strip=True):
+    def extract_sheet(cls, wb: workbook.Workbook, name: str, strip: bool=True):
         """
         Extract data from a single sheet in an Excel workbook.
 
@@ -134,6 +136,7 @@ def extract_sheet(cls, wb, name, strip=True):
         """
         ws = wb[name]
         _ = lambda x: x.strip() if (strip and hasattr(x, "strip")) else x
-        return [
+        provisional = [
             [_(get_cell_value_handle_error(cell)) for cell in row] for row in ws.rows
         ]
+        return [line for line in provisional if any(line)]
diff --git a/bw2io/importers/ecoinvent_lcia.py b/bw2io/importers/ecoinvent_lcia.py
index 144a586b..ffbccdce 100644
--- a/bw2io/importers/ecoinvent_lcia.py
+++ b/bw2io/importers/ecoinvent_lcia.py
@@ -22,7 +22,7 @@ class EcoinventLCIAImporter(LCIAImporter):
     
     """
 
-    def __init__(self):
+    def __init__(self, biosphere_database: str | None = None):
         """Initialize an instance of EcoinventLCIAImporter.
 
         Defines strategies in ``__init__`` because ``config.biosphere`` is dynamic.
@@ -33,7 +33,7 @@ def __init__(self):
             drop_unspecified_subcategories,
             functools.partial(
                 link_iterable_by_fields,
-                other=Database(config.biosphere),
+                other=Database(biosphere_database or config.biosphere),
                 fields=("name", "categories"),
             ),
         ]
@@ -42,7 +42,6 @@ def __init__(self):
         self.separate_methods()
 
     def add_rationalize_method_names_strategy(self):
-        """Add the `rationalize_method_names` strategy to the list of strategies"""
         self.strategies.append(rationalize_method_names)
 
     def separate_methods(self):
@@ -82,4 +81,4 @@ def separate_methods(self):
             )
 
         self.data = list(self.data.values())
-        
\ No newline at end of file
+        
diff --git a/bw2io/importers/ecospold2.py b/bw2io/importers/ecospold2.py
index df4d176f..63c9c11e 100644
--- a/bw2io/importers/ecospold2.py
+++ b/bw2io/importers/ecospold2.py
@@ -1,3 +1,4 @@
+from typing import Any
 from functools import partial
 from pathlib import Path
 from time import time
@@ -46,16 +47,17 @@ class SingleOutputEcospold2Importer(LCIImporter):
     
     """
 
-    format = u"Ecospold2"
+    format = "Ecospold2"
 
     def __init__(
         self,
-        dirpath,
-        db_name,
-        extractor=Ecospold2DataExtractor,
-        use_mp=True,
-        signal=None,
-        reparametrize_lognormals=False,
+        dirpath: str,
+        db_name: str,
+        biosphere_database: str | None = None,
+        extractor: Any=Ecospold2DataExtractor,
+        use_mp: bool=True,
+        signal: Any=None,
+        reparametrize_lognormals: bool=False,
     ):
 
         """
@@ -67,6 +69,8 @@ def __init__(
             Path to the directory containing the ecospold2 file.
         db_name : str
             Name of the LCI database.
+        biosphere_database : str | None
+            Name of biosphere database to link to. Uses `config.biosphere` if not provided.
         extractor : class
             Class for extracting data from the ecospold2 file, by default Ecospold2DataExtractor.
         use_mp : bool
@@ -98,7 +102,7 @@ def __init__(
             drop_unspecified_subcategories,
             fix_ecoinvent_flows_pre35,
             drop_temporary_outdated_biosphere_flows,
-            link_biosphere_by_flow_uuid,
+            partial(link_biosphere_by_flow_uuid, biosphere=biosphere_database or config.biosphere),
             link_internal_technosphere_by_composite_code,
             delete_exchanges_missing_activity,
             delete_ghost_exchanges,
diff --git a/bw2io/importers/ecospold2_biosphere.py b/bw2io/importers/ecospold2_biosphere.py
index 07708248..de796206 100644
--- a/bw2io/importers/ecospold2_biosphere.py
+++ b/bw2io/importers/ecospold2_biosphere.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 import json
 import os
 
@@ -42,7 +43,7 @@ class Ecospold2BiosphereImporter(LCIImporter):
 
     format = "Ecoinvent XML"
 
-    def __init__(self, name="biosphere3", version="3.9"):
+    def __init__(self, name: str ="biosphere3", version: str="3.9", filepath: Path | None = None):
         """
         Initialize the importer.
 
@@ -54,21 +55,23 @@ def __init__(self, name="biosphere3", version="3.9"):
             Version of the database, by default "3.9".
         """
         self.db_name = name
-        self.data = self.extract(version)
+        self.data = self.extract(version, filepath)
         self.strategies = [
             normalize_units,
             drop_unspecified_subcategories,
             ensure_categories_are_tuples,
         ]
 
-    def extract(self, version):
+    def extract(self, version: str | None, filepath: Path | None):
         """
         Extract elementary flows from the xml file.
 
         Parameters
         ----------
-        version : str
-            Version of the database.
+        version
+            Version of the database if using default data.
+        filepath
+            File path of user-specified data file
 
         Returns
         -------
@@ -94,14 +97,9 @@ def extract_flow_data(o):
             )
             return ds
 
-        lci_dirpath = os.path.join(os.path.dirname(__file__), "..", "data", "lci")
+        if not filepath:
+            filepath = Path(__file__).parent.parent.resolve() / "data" / "lci" / f"ecoinvent elementary flows {version}.xml"
 
-        fp = os.path.join(lci_dirpath, f"ecoinvent elementary flows {version}.xml")
-        root = objectify.parse(open(fp, encoding="utf-8")).getroot()
-        flow_data = recursive_str_to_unicode(
-            [extract_flow_data(ds) for ds in root.iterchildren()]
-        )
-
-        # previous = os.path.join(lci_dirpath, "previous elementary flows.json")
-        # return flow_data + json.load(open(previous))
+        root = objectify.parse(open(filepath, encoding="utf-8")).getroot()
+        flow_data = [extract_flow_data(ds) for ds in root.iterchildren()]
         return flow_data
diff --git a/bw2io/strategies/ecospold2.py b/bw2io/strategies/ecospold2.py
index ed3fe758..bc6ceda3 100644
--- a/bw2io/strategies/ecospold2.py
+++ b/bw2io/strategies/ecospold2.py
@@ -9,7 +9,7 @@
 from .migrations import migrate_exchanges, migrations
 
 
-def link_biosphere_by_flow_uuid(db, biosphere="biosphere3"):
+def link_biosphere_by_flow_uuid(db: list[dict], biosphere: str="biosphere3"):
     """
     Link the exchanges in the given list of datasets to the specified
     biosphere database by flow UUID.
@@ -1232,7 +1232,7 @@ def has_cpc(exc):
 
 def delete_none_synonyms(db):
     """
-    Remove None values from the 'synonyms' list of each dataset.
+    Remove `None` values from the 'synonyms' list of each dataset.
 
     Parameters
     ----------

From 7d8474baa86197a5d998fa425b295d559bdb3a96 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Thu, 9 Nov 2023 21:43:31 +0100
Subject: [PATCH 2/8] Allow use of existing biosphere database

---
 bw2io/ecoinvent.py | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py
index 2f560300..921d782f 100644
--- a/bw2io/ecoinvent.py
+++ b/bw2io/ecoinvent.py
@@ -58,8 +58,9 @@ def import_ecoinvent_release(
     username: str | None = None,
     password: str | None = None,
     lci: bool = True,
-    lcia: bool = False,
+    lcia: bool = True,
     biosphere_name: str | None = None,
+    use_existing_biosphere: bool = False
 ) -> None:
     """Import an ecoinvent LCI and optionally LCIA database.
 
@@ -109,28 +110,35 @@ def import_ecoinvent_release(
     if biosphere_name is None:
         biosphere_name = f"ecoinvent-{version}-biosphere"
     if lci:
-        if biosphere_name in bd.databases:
-            raise ValueError(f"Biosphere database {biosphere_name} already exists")
-        db_name = f"ecoinvent-{version}-{system_model}"
-        if db_name in bd.databases:
-            raise ValueError(f"Database {db_name} already exists")
-
         lci_path = release.get_release(
             version=version,
             system_model=system_model,
             release_type=ei.ReleaseType.ecospold,
         )
 
-        eb = Ecospold2BiosphereImporter(
-            name=biosphere_name,
-            filepath=lci_path / "MasterData" / "ElementaryExchanges.xml",
-        )
-        eb.apply_strategies()
-        if not eb.all_linked:
-            raise ValueError(
-                f"Can't ingest biosphere database {biosphere_name} - unlinked flows."
+        db_name = f"ecoinvent-{version}-{system_model}"
+        if db_name in bd.databases:
+            raise ValueError(f"Database {db_name} already exists")
+
+        if use_existing_biosphere:
+            if biosphere_name not in bd.databases:
+                raise ValueError(f"Biosphere database {biosphere_name} doesn't exist")
+            elif not len(bd.Database(biosphere_name)):
+                raise ValueError(f"Biosphere database {biosphere_name} is empty")
+        else:
+            if biosphere_name in bd.databases:
+                raise ValueError(f"Biosphere database {biosphere_name} already exists")
+
+            eb = Ecospold2BiosphereImporter(
+                name=biosphere_name,
+                filepath=lci_path / "MasterData" / "ElementaryExchanges.xml",
             )
-        eb.write_database(overwrite=False)
+            eb.apply_strategies()
+            if not eb.all_linked:
+                raise ValueError(
+                    f"Can't ingest biosphere database {biosphere_name} - unlinked flows."
+                )
+            eb.write_database(overwrite=False)
         bd.preferences["biosphere_database"] = biosphere_name
 
         soup = SingleOutputEcospold2Importer(lci_path / "datasets", db_name)

From c7f87ef31d2a19b7948e32b7dda84c15d6826fe8 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Thu, 9 Nov 2023 21:43:48 +0100
Subject: [PATCH 3/8] Add documentation examples

---
 bw2io/ecoinvent.py | 78 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 76 insertions(+), 2 deletions(-)

diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py
index 921d782f..91c3d89d 100644
--- a/bw2io/ecoinvent.py
+++ b/bw2io/ecoinvent.py
@@ -62,11 +62,26 @@ def import_ecoinvent_release(
     biosphere_name: str | None = None,
     use_existing_biosphere: bool = False
 ) -> None:
-    """Import an ecoinvent LCI and optionally LCIA database.
+    """
+    Import an ecoinvent LCI and/or LCIA release.
 
     Uses [ecoinvent_interface](https://github.com/brightway-lca/ecoinvent_interface). Auth credentials are optional as they can be set externally (see the `ecoinvent_interface` documentation), and such permanent storage is highly recommended.
 
-    The biosphere database must not exist. It is too much work to do selective updates.
+    **DO NOT** run `bw2setup` before using this function - it isn't needed and will cause broken results.
+
+    System model strings follow the ecoinvent unofficial API. They are given in a short or long form. The short forms:
+
+    * cutoff
+    * consequential
+    * apos
+    * EN15804
+
+    And the long forms:
+
+    * Allocation cut-off by classification
+    * Substitution, consequential, long-term
+    * Allocation at the Point of Substitution
+    * Allocation, cut-off, EN15804
 
     Parameters
     ----------
@@ -84,6 +99,65 @@ def import_ecoinvent_release(
         Flag on whether to import the LCIA impact categories
     biosphere_name
         Name of database to store biosphere flows. They will be stored in the main LCI database if not specified.
+    use_existing_biosphere
+        Flag on whether to create a new biosphere database or use an existing one
+
+    Examples
+    --------
+
+    Get ecoinvent 3.9.1 cutoff in a new project (**without** running `bw2setup` first):
+
+    >>> my_ecoinvent_username = "XXX"
+    >>> my_ecoinvent_password = "XXX"
+    >>> import ecoinvent_interface as ei
+    >>> import bw2data as bd
+    >>> import bw2io as bi
+    >>> bd.projects.set_current("some new project")
+    >>> bi.import_ecoinvent_release(
+    ...     version="3.9.1",
+    ...     system_model="cutoff",
+    ...     username=my_ecoinvent_username,
+    ...     password=my_ecoinvent_password,
+    ...     )
+    >>> bd.databases
+    Databases dictionary with 2 object(s):
+        ecoinvent-3.9.1-biosphere
+        ecoinvent-3.9.1-cutoff
+    >>> len(bd.methods)
+    762
+
+    Add ecoinvent 3.9.1 apos to the same project:
+
+    >>> bi.import_ecoinvent_release(
+    ...     version="3.9.1",
+    ...     system_model="apos",
+    ...     username=my_ecoinvent_username,
+    ...     password=my_ecoinvent_password,
+    ...     use_existing_biosphere=True
+    ...     )
+    >>> bd.databases
+    Databases dictionary with 3 object(s):
+        ecoinvent-3.9.1-apos
+        ecoinvent-3.9.1-biosphere
+        ecoinvent-3.9.1-cutoff
+
+    Create a new database but use `biosphere3` for the biosphere database name, and don't add LCIA methods:
+
+    >>> bd.projects.set_current("some other project")
+    >>> bi.import_ecoinvent_release(
+    ...     version="3.9.1",
+    ...     system_model="cutoff",
+    ...     username=my_ecoinvent_username,
+    ...     password=my_ecoinvent_password,
+    ...     biosphere_name="biosphere3",
+    ...     lcia=False
+    ...     )
+    >>> bd.databases
+    Databases dictionary with 2 object(s):
+        biosphere3
+        ecoinvent-3.9.1-cutoff
+    >>> len(bd.methods)
+    0
 
     """
     from . import create_core_migrations, migrations

From f301fb472a28a813939cad24663f9e4390a08f57 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Thu, 9 Nov 2023 21:51:08 +0100
Subject: [PATCH 4/8] Can't skip blank excel lines, they are used in our
 template

---
 bw2io/ecoinvent.py        | 1 +
 bw2io/extractors/excel.py | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py
index 91c3d89d..53c11417 100644
--- a/bw2io/ecoinvent.py
+++ b/bw2io/ecoinvent.py
@@ -34,6 +34,7 @@ def header_dict(array: list) -> list[dict]:
     return [
         {header.lower(): value for header, value in zip(array[0], row)}
         for row in array[1:]
+        if any(row)
     ]
 
 
diff --git a/bw2io/extractors/excel.py b/bw2io/extractors/excel.py
index c210e32f..3c2d1b03 100644
--- a/bw2io/extractors/excel.py
+++ b/bw2io/extractors/excel.py
@@ -136,7 +136,6 @@ def extract_sheet(cls, wb: workbook.Workbook, name: str, strip: bool=True):
         """
         ws = wb[name]
         _ = lambda x: x.strip() if (strip and hasattr(x, "strip")) else x
-        provisional = [
+        return [
             [_(get_cell_value_handle_error(cell)) for cell in row] for row in ws.rows
         ]
-        return [line for line in provisional if any(line)]

From 7bc95d929ae9aab11814713bf1f48992ada2b5ba Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Thu, 9 Nov 2023 21:59:02 +0100
Subject: [PATCH 5/8] Only test on py3.10 or higher

---
 azure-pipelines.yml | 12 ++++++------
 setup.py            |  5 +++--
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index fe63d63d..1595ef93 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -13,8 +13,8 @@ jobs:
     matrix:
       Python310:
         python.version: '3.10'
-      Python37:
-        python.version: '3.9'
+      Python311:
+        python.version: '3.11'
 
   timeoutInMinutes: 20
   steps:
@@ -43,8 +43,8 @@ jobs:
     vmImage: 'macOS-latest'
   strategy:
     matrix:
-      Python310:
-        python.version: '3.10'
+      Python311:
+        python.version: '3.11'
 
   timeoutInMinutes: 20
   steps:
@@ -73,8 +73,8 @@ jobs:
     vmImage: 'windows-latest'
   strategy:
     matrix:
-      Python310:
-        python.version: '3.10'
+      Python311:
+        python.version: '3.11'
 
   timeoutInMinutes: 60
   steps:
diff --git a/setup.py b/setup.py
index aa190f60..17fc9488 100644
--- a/setup.py
+++ b/setup.py
@@ -61,8 +61,9 @@
         "Operating System :: Microsoft :: Windows",
         "Operating System :: POSIX",
         "Programming Language :: Python",
-        "Programming Language :: Python :: 2.7",
-        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "Topic :: Scientific/Engineering :: Information Analysis",
         "Topic :: Scientific/Engineering :: Mathematics",
         "Topic :: Scientific/Engineering :: Visualization",

From 09fc8f667497904023878d9d8ad2c2e918a064a6 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Thu, 9 Nov 2023 21:59:44 +0100
Subject: [PATCH 6/8] Fix database name parameter

---
 bw2io/ecoinvent.py           | 2 +-
 bw2io/importers/ecospold2.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py
index 53c11417..a62f009c 100644
--- a/bw2io/ecoinvent.py
+++ b/bw2io/ecoinvent.py
@@ -216,7 +216,7 @@ def import_ecoinvent_release(
             eb.write_database(overwrite=False)
         bd.preferences["biosphere_database"] = biosphere_name
 
-        soup = SingleOutputEcospold2Importer(lci_path / "datasets", db_name)
+        soup = SingleOutputEcospold2Importer(dirpath=lci_path / "datasets", db_name=db_name, biosphere_database_name=biosphere_name)
         soup.apply_strategies()
         if not soup.all_linked:
             raise ValueError(
diff --git a/bw2io/importers/ecospold2.py b/bw2io/importers/ecospold2.py
index 63c9c11e..15d10e81 100644
--- a/bw2io/importers/ecospold2.py
+++ b/bw2io/importers/ecospold2.py
@@ -53,7 +53,7 @@ def __init__(
         self,
         dirpath: str,
         db_name: str,
-        biosphere_database: str | None = None,
+        biosphere_database_name: str | None = None,
         extractor: Any=Ecospold2DataExtractor,
         use_mp: bool=True,
         signal: Any=None,
@@ -69,7 +69,7 @@ def __init__(
             Path to the directory containing the ecospold2 file.
         db_name : str
             Name of the LCI database.
-        biosphere_database : str | None
+        biosphere_database_name : str | None
             Name of biosphere database to link to. Uses `config.biosphere` if not provided.
         extractor : class
             Class for extracting data from the ecospold2 file, by default Ecospold2DataExtractor.
@@ -102,7 +102,7 @@ def __init__(
             drop_unspecified_subcategories,
             fix_ecoinvent_flows_pre35,
             drop_temporary_outdated_biosphere_flows,
-            partial(link_biosphere_by_flow_uuid, biosphere=biosphere_database or config.biosphere),
+            partial(link_biosphere_by_flow_uuid, biosphere=biosphere_database_name or config.biosphere),
             link_internal_technosphere_by_composite_code,
             delete_exchanges_missing_activity,
             delete_ghost_exchanges,
@@ -111,7 +111,7 @@ def __init__(
             convert_activity_parameters_to_list,
             add_cpc_classification_from_single_reference_product,
             delete_none_synonyms,
-            partial(update_social_flows_in_older_consequential, biosphere_db=Database(config.biosphere)),
+            partial(update_social_flows_in_older_consequential, biosphere_db=Database(biosphere_database_name or config.biosphere)),
         ]
 
         if reparametrize_lognormals:

From 22994ebbbada2c293610593292a4f51add79e99c Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Thu, 9 Nov 2023 22:00:21 +0100
Subject: [PATCH 7/8] Reformatting

---
 bw2io/ecoinvent.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py
index a62f009c..5c0e2558 100644
--- a/bw2io/ecoinvent.py
+++ b/bw2io/ecoinvent.py
@@ -61,7 +61,7 @@ def import_ecoinvent_release(
     lci: bool = True,
     lcia: bool = True,
     biosphere_name: str | None = None,
-    use_existing_biosphere: bool = False
+    use_existing_biosphere: bool = False,
 ) -> None:
     """
     Import an ecoinvent LCI and/or LCIA release.
@@ -216,7 +216,11 @@ def import_ecoinvent_release(
             eb.write_database(overwrite=False)
         bd.preferences["biosphere_database"] = biosphere_name
 
-        soup = SingleOutputEcospold2Importer(dirpath=lci_path / "datasets", db_name=db_name, biosphere_database_name=biosphere_name)
+        soup = SingleOutputEcospold2Importer(
+            dirpath=lci_path / "datasets",
+            db_name=db_name,
+            biosphere_database_name=biosphere_name,
+        )
         soup.apply_strategies()
         if not soup.all_linked:
             raise ValueError(
@@ -311,9 +315,13 @@ def import_ecoinvent_release(
                         for name in same_context
                     ]
                 )
-                if candidates[0][0] < 3 and candidates[0][0] != candidates[1][0] and candidates[0][1][0].lower() == row['name'][0].lower():
+                if (
+                    candidates[0][0] < 3
+                    and candidates[0][0] != candidates[1][0]
+                    and candidates[0][1][0].lower() == row["name"][0].lower()
+                ):
                     new_name = candidates[0][1]
-                    pair = (new_name, row['name'])
+                    pair = (new_name, row["name"])
                     if pair not in substituted:
                         print(f"Substituting {new_name} for {row['name']}")
                         substituted.add(pair)

From b596c2124132deb70cf9779c1844c3be5659a9b4 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Thu, 9 Nov 2023 22:02:16 +0100
Subject: [PATCH 8/8] Add importer signal for AB love

---
 bw2io/ecoinvent.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/bw2io/ecoinvent.py b/bw2io/ecoinvent.py
index 5c0e2558..0769bd8a 100644
--- a/bw2io/ecoinvent.py
+++ b/bw2io/ecoinvent.py
@@ -2,6 +2,7 @@
 import zipfile
 from collections import defaultdict
 from pathlib import Path
+from typing import Any
 
 import bw2data as bd
 import ecoinvent_interface as ei
@@ -62,6 +63,7 @@ def import_ecoinvent_release(
     lcia: bool = True,
     biosphere_name: str | None = None,
     use_existing_biosphere: bool = False,
+    importer_signal: Any = None,
 ) -> None:
     """
     Import an ecoinvent LCI and/or LCIA release.
@@ -102,6 +104,8 @@ def import_ecoinvent_release(
         Name of database to store biosphere flows. They will be stored in the main LCI database if not specified.
     use_existing_biosphere
         Flag on whether to create a new biosphere database or use an existing one
+    importer_signal
+        Used by the Activity Browser to provide feedback during the import
 
     Examples
     --------
@@ -220,6 +224,7 @@ def import_ecoinvent_release(
             dirpath=lci_path / "datasets",
             db_name=db_name,
             biosphere_database_name=biosphere_name,
+            signal=importer_signal,
         )
         soup.apply_strategies()
         if not soup.all_linked: