def get_excel_file_with_kwargs(path, **kwargs):
    """Open `path` as a `pandas.ExcelFile` and split off the kwargs it consumes.

    The keyword arguments `engine`, `storage_options` and `engine_kwargs`
    are forwarded to `pandas.ExcelFile` when they are present. All other
    keyword arguments are handed back untouched so that the calling function
    can use them for its own purposes.

    Parameters
    ----------
    path
        Passed as the first positional argument to `pandas.ExcelFile`.
    **kwargs
        Arbitrary keyword arguments; only the three names listed above are
        forwarded to `pandas.ExcelFile`.

    Returns
    -------
    tuple
        A 2-tuple of the opened `pandas.ExcelFile` instance and a dict
        holding the keyword arguments that were not forwarded.
    """
    # Work on a separate dict so the forwarded names can be removed from it
    leftover = dict(kwargs)
    reader_options = {}
    for name in ("engine", "storage_options", "engine_kwargs"):
        if name in leftover:
            reader_options[name] = leftover.pop(name)

    workbook = pd.ExcelFile(path, **reader_options)
    return workbook, leftover