Skip to content

Commit

Permalink
Handling kwargs to be passed to pandas.ExcelFile
Browse files Browse the repository at this point in the history
Detects kwargs passed to `IamDataFrame.__init__` and to `read_pandas` that
need to be passed to `pandas.ExcelFile.__init__` to be handled properly.
  • Loading branch information
korsbakken committed Aug 26, 2024
1 parent bc56c53 commit 38ed4c5
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
3 changes: 2 additions & 1 deletion pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
remove_from_list,
to_list,
write_sheet,
get_excel_file_with_kwargs,
)
from pyam.validation import _exclude_on_fail, _validate

Expand Down Expand Up @@ -197,7 +198,7 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): # noqa: C

# if initializing from xlsx, try to load `meta` table from file
if meta_sheet and isinstance(data, Path) and data.suffix in [".xlsx", ".xls"]:
excel_file = pd.ExcelFile(data)
excel_file, kwargs = get_excel_file_with_kwargs(data, **kwargs)
if meta_sheet in excel_file.sheet_names:
self.load_meta(excel_file, sheet_name=meta_sheet, ignore_conflict=True)

Expand Down
19 changes: 18 additions & 1 deletion pyam/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,30 @@ def write_sheet(writer, name, df, index=False):
writer.sheets[name].set_column(i, i, width) # assumes xlsxwriter as engine


def get_excel_file_with_kwargs(path, **kwargs):
    """Open `path` as a `pandas.ExcelFile` and return it with the leftover kwargs.

    The keyword arguments `engine`, `storage_options` and `engine_kwargs`
    are forwarded to `pandas.ExcelFile`; every other keyword argument is
    handed back untouched so the caller can use it elsewhere.

    Parameters
    ----------
    path
        Passed as the first positional argument to `pandas.ExcelFile`.
    **kwargs
        Arbitrary keyword arguments, some of which may be meant for
        `pandas.ExcelFile`.

    Returns
    -------
    tuple
        A `pandas.ExcelFile` instance and a new dict holding the keyword
        arguments that were not forwarded to it.
    """
    reader_keys = ("engine", "storage_options", "engine_kwargs")

    # Collect the options that belong to the ExcelFile constructor.
    reader_options = {}
    for key in reader_keys:
        if key in kwargs:
            reader_options[key] = kwargs[key]

    # Everything else goes back to the caller in a fresh dict.
    remaining = {
        key: value for key, value in kwargs.items() if key not in reader_keys
    }

    return pd.ExcelFile(path, **reader_options), remaining


def read_pandas(path, sheet_name=["data*", "Data*"], *args, **kwargs):
"""Read a file and return a pandas.DataFrame"""

if isinstance(path, Path) and path.suffix == ".csv":
return pd.read_csv(path, *args, **kwargs)

with pd.ExcelFile(path) as xl:
xlfile, kwargs = get_excel_file_with_kwargs(path, **kwargs)
with xlfile as xl:
# reading multiple sheets
sheet_names = pd.Series(xl.sheet_names)
if len(sheet_names) > 1:
Expand Down

0 comments on commit 38ed4c5

Please sign in to comment.