Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for pandas dataframes, multiindex formatting #1046

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
reupping tables updates
afriedman412 committed Dec 5, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit 579b9a17773fd29e10cb30894c1555871ab99cdb
48 changes: 48 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Developer helpers: create the virtualenv, run the tests, lint and format.
VENV_NAME = venv
SRC_DIR = fpdf

# A virtualenv keeps its executables in Scripts/ on Windows and bin/ elsewhere,
# and the bootstrap interpreter is usually called "python" on Windows.
ifeq ($(OS),Windows_NT)
VENV_BIN = $(VENV_NAME)/Scripts
BASE_PYTHON = python
else
VENV_BIN = $(VENV_NAME)/bin
BASE_PYTHON = python3
endif

VENV_PATH = $(VENV_BIN)/activate
PYTHON := $(VENV_BIN)/python

# None of these targets produce a file named after themselves.
.PHONY: venv test install clean check-autopep check-isort check-flake check-mypy lint format

# Create the virtualenv and install the test requirements into it.
# pip is invoked through the venv interpreter, so no shell activation is
# needed and a failed activation can never install into the system Python.
venv:
	$(BASE_PYTHON) -m venv $(VENV_NAME)
	$(PYTHON) -m pip install -r test/requirements.txt

test:
	@export FLASK_ENV=test && python -m pytest test/

# Upgrade the already-installed requirements inside the virtualenv.
install: venv
	$(PYTHON) -m pip install --upgrade -r test/requirements.txt

clean:
	rm -rf $(VENV_NAME)

# Check only: print the diff and exit non-zero when files would be reformatted.
# Use "make format" to actually rewrite the files.
check-autopep:
	$(PYTHON) -m autopep8 --diff --exit-code $(SRC_DIR)/*.py test/*.py

check-isort:
	$(PYTHON) -m isort --check-only $(SRC_DIR) test

check-flake:
	$(PYTHON) -m flake8 $(SRC_DIR) test

check-mypy:
	$(PYTHON) -m mypy --strict --implicit-reexport $(SRC_DIR)

lint: check-flake check-mypy check-autopep check-isort

# Rewrite the sources in place.
format:
	$(PYTHON) -m autopep8 $(SRC_DIR)/*.py test/*.py --in-place
	$(PYTHON) -m isort $(SRC_DIR) test
27 changes: 9 additions & 18 deletions docs/Maths.md
Original file line number Diff line number Diff line change
@@ -109,7 +109,7 @@ Result:

Create a table with pandas [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html):
```python
from fpdf import FPDF
from fpdf.adapters.table_pandas import FPDF_pandas
import pandas as pd

df = pd.DataFrame(
@@ -121,25 +121,16 @@ df = pd.DataFrame(
}
)

df = df.applymap(str) # Convert all data inside dataframe into string type

columns = [list(df)] # Get list of dataframe columns
rows = df.values.tolist() # Get list of dataframe rows
data = columns + rows # Combine columns and rows in one list

pdf = FPDF()
pdf = FPDF_pandas()
pdf.add_page()
pdf.set_font("Times", size=10)
with pdf.table(borders_layout="MINIMAL",
cell_fill_color=200, # grey
cell_fill_mode="ROWS",
line_height=pdf.font_size * 2.5,
text_align="CENTER",
width=160) as table:
for data_row in data:
row = table.row()
for datum in data_row:
row.cell(datum)
pdf.dataframe(df,
borders_layout="MINIMAL",
cell_fill_color=200, # grey
cell_fill_mode="ROWS",
line_height=pdf.font_size * 2.5,
text_align="CENTER",
width=160)
pdf.output("table_from_pandas.pdf")
```

2 changes: 2 additions & 0 deletions docs/Tables.md
Original file line number Diff line number Diff line change
@@ -380,6 +380,8 @@ Result:

![](table_with_multiple_headings.png)

This also works with index columns. Pass any integer to the `num_index_columns` argument when calling `Table()` and that many columns will be formatted according to the `index_style` argument.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this only applies to the pandas adapter, and should probably be removed.

However, it would be nice to add a section about the pandas adapter to this file, as I'm sure many fpdf2 users would be happy to find out about it while reading this page 🙂


## Table from pandas DataFrame

_cf._ [Maths documentation page](Maths.md#using-pandas)
38 changes: 38 additions & 0 deletions fpdf/adapters/table_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pandas import MultiIndex
from fpdf import FPDF


class FPDF_pandas(FPDF):
Copy link
Member

@Lucas-C Lucas-C Oct 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that this adapter only adds a single method, I think we should provide a mixin instead, so that fpdf2 users can combine several mixins if they want to!

class PandasMixin:
    def dataframe(self, df, **kwargs):
        ...

And that would be how end-users make use of it:

from fpdf import FPDF
from fpdf.pandas import PandasMixin

class MyPDF(FPDF, PandasMixin):
    pass

pdf = MyPDF()
pdf.add_page()
pdf.set_font("Times", size=10)
pdf.dataframe(df, ...)

What do you think of this approach @afriedman412 🙂?

def __init__(self, **kwargs):
super().__init__(**kwargs)

def dataframe(self, df, **kwargs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some docstring would be nice before merging this PR 🙂 (as well as an addition in CHANGELOG.md)

with self.table(
num_index_columns=df.index.nlevels,
num_heading_rows=df.columns.nlevels,
**kwargs
) as table:
TABLE_DATA = format_df(df)
for data_row in TABLE_DATA:
row = table.row()
for datum in data_row:
row.cell(datum)


def format_df(df, char: str = " ", convert_to_string: bool = True) -> list:
    """
    Flatten a pandas DataFrame into a list of rows that can be fed to a table.

    The result starts with one row per column level (the headings), followed
    by one row per DataFrame row. Every row starts with one cell per index
    level. The top-left corner cells (above the index labels, beside the
    headings) are filled with `char`.

    Args:
        df (pandas.DataFrame): the data to flatten. Both its columns and its
            index may be a `MultiIndex`.
        char (str): filler placed, as a whole, in each top-left corner cell.
        convert_to_string (bool): when True (default), heading and index labels
            are converted with `str()` too. Data cells are always converted
            to strings, whatever this flag is.

    Returns:
        list: a list of rows, each row being a list of cells.
    """
    # DataFrame.map only exists since pandas 2.1; older releases call it applymap.
    # Look the method up on the type so a column named "map" cannot shadow it.
    elementwise = df.map if hasattr(type(df), "map") else df.applymap
    data = elementwise(str).values.tolist()

    if isinstance(df.columns, MultiIndex):
        # One heading row per column level.
        heading = [list(level) for level in zip(*df.columns)]
    else:
        # Index.tolist() yields Python / pandas scalars (e.g. Timestamp),
        # whereas going through `.values` would turn datetimes into raw integers.
        heading = [df.columns.tolist()]

    if isinstance(df.index, MultiIndex):
        # Iterating a MultiIndex yields one tuple of labels per row.
        index = [list(labels) for labels in df.index]
    else:
        index = [[label] for label in df.index.tolist()]

    # `[char]`, not `list(char)`: a multi-character filler must stay a single cell,
    # otherwise the corner rows end up longer than the data rows.
    padding = [[char] * df.index.nlevels for _ in range(df.columns.nlevels)]

    output = [left + right for left, right in zip(padding + index, heading + data)]
    if convert_to_string:
        output = [[str(cell) for cell in row] for row in output]
    return output
17 changes: 14 additions & 3 deletions fpdf/table.py
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@
from .util import Padding

DEFAULT_HEADINGS_STYLE = FontFace(emphasis="BOLD")
DEFAULT_INDEX_STYLE = FontFace(emphasis="BOLD")


class Table:
@@ -32,6 +33,7 @@ def __init__(
gutter_height=0,
gutter_width=0,
headings_style=DEFAULT_HEADINGS_STYLE,
index_style=DEFAULT_INDEX_STYLE,
Copy link
Member

@Lucas-C Lucas-C Oct 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New parameters should always be added at the end of the parameters list, otherwise the code of existing fpdf2 users would break if they are currently passing parameters positionally, up to line_height for example there:

align = "CENTER"
v_align = "MIDDLE"
borders_layout = TableBordersLayout.ALL
cell_fill_color = None
cell_fill_mode = TableCellFillMode.NONE
col_widths = None
first_row_as_headings = True
gutter_height = 0
gutter_width = 0
headings_style = DEFAULT_HEADINGS_STYLE
line_height = None

with pdf.table(align, v_align, borders_layout, cell_fill_color, cell_fill_mode, col_widths, first_row_as_headings, gutter_height, gutter_width, headings_style, line_height) as table:
    ...  # this code would break after merging this PR, because line_height would be passed to index_style

line_height=None,
markdown=False,
text_align="JUSTIFY",
@@ -40,6 +42,7 @@ def __init__(
padding=None,
outer_border_width=None,
num_heading_rows=1,
num_index_columns=0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that those 2 new parameters are only required for rendering pandas dataframes, I think they should not be added there but in PandasMixin, even if that means overriding some Table methods in PandasMixin 🙂

):
"""
Args:
@@ -58,6 +61,8 @@ def __init__(
gutter_width (float): optional horizontal space between columns
headings_style (fpdf.fonts.FontFace): optional, default to bold.
Defines the visual style of the top headings row: size, color, emphasis...
index_style (fpdf.fonts.FontFace): optional, default to bold.
Defines the visual style of the index columns: size, color, emphasis...
line_height (number): optional. Defines how much vertical space a line of text will occupy
markdown (bool): optional, default to False. Enable markdown interpretation of cells textual content
text_align (str, fpdf.enums.Align, tuple): optional, default to JUSTIFY. Control text alignment inside cells.
@@ -72,6 +77,7 @@ def __init__(
num_heading_rows (number): optional. Sets the number of heading rows, default value is 1. If this value is not 1,
first_row_as_headings needs to be True if num_heading_rows>1 and False if num_heading_rows=0. For backwards compatibility,
first_row_as_headings is used in case num_heading_rows is 1.
num_index_columns (number): optional. Sets the number of index columns, default value is 0.
"""
self._fpdf = fpdf
self._align = align
@@ -85,12 +91,14 @@ def __init__(
self._gutter_height = gutter_height
self._gutter_width = gutter_width
self._headings_style = headings_style
self._index_style = index_style
self._line_height = 2 * fpdf.font_size if line_height is None else line_height
self._markdown = markdown
self._text_align = text_align
self._width = fpdf.epw if width is None else width
self._wrapmode = wrapmode
self._num_heading_rows = num_heading_rows
self.num_index_columns = num_index_columns
self._initial_style = None
self.rows = []

@@ -129,13 +137,16 @@ def __init__(
self.row(row)

def row(self, cells=(), style=None):
"Adds a row to the table. Yields a `Row` object."
"Adds a row to the table. Yields a `Row` object. Styles first `self.num_index_columns` cells with `self.index_style`"
if self._initial_style is None:
self._initial_style = self._fpdf.font_face()
row = Row(self, style=style)
self.rows.append(row)
for cell in cells:
row.cell(cell)
for n, cell in enumerate(cells):
if n < self.num_index_columns:
row.cell(cell, style=self._index_style)
else:
row.cell(cell)
return row

def render(self):
Binary file added test/table/table_pandas_multiheading.pdf
Binary file not shown.
Binary file added test/table/table_pandas_multiindex.pdf
Binary file not shown.
20 changes: 20 additions & 0 deletions test/table/test_table.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
from pathlib import Path
import pandas as pd

import pytest

from fpdf import FPDF, FPDFException
from fpdf.adapters.table_pandas import FPDF_pandas
from fpdf.drawing import DeviceRGB
from fpdf.fonts import FontFace
from test.conftest import assert_pdf_equal, LOREM_IPSUM
@@ -37,6 +39,13 @@
("3", "4", "5", "6", "7", "8"),
)

MULTI_LABEL_TABLE_DATA = {
("tall", "fat"): {"color": "red", "number": 7, "happy": False},
("short", "fat"): {"color": "green", "number": 8, "happy": True},
("tall", "lean"): {"color": "blue", "number": 9, "happy": True},
("short", "lean"): {"color": "yellow", "number": 15, "happy": False},
}


def test_table_simple(tmp_path):
pdf = FPDF()
@@ -86,6 +95,17 @@ def test_table_with_syntactic_sugar(tmp_path):
table.row(TABLE_DATA[4])
assert_pdf_equal(pdf, HERE / "table_simple.pdf", tmp_path)

def test_pandas_multi_label(tmp_path):
    """
    Render a DataFrame with multi-level labels twice: once with the labels
    as column headings, once (transposed) with the labels as the index,
    and compare each output with its reference PDF.
    """
    cases = (
        ("heading", pd.DataFrame(MULTI_LABEL_TABLE_DATA)),
        ("index", pd.DataFrame(MULTI_LABEL_TABLE_DATA).T),
    )
    for suffix, frame in cases:
        pdf = FPDF_pandas()
        pdf.add_page()
        pdf.set_font("Times", size=10)
        pdf.dataframe(
            frame,
            borders_layout="MINIMAL",
            text_align="CENTER",
            width=160,
        )
        reference = HERE / f"table_pandas_multi{suffix}.pdf"
        assert_pdf_equal(pdf, reference, tmp_path)


def test_table_with_fixed_col_width(tmp_path):
pdf = FPDF()