Skip to content

Commit

Permalink
Add report generation file to the deprecated steps
Browse files Browse the repository at this point in the history
Signed-off-by: Fabian Utech <[email protected]>
  • Loading branch information
ur-tech committed Feb 7, 2024
1 parent 9a3120b commit cab53ad
Showing 1 changed file with 230 additions and 0 deletions.
230 changes: 230 additions & 0 deletions deprecated/steps/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Fabian-Paul Utech <[email protected]>
# SPDX-FileCopyrightText: 2023 Ahmed Sheta <[email protected]>

import argparse
import os

import pandas as pd
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle

# Module-level registries of generated report paths; create_pdf() appends the
# target path of each PDF to both of them.
file_list = []
report_list = []

# Default report layout: maps a group title to the columns rendered for that
# group. One PDF is produced per lead, i.e. per row of the input .csv.
#
# Groups that are sketched but not enabled yet:
#   "Reviews": google_places_user_ratings_total, google_places_rating,
#              google_places_price_level, reviews_sentiment_score
#   "Region":  every column whose name starts with "regional_atlas"
# Note on column names: if a name contains more than one '_', take the part
# after the second '_'.
standard_group_format = {
    "Lead": [
        "Last Name", "First Name", "Company / Account",
        "Phone", "Email", "Predicted Size",
    ],
}


def process_lead(lead, dataset="src/data/dummy_leads_email.csv"):
    """Resolve the ``lead`` argument into a DataFrame of leads.

    Args:
        lead: One of
            * a path to a CSV file - every row of that file is returned,
            * a list of company names, or
            * a single company name.
            Names are matched exactly against the "Company / Account" column.
        dataset: CSV file that is searched when ``lead`` is a name or a list
            of names. Defaults to the dummy dataset.

    Returns:
        pandas.DataFrame holding the selected rows (may be empty).

    Raises:
        FileNotFoundError: If the CSV file that has to be read does not exist.
        ValueError: If ``lead`` is neither a string nor a list.
    """
    if isinstance(lead, str):
        # A string is treated as a path when it names an existing file or at
        # least points into an existing directory (a missing file inside an
        # existing directory is reported by pandas as FileNotFoundError).
        parent = os.path.dirname(lead)
        if os.path.isfile(lead) or (parent and os.path.exists(parent)):
            return pd.read_csv(lead, delimiter=",")
    elif not isinstance(lead, list):
        raise ValueError(
            "Invalid type for 'lead'. It should be a single string, a list of strings, or a file path."
        )

    # Only name-based lookups need the fallback dataset, so it is read lazily;
    # an explicit CSV path above must not fail because the dummy file is absent.
    try:
        df = pd.read_csv(dataset, delimiter=",")
    except FileNotFoundError as err:
        raise FileNotFoundError("File not found.") from err

    companies = df["Company / Account"]
    if isinstance(lead, list):
        # Element-wise membership test; `series in list` is not a valid filter.
        return df[companies.isin(lead)]
    return df[companies == lead]


def process_format(fmt):
    """Normalise the ``format`` argument into a ``{group: [columns]}`` dict.

    Args:
        fmt: ``None`` (use ``standard_group_format``), a dict that already has
            the ``{group: [columns]}`` shape (returned unchanged), or a flat
            list of column names.

    Returns:
        dict mapping a group title to the list of column names in that group.
        For list input every column is placed in the group that
        ``standard_group_format`` assigns it to; columns unknown to
        ``standard_group_format`` are collected under "Others".

    Raises:
        ValueError: If ``fmt`` is not ``None``, a list, or a dict.
    """
    if fmt is None:
        return standard_group_format
    if isinstance(fmt, dict):
        return fmt
    if not isinstance(fmt, list):
        raise ValueError(
            "Invalid type for 'format'. It should be either a list or a dictionary."
        )

    # Reverse lookup column -> group; standard_group_format itself is keyed by
    # group, so indexing it with a column name would never find anything.
    group_of = {
        column: group
        for group, columns in standard_group_format.items()
        for column in columns
    }

    grouped = {}
    for value in fmt:
        column = str(value)
        # list.append() returns None, so append in place instead of
        # assigning its result back into the dict.
        grouped.setdefault(group_of.get(column, "Others"), []).append(column)
    return grouped


def _chunk_columns(col_names, size=4):
    """Split ``col_names`` into consecutive chunks of at most ``size`` names."""
    return [col_names[i : i + size] for i in range(0, len(col_names), size)]


def _cell_text(lead, column):
    """Return the printable value of ``column`` for ``lead``; "" if missing or NaN."""
    try:
        value = lead[column]
    except (KeyError, IndexError, TypeError):
        # Column not present for this lead: render an empty cell.
        return ""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    text = str(value)
    return "" if text == "nan" else text


def create_pdf(lead, format):
    """Create the PDF report for a single lead.

    The report starts with the company name as headline, followed by one
    titled section per group in ``format``. Each section shows the group's
    columns as two-row tables (column names on top, the lead's values below),
    at most four columns per table.

    Args:
        lead: Mapping-like row of one lead (e.g. a pandas Series); must
            contain "Company / Account". Other columns may be missing or NaN,
            in which case the cell is left empty.
        format: dict mapping a group title to the list of column names that
            belong to that group.

    Side effects:
        Writes ``src/data/reports/<Company / Account>.pdf`` (the directory is
        created if needed) and appends that path to the module-level
        ``file_list`` and ``report_list``.
    """
    company = lead["Company / Account"]
    output_path = f"src/data/reports/{company}.pdf"
    os.makedirs("src/data/reports", exist_ok=True)

    doc = SimpleDocTemplate(output_path, pagesize=A4)
    file_list.append(output_path)
    report_list.append(output_path)

    # Headline: "Title" style enlarged to 32pt. alignment 0 is left-aligned
    # (reportlab.lib.enums.TA_LEFT), not centered.
    headline_style = getSampleStyleSheet()["Title"]
    headline_style.fontSize = 32
    headline_style.alignment = 0

    # List of the 'Flowable' objects that make up the document
    elements = [Paragraph(str(company), headline_style), Spacer(1, 50)]

    # Fresh stylesheet so the group titles are not affected by the headline tweaks
    styles = getSampleStyleSheet()

    # The same style applies to every table, so it is built once.
    table_style = TableStyle(
        [
            ("ALIGN", (0, 0), (-1, -1), "CENTER"),  # center the text
            ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),  # vertically center the text
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.black),
            ("GRID", (0, 0), (-1, -1), 1, colors.black),
            # Intended to keep rows from being split between pages.
            # NOTE(review): confirm reportlab honours this command name.
            ("SPLITBYROWS", (0, 0), (-1, -1), True),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),  # bold header row
        ]
    )

    for group, col_names in format.items():
        elements.append(Paragraph(group, styles["Title"]))

        # Chunk width and step must match, otherwise columns are skipped.
        for header_row in _chunk_columns(col_names):
            data_row = [_cell_text(lead, column) for column in header_row]

            pdf_table = Table([header_row, data_row])
            pdf_table.setStyle(table_style)
            elements.append(pdf_table)

            # Add an empty line between tables
            elements.append(Spacer(1, 25))

    # Build the PDF document
    doc.build(elements)


def main():
    """Command-line entry point.

    Parses ``--lead`` and ``--format``, resolves them with process_lead() and
    process_format(), creates one PDF per resulting row via create_pdf() and
    finally prints the paths collected in ``file_list``.
    """
    cli = argparse.ArgumentParser(description="Process lead and format arguments.")
    cli.add_argument(
        "--lead",
        default="src/data/dummy_leads_email.csv",
        help="Lead argument: a single search-string, a list of strings, or a file path.",
    )
    cli.add_argument(
        "--format", nargs="+", help="Format argument: a list or a dictionary."
    )
    options = cli.parse_args()

    # Resolve the lead selection: either specific row(s) or a whole table
    leads = process_lead(options.lead)
    print("Generate the reports for the following leads: ")
    print(leads)

    # Resolve the layout: always ends up as a dictionary
    report_format = process_format(options.format)

    # One report per row; the row index itself is not needed
    for _, row in leads.iterrows():
        create_pdf(row, report_format)

    print("\nReports saved:")
    for saved_path in file_list:
        print(f"{saved_path}")

    print()


if __name__ == "__main__":
    main()

0 comments on commit cab53ad

Please sign in to comment.