Skip to content

Commit

Permalink
qwen2.5 testing
Browse files Browse the repository at this point in the history
  • Loading branch information
SawyerCzupka committed Feb 6, 2025
1 parent ee910f2 commit 5b2059c
Show file tree
Hide file tree
Showing 27 changed files with 699 additions and 0 deletions.
124 changes: 124 additions & 0 deletions resources/TEST_DATA/docx_table_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from docx import Document
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
from reportlab.lib.units import inch
from PIL import Image
import pdf2image
import tempfile
import os


def get_table_dimensions(table):
    """Return the ``(row_count, column_count)`` of a python-docx table.

    python-docx's ``row.cells`` yields one entry per grid column, so a
    horizontally merged cell is repeated once for every column it spans.
    Each underlying ``_tc`` element is therefore counted only once per row
    and contributes its ``grid_span`` (treated as 1 when missing) to the
    row width; summing ``grid_span`` for every repeated entry would
    over-count merged rows.

    Args:
        table: Object exposing ``rows``; each row exposes ``cells`` whose
            items carry a ``_tc`` with a ``grid_span`` attribute.

    Returns:
        Tuple ``(rows, cols)`` where ``cols`` is the width of the widest
        row in grid columns. A table without rows yields ``(0, 0)``.
    """
    rows = table.rows
    max_cols = 0
    for row in rows:
        col_count = 0
        previous_tc = None
        for cell in row.cells:
            tc = cell._tc
            # Repeats of a merged cell are adjacent and share the same
            # element, so comparing with the previous entry is sufficient.
            if tc is previous_tc:
                continue
            previous_tc = tc
            col_count += tc.grid_span or 1
        max_cols = max(max_cols, col_count)
    return len(rows), max_cols


def extract_table_data(table):
    """Extract the text of a python-docx table as a list of row lists.

    Horizontally merged cells contribute their text once, followed by empty
    strings for the remaining spanned columns. A cell marked as a vertical
    merge continuation takes the text already extracted for the same column
    of the previous row, when such a value exists.

    The merge state is read through ``cell._tc.vMerge`` and never created:
    adding a ``vMerge`` element just to inspect it would modify the document
    and, because a ``vMerge`` element without a value means "continue",
    would make every ordinary cell look like a continuation.

    Args:
        table: Object exposing ``rows``; each row exposes ``cells`` whose
            items provide ``text`` and a ``_tc`` with ``grid_span`` and
            ``vMerge`` attributes.

    Returns:
        A list with one list of strings per table row.
    """
    data = []
    for row_idx, row in enumerate(table.rows):
        # ``row.cells`` is looked up once per row instead of on every
        # iteration of the inner loop.
        cells = row.cells
        row_data = []
        col_idx = 0

        while col_idx < len(cells):
            cell = cells[col_idx]
            tc = cell._tc
            text = cell.text.strip()

            # Vertical merge: reuse the value from the row above, but only
            # when there is a row above that is wide enough.
            if tc.vMerge == "continue" and row_idx > 0:
                above = data[row_idx - 1]
                if col_idx < len(above):
                    text = above[col_idx]

            # Horizontal merge: keep the text in the first column and pad
            # the other spanned columns with empty strings.
            span = tc.grid_span or 1
            row_data.append(text)
            row_data.extend([""] * (span - 1))
            col_idx += span

        data.append(row_data)

    return data


def table_to_image(table, output_path):
    """Convert a single table to an image.

    The table text is laid out as a ReportLab table in a temporary PDF, the
    PDF is converted with ``pdf2image.convert_from_path`` and the first
    resulting page image is saved to ``output_path``. The temporary PDF is
    removed even when building or converting fails.

    Args:
        table: Table accepted by ``extract_table_data`` and
            ``get_table_dimensions``.
        output_path: Path the first page image is saved to.

    Returns:
        ``output_path``. It is returned unchanged even if the conversion
        produced no page image, in which case nothing is written.
    """
    table_data = extract_table_data(table)
    rows, _cols = get_table_dimensions(table)

    # Only a unique file name is needed. The handle is closed right away so
    # that ReportLab and pdf2image can open the path themselves (an open
    # NamedTemporaryFile cannot be reopened by name on Windows).
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf:
        pdf_path = tmp_pdf.name

    try:
        doc = SimpleDocTemplate(
            pdf_path,
            pagesize=(
                letter[0],
                letter[1] * (rows / 40 + 1),
            ),  # Page height grows with the number of rows
        )

        rl_table = Table(table_data)
        rl_table.setStyle(
            TableStyle(
                [
                    ("GRID", (0, 0), (-1, -1), 1, colors.black),
                    ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                    ("FONTSIZE", (0, 0), (-1, -1), 10),
                    ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
                    ("TOPPADDING", (0, 0), (-1, -1), 6),
                    ("ALIGN", (0, 0), (-1, -1), "LEFT"),
                ]
            )
        )

        doc.build([rl_table])

        # The page is sized to hold the whole table, so only the first
        # page image is kept.
        images = pdf2image.convert_from_path(pdf_path)
        if images:
            images[0].save(output_path)
    finally:
        # Always clean up the temporary PDF, including on errors.
        os.unlink(pdf_path)

    return output_path


def extract_tables_as_images(docx_path, output_dir="table_images"):
    """Render every table of a DOCX file to its own PNG image.

    ``output_dir`` is created when missing. Images are named
    ``table_<index>.png`` with a 0-based index following the order of the
    document's ``tables`` collection, and one confirmation line is printed
    per saved table.
    """
    os.makedirs(output_dir, exist_ok=True)

    for index, docx_table in enumerate(Document(docx_path).tables):
        image_path = os.path.join(output_dir, f"table_{index}.png")
        table_to_image(docx_table, image_path)
        print(f"Saved table {index} to {image_path}")


if __name__ == "__main__":
    # Script entry point: render the tables of the sample PIF document,
    # which is looked up relative to the current working directory.
    extract_tables_as_images("p9467_doc0__4-1-16__Antigua_Barbuda_CD_9467_PIF.docx")
64 changes: 64 additions & 0 deletions resources/TEST_DATA/docx_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from docx import Document
import pandas as pd

# Alternative sample input, kept commented out for quick switching:
# DOCX_FILE = "p9467_doc2__9-7-17__Antigua_and_Barbuda_CD_9467_ProDoc__revised.docx"
# DOCX file whose tables are read by this script (a relative path).
DOCX_FILE = "p9467_doc0__4-1-16__Antigua_Barbuda_CD_9467_PIF.docx"


def main():
    """Print how many tables ``DOCX_FILE`` contains, then the table list."""
    found = Document(DOCX_FILE).tables
    print(f"Number of tables: {len(found)}")
    print(found)


def _cell_text_with_nested_tables(cell):
    """Return a cell's text, flattening nested tables when it contains any.

    For a cell with nested tables, each nested row becomes one line made of
    its stripped cell texts joined by single spaces.
    """
    if not cell.tables:
        return cell.text.strip()
    return "\n".join(
        " ".join(c.text.strip() for c in nested_row.cells)
        for nested_table in cell.tables
        for nested_row in nested_table.rows
    )


def extract_docx_tables(docx_path) -> list[pd.DataFrame]:
    """Read every table of a DOCX file into a pandas DataFrame.

    The first row of each table supplies the column names; the remaining
    rows become the data. Rows of unequal width are padded with empty
    strings to the widest row of the table, because the DataFrame
    constructor rejects data whose width differs from the column list.
    A table without any rows yields an empty DataFrame so that positions in
    the result still match positions in the document's table list.

    Args:
        docx_path: Path of the DOCX file, passed to ``Document``.

    Returns:
        One DataFrame per table, in the order of ``Document(...).tables``.
    """
    doc = Document(docx_path)
    tables = []

    for table in doc.tables:
        rows = table.rows
        if len(rows) == 0:
            tables.append(pd.DataFrame())
            continue

        # Header cells use their plain text; data cells additionally
        # flatten nested tables.
        headers = [cell.text.strip() for cell in rows[0].cells]
        data = [
            [_cell_text_with_nested_tables(cell) for cell in row.cells]
            for row in rows[1:]
        ]

        # Bring header and data rows to one common width.
        width = max([len(headers)] + [len(row_data) for row_data in data])
        headers = headers + [""] * (width - len(headers))
        data = [row_data + [""] * (width - len(row_data)) for row_data in data]

        tables.append(pd.DataFrame(data, columns=headers))

    return tables


def save_tables_csv(tables: list[pd.DataFrame], output_dir: str = "outputs"):
    """Write each DataFrame to ``table_<n>.csv`` inside ``output_dir``.

    Files are numbered from 1 in list order and written without the index
    column. ``output_dir`` is created (including parents) when it does not
    exist yet, instead of failing on the first write.

    Args:
        tables: DataFrames to save.
        output_dir: Target directory; an empty string writes to the
            current working directory.
    """
    # Imported locally so the function does not rely on a module-level
    # ``os`` import.
    import os

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for number, table in enumerate(tables, start=1):
        table.to_csv(os.path.join(output_dir, f"table_{number}.csv"), index=False)


if __name__ == "__main__":
    # Script entry point: extract every table of DOCX_FILE and write one
    # CSV file per table.
    save_tables_csv(extract_docx_tables(DOCX_FILE))
7 changes: 7 additions & 0 deletions resources/TEST_DATA/outputs/table_1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Project Title:,Monitoring and assessment of MEA implementation and environmental trends in Antigua and Barbuda,Monitoring and assessment of MEA implementation and environmental trends in Antigua and Barbuda,Monitoring and assessment of MEA implementation and environmental trends in Antigua and Barbuda,Monitoring and assessment of MEA implementation and environmental trends in Antigua and Barbuda,Monitoring and assessment of MEA implementation and environmental trends in Antigua and Barbuda,Monitoring and assessment of MEA implementation and environmental trends in Antigua and Barbuda
Country(ies):,Antigua and Barbuda,GEF Project ID:,GEF Project ID:,GEF Project ID:,GEF Project ID:,9467
GEF Agency(ies):,UNDP,GEF Agency Project ID:,GEF Agency Project ID:,GEF Agency Project ID:,GEF Agency Project ID:,5425
Other Executing Partner(s):,Department of Environment,Submission Date:,Submission Date:,Submission Date:,Submission Date:,1 April 2016
GEF Focal Area(s):,Multi-Focal Areas,Project Duration (Months),Project Duration (Months),Project Duration (Months),Project Duration (Months),48
Integrated Approach Pilot,IAP-Cities IAP-Commodities IAP-Food Security,IAP-Cities IAP-Commodities IAP-Food Security,IAP-Cities IAP-Commodities IAP-Food Security,Corporate Program: SGP,Corporate Program: SGP,Corporate Program: SGP
Name of parent program:,[if applicable],[if applicable],Agency Fee ($),Agency Fee ($),"83,600","83,600"
4 changes: 4 additions & 0 deletions resources/TEST_DATA/outputs/table_10.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"Agency Coordinator, Agency name",Signature,"Date
(MM/dd/yyyy)",Project Contact Person,Telephone,Email
"Adriana Dinu,
UNDP-GEF Executive Coordinator",,03/31/2016,"Tom Twining-Ward, Senior Technical Advisor, UNDP (Green-LECRDs)",+90 850 2882 612,[email protected]
12 changes: 12 additions & 0 deletions resources/TEST_DATA/outputs/table_2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"Objectives/Programs (Focal Areas, Integrated Approach Pilot, Corporate Programs)",Trust Fund,(in $),(in $)
"Objectives/Programs (Focal Areas, Integrated Approach Pilot, Corporate Programs)",Trust Fund,GEF Project Financing,Co-financing
CCCD-1,GEFTF,"800,000","800,000"
Project Management,GEFTF,"80,000",
,,,
,,,
,,,
,,,
,,,
,,,
,,,
Total Project Cost,,"880,000","800,000"
28 changes: 28 additions & 0 deletions resources/TEST_DATA/outputs/table_3.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Project Objective: To strengthen institutional capacity for effectively managing information systems for national MEA obligations and monitoring impact and progress,Project Objective: To strengthen institutional capacity for effectively managing information systems for national MEA obligations and monitoring impact and progress,Project Objective: To strengthen institutional capacity for effectively managing information systems for national MEA obligations and monitoring impact and progress,Project Objective: To strengthen institutional capacity for effectively managing information systems for national MEA obligations and monitoring impact and progress,Project Objective: To strengthen institutional capacity for effectively managing information systems for national MEA obligations and monitoring impact and progress,Project Objective: To strengthen institutional capacity for effectively managing information systems for national MEA obligations and monitoring impact and progress,Project Objective: To strengthen institutional capacity for effectively managing information systems for national MEA obligations and monitoring impact and progress
Project Components,Financing Type,Project Outcomes,Project Outputs,Trust Fund,(in $),(in $)
Project Components,Financing Type,Project Outcomes,Project Outputs,Trust Fund,GEF Project Financing,Co-financing
1. Environmental indicators and monitoring system for Antigua and Barbuda,TA,1.1. Institutional arrangements and operational platform are enhanced for environmental monitoring in Antigua and Barbuda,"1.1. A set of core results-based environmental indicators is selected, with baseline data collected including from traditional knowledge sources and a cost-effective monitoring plan is agreed

1.2 Map national and regional information sources available to track the state and trends of the environment

1.3 Institutional arrangements and inter-agency agreements on information management are concluded involving at least 7 agencies and/or research institutes, and regulations are developed for the relevant section of the Environment Act 2015

1.4 A user-friendly online platform is established and updated, presenting available information on core environmental indicators

1.5 Individual capacity building (training) to effectively maintain and manage the environmental information system",GEFTF,"500,000","500,000"
"2. Generate,
access and use information
and knowledge",TA,2.1. The environmental information system (developed in Component 1) is recognized and used by different sectors of government and civil society as the official national source of environmental information,"2.1 A sustainable financing and management strategy is developed for the national environmental information system

2.2 The national environmental information system is used for reporting to at least 3 MEAs

2.3 The format and methodology for a comprehensive state of the environment report is established, with one national State of the Environment report published

2.4 The national environment information system is integrated into national processes, including development application reviews and environmental and social safeguard (ESS) assessments

2.5 A public information campaign on accessing and using the environmental information system is launched, in particular targeting educational institutions",GEFTF,"300,000","300,000"
,,,,,,
,,,,,,
Subtotal,Subtotal,Subtotal,Subtotal,,"800,000","800,000"
Project Management Cost (PMC),Project Management Cost (PMC),Project Management Cost (PMC),Project Management Cost (PMC),GEFTF,"80,000",
Total Project Cost,Total Project Cost,Total Project Cost,Total Project Cost,,"880,000","800,000"
10 changes: 10 additions & 0 deletions resources/TEST_DATA/outputs/table_4.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Sources of Co-financing,Name of Co-financier,Type of Co-financing,Amount ($)
Recipient Government,Ministry of Health and the Environment,In-kind,"500,000"
Donor Agency,World Bank/Nature Conservancy,Grants,"100,000"
GEF Agency,UNDP,In-kind,"100,000"
Donor Agency,IUCN,Grants,"100,000"
,,,
,,,
,,,
,,,
Total Co-financing,,,"800,000"
13 changes: 13 additions & 0 deletions resources/TEST_DATA/outputs/table_5.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
GEF Agency,Trust Fund,"Country/
Regional/ Global",Focal Area,"Programming
of Funds",(in $),(in $),(in $)
GEF Agency,Trust Fund,"Country/
Regional/ Global",Focal Area,"Programming
of Funds",GEF Project Financing (a),Agency Fee (b)b),"Total
(c)=a+b"
UNDP,GEFTF,Antigua and Barbuda,Multi-focal Areas,Cross-Cutting Capacity,"880,000","83,600","963,600"
,,,,,,,0
,,,,,,,0
,,,,,,,0
,,,,,,,0
Total GEF Resources,Total GEF Resources,Total GEF Resources,Total GEF Resources,Total GEF Resources,"880,000","83,600","963,600"
13 changes: 13 additions & 0 deletions resources/TEST_DATA/outputs/table_6.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"Project Preparation Grant amount requested: $50,000 PPG Agency Fee: 4,750","Project Preparation Grant amount requested: $50,000 PPG Agency Fee: 4,750","Project Preparation Grant amount requested: $50,000 PPG Agency Fee: 4,750","Project Preparation Grant amount requested: $50,000 PPG Agency Fee: 4,750","Project Preparation Grant amount requested: $50,000 PPG Agency Fee: 4,750","Project Preparation Grant amount requested: $50,000 PPG Agency Fee: 4,750","Project Preparation Grant amount requested: $50,000 PPG Agency Fee: 4,750","Project Preparation Grant amount requested: $50,000 PPG Agency Fee: 4,750"
GEF Agency,Trust Fund,"Country/
Regional/Global",Focal Area,"Programming
of Funds",(in $),(in $),(in $)
GEF Agency,Trust Fund,"Country/
Regional/Global",Focal Area,"Programming
of Funds",PPG (a),"Agency
Fee (b)","Total
c = a + b"
UNDP,GEF TF,Antigua and Barbuda,Multi-focal Areas,Cross-Cutting Capacity,"50,000","4,750","54,750"
,,,,,,,0
,,,,,,,0
Total PPG Amount,Total PPG Amount,Total PPG Amount,Total PPG Amount,Total PPG Amount,"50,000","4,750","54,750"
11 changes: 11 additions & 0 deletions resources/TEST_DATA/outputs/table_7.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Corporate Results,Replenishment Targets,Project Targets
Maintain globally significant biodiversity and the ecosystem goods and services that it provides to society,Improved management of landscapes and seascapes covering 300 million hectares,Hectares
"Sustainable land management in production systems (agriculture, rangelands, and forest landscapes)",120 million hectares under sustainable land management,Hectares
"Promotion of collective management of transboundary water systems and implementation of the full range of policy, legal, and institutional reforms and investments contributing to sustainable use and maintenance of ecosystem services",Water-food-ecosystems security and conjunctive management of surface and groundwater in at least 10 freshwater basins;,Number of freshwater basins
"Promotion of collective management of transboundary water systems and implementation of the full range of policy, legal, and institutional reforms and investments contributing to sustainable use and maintenance of ecosystem services",20% of globally over-exploited fisheries (by volume) moved to more sustainable levels,"Percent of fisheries, by volume"
4. Support to transformational shifts towards a low-emission and resilient development path,750 million tons of CO2e mitigated (include both direct and indirect),metric tons
"Increase in phase-out, disposal and reduction of releases of POPs, ODS, mercury and other chemicals of global concern","Disposal of 80,000 tons of POPs (PCB, obsolete pesticides)",metric tons
"Increase in phase-out, disposal and reduction of releases of POPs, ODS, mercury and other chemicals of global concern",Reduction of 1000 tons of Mercury,metric tons
"Increase in phase-out, disposal and reduction of releases of POPs, ODS, mercury and other chemicals of global concern",Phase-out of 303.44 tons of ODP (HCFC),ODP tons
"Enhance capacity of countries to implement MEAs (multilateral environmental agreements) and mainstream into national and sub-national policy, planning financial and legal frameworks",Development and sectoral planning frameworks integrate measurable targets drawn from the MEAs in at least 10 countries,Number of Countries: 1
"Enhance capacity of countries to implement MEAs (multilateral environmental agreements) and mainstream into national and sub-national policy, planning financial and legal frameworks",Functional environmental information systems are established to support decision-making in at least 10 countries,Number of Countries: 1
7 changes: 7 additions & 0 deletions resources/TEST_DATA/outputs/table_8.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Name,Position,Ministry,Date (MM/dd/yyyy)
Diann Black-Layne,"Director, Department of Environment",Ministry of Health and the Environment,03/11/2016
,,,
,,,
,,,
,,,
,,,
1 change: 1 addition & 0 deletions resources/TEST_DATA/outputs/table_9.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This request has been prepared in accordance with GEF policies and procedures and meets the GEF criteria for project identification and preparation under GEF-6.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"document_title": "GEF-6 GEF SECRETARIAT REVIEW FOR FULL-SIZED/MEDIUM-SIZED PROJECTS THE GEF/LDCF/SCCF TRUST FUND",
"project_details": {
"GEF ID": "10051",
"Country/Region": "Djibouti",
"Project Title": "Promoting a Better Access to Modern Energy Services through Sustainable Mini-grids and Hybrid Technologies in Djibouti",
"GEF Agency": "UNDP",
"GEF Agency Project ID": "6202 (UNDP)",
"Type of Trust Fund": "GEF Trust Fund",
"GEF Focal Area (s)": "Climate Change",
"GEF-6 Focal Area/ LDCF/SCCF Objective (s)": "CCM-1 Program 1;",
"Anticipated Financing PPG": "$50,000",
"Project Grant": "$863,242",
"Co-financing": "$4,000,000",
"Total Project Cost": "$4,913,242",
"PIF Approval": "May 18, 2018",
"Council Approval/Expected": "",
"CEO Endorsement/Approval": "",
"Expected Project Start Date": "",
"Program Manager": "Ming Yang",
"Agency Contact Person": "Saliou Toure"
},
"pif_review": {
"review_sections": {
"Project Consistency": {
"questions": [
{
"number": "1",
"text": "Is the project aligned with the relevant GEF strategic objectives and results framework?",
"secretariat_comment": {
"date": "4/18/2018 MY:",
"comment": "Yes, it is aligned with CCM-1: Technology Transfer, and Supportive Policies and Strategies; Program 1: Promote timely development, demonstration and financing of low-carbon technologies and mitigation options"
}
},
{
"number": "2",
"text": "Is the project consistent with the recipient country's national strategies and plans or reports and assessments under relevant conventions?",
"secretariat_comment": {
"date": "4/18/2018 MY:",
"comment": "Yes, it is stated on pages 17-18."
}
}
]
},
"Project Design": {
"questions": [
{
"number": "3",
"text": "Does the PIF sufficiently indicate the",
"secretariat_comment": {
"date": "4/18/2018 MY:",
"comment": ""
}
}
]
}
}
},
"footer": "GEF-6 FSP/MSP Review Template January2015",
"page_number": "1"
}
Loading

0 comments on commit 5b2059c

Please sign in to comment.