-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCamelotexcel.py
31 lines (22 loc) · 906 Bytes
/
Camelotexcel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import camelot
import pandas as pd
import os
pdf_path = "FinancialStatement-2024-I-AALI.pdf"
output_dir = "extracted_content_page_9"
os.makedirs(output_dir, exist_ok=True)
page_number = '1'
tables = camelot.read_pdf(pdf_path, pages=page_number, flavor='stream')
if tables:
combined_df = pd.concat([table.df for table in tables])
csv_filename = os.path.join(output_dir, f"Extracted.csv")
combined_df.to_csv(csv_filename, index=False, header=False)
print(f"Content from page 1 saved to {csv_filename}")
txt_filename = os.path.join(output_dir, f"Extracted.txt")
with open(txt_filename, 'w', encoding='utf-8') as txt_file:
for table in tables:
txt_file.write(table.data_to_string())
txt_file.write('\n\n')
print(f"Content from page 1 also saved to {txt_filename}")
else:
print("No content found on page 1.")
print("Extraction complete.")