diff --git a/PDF Organizer Script/README.md b/PDF Organizer Script/README.md new file mode 100644 index 00000000..f24eb570 --- /dev/null +++ b/PDF Organizer Script/README.md @@ -0,0 +1,39 @@ +# PDF Organizer Script + +### Description + +The PDF Organizer is a Python script designed to help you manage your collection of PDF files efficiently. It analyzes each PDF file in a specified directory, reading metadata such as titles and authors, and organizes the files into subfolders based on these metadata categories. Additionally, it renames the PDFs according to a consistent format ("Author - Title.pdf") and generates a summary report of the folder's contents, including a count of PDFs per category. + +## Features + +### Metadata Extraction: + +Reads metadata from PDF files to get the title, author, and number of pages. + +### Dynamic Organization: + +Automatically creates subfolders for different authors and moves the PDFs into these folders. + +### File Renaming: + +Renames PDF files following a consistent naming convention for easier identification. + +### Summary Report: + +Generates a report detailing the organization process, including the number of PDFs processed and sorted by category. + +## Prerequisites + +Before you can use the PDF Organizer script, you need to have Python installed on your system. Additionally, the script depends on the PyPDF2 library for reading PDF metadata. + +## Installation + +1. Install Python +2. Install PyPDF2: Run the following command to install the PyPDF2 library: pip install PyPDF2 + +## Usage + +1. Place the pdf_organizer.py script in a directory of your choice. +2. Open a terminal or command prompt and navigate to the directory where the script is located. +3. Run the script with Python by executing the following command: python pdf_organizer.py +4. 
# Before running the script, make sure to modify the source_folder variable in
# the script to point to the directory containing your PDF files and the
# report_path variable to where you want the summary report to be saved.
#
# File: PDF Organizer Script/pdf_organizer.py
"""Organize a folder of PDF files by the author recorded in their metadata.

Each PDF found directly inside the source folder is moved into a subfolder
named after its author and renamed to "Author - Title.pdf". A plain-text
summary with the number of PDFs filed under each author is written afterwards.
"""

import os
import shutil
import string

try:
    from PyPDF2 import PdfReader
except ImportError:
    # Keep the module importable so the missing dependency can be reported
    # with an actionable message instead of a traceback at import time.
    PdfReader = None

# Fallbacks used when a PDF carries no (or an unusable) title / author.
DEFAULT_TITLE = 'Unknown Title'
DEFAULT_AUTHOR = 'Unknown Author'

# Punctuation that is kept in generated file and folder names.
_SAFE_PUNCTUATION = frozenset('-_.() ')


def _text_or_default(value, default):
    """Return *value* as stripped text, or *default* when it is missing/blank."""
    text = '' if value is None else str(value).strip()
    return text or default


def _unique_path(folder, stem, extension):
    """Return a path in *folder* for ``stem + extension`` that does not exist yet.

    A numeric suffix (" (2)", " (3)", ...) is appended to the stem until the
    name is free, so moving a file there never replaces an existing one.
    """
    candidate = os.path.join(folder, stem + extension)
    counter = 2
    while os.path.exists(candidate):
        candidate = os.path.join(folder, f"{stem} ({counter}){extension}")
        counter += 1
    return candidate


# Function to extract PDF metadata
def extract_pdf_metadata(pdf_path):
    """Read title, author and page count from the PDF at *pdf_path*.

    Returns:
        A dict with the keys 'title', 'author' and 'num_pages', or None when
        the file cannot be read (the error is printed). Missing or blank
        titles/authors are replaced by 'Unknown Title' / 'Unknown Author'.

    Raises:
        ImportError: if PyPDF2 is not installed.
    """
    if PdfReader is None:
        raise ImportError(
            "PyPDF2 is required to read PDF metadata; "
            "install it with 'pip install PyPDF2'."
        )

    # Deliberately broad: one corrupt or encrypted PDF must not stop the run.
    try:
        reader = PdfReader(pdf_path)
        # reader.metadata is None when the PDF has no information dictionary.
        metadata = reader.metadata or {}
        num_pages = len(reader.pages)
        title = _text_or_default(metadata.get('/Title'), DEFAULT_TITLE)
        author = _text_or_default(metadata.get('/Author'), DEFAULT_AUTHOR)
        return {'title': title, 'author': author, 'num_pages': num_pages}
    except Exception as e:
        print(f"Error reading {pdf_path}: {e}")
        return None


# Function to organize PDFs into subfolders based on author
def organize_pdfs_by_author(source_folder):
    """Move every PDF in *source_folder* into a per-author subfolder.

    Only regular files directly inside *source_folder* whose name ends in
    ".pdf" (case-insensitive) are considered. Files whose metadata cannot be
    read, or that cannot be moved, are reported and left where they are.

    Returns:
        A dict mapping each author name (as found in the metadata) to the
        number of PDFs moved for that author.
    """
    # Sorted so that collision suffixes are assigned in a reproducible order.
    pdf_files = sorted(
        name for name in os.listdir(source_folder)
        if name.lower().endswith('.pdf')
        and os.path.isfile(os.path.join(source_folder, name))
    )
    summary = {}

    for pdf_file in pdf_files:
        pdf_path = os.path.join(source_folder, pdf_file)
        metadata = extract_pdf_metadata(pdf_path)
        if not metadata:
            continue

        author = metadata['author']
        # A name made only of rejected characters sanitizes to ''; fall back
        # so the file is not dropped back into the source folder itself.
        author_name = sanitize_filename(author) or DEFAULT_AUTHOR
        title_name = sanitize_filename(metadata['title']) or DEFAULT_TITLE

        author_folder = os.path.join(source_folder, author_name)
        os.makedirs(author_folder, exist_ok=True)

        # shutil.move may silently replace an existing destination, so pick a
        # free name first (many PDFs share "Unknown Author - Unknown Title").
        new_pdf_path = _unique_path(
            author_folder, f"{author_name} - {title_name}", '.pdf'
        )

        try:
            shutil.move(pdf_path, new_pdf_path)
        except OSError as e:
            print(f"Error moving {pdf_path}: {e}")
            continue

        summary[author] = summary.get(author, 0) + 1

    return summary


# Function to sanitize filenames to remove invalid characters
def sanitize_filename(filename):
    """Return *filename* reduced to characters that are safe in a file name.

    Letters and digits (including non-ASCII ones) and the punctuation
    ``-_.()`` plus space are kept; everything else is removed. Leading and
    trailing spaces and dots are stripped, so the result can never be "." or
    ".." and never ends in a dot or space. None yields an empty string.
    """
    text = '' if filename is None else str(filename)
    kept = ''.join(c for c in text if c.isalnum() or c in _SAFE_PUNCTUATION)
    return kept.strip(' .')


# Function to generate a summary report
def generate_summary_report(summary, report_path):
    """Write one "Author: <name>, PDFs: <count>" line per author to *report_path*.

    The file is written as UTF-8 so author names with non-ASCII characters do
    not depend on the platform's default encoding.
    """
    with open(report_path, 'w', encoding='utf-8') as report_file:
        report_file.writelines(
            f"Author: {author}, PDFs: {count}\n"
            for author, count in summary.items()
        )


# Main function to execute the organizer
def main():
    """Organize the configured folder and write the summary report."""
    source_folder = '/path/to/pdf/folder'
    report_path = '/path/to/summary/report.txt'

    # Fail early with a readable message (and a non-zero exit status).
    if PdfReader is None:
        raise SystemExit(
            "PyPDF2 is not installed. Install it with: pip install PyPDF2"
        )
    if not os.path.isdir(source_folder):
        raise SystemExit(f"Source folder not found: {source_folder}")

    summary = organize_pdfs_by_author(source_folder)
    generate_summary_report(summary, report_path)
    print("PDF organization and summary report generation complete.")


if __name__ == "__main__":
    main()