Skip to content
K E R L edited this page Apr 18, 2020 · 1 revision

Welcome to the ISDA.pdf- wiki!

import PyPDF2
import re
import csv

#####################################
############### PDF Extraction module


def join_page_text(pages):
    """Concatenate the text extracted from each PDF page object.

    Args:
        pages: Iterable of objects exposing ``extractText()`` (the page
            objects handed back by ``PdfFileReader.getPage``).

    Returns:
        A single string with every page's text in order. A page whose
        ``extractText()`` yields ``None`` or ``""`` contributes nothing.
    """
    return "".join(page.extractText() or "" for page in pages)


def ocr_output_to_text(raw):
    """Return OCR output as ``str``.

    ``textract.process`` returns a byte string, while the rest of the script
    searches the text with ``str`` regex patterns, so bytes are decoded here.
    Undecodable bytes are replaced rather than raising, because OCR output is
    best-effort anyway.
    """
    if isinstance(raw, bytes):
        return raw.decode("utf-8", errors="replace")
    return raw


def extract_pdf_text(path):
    """Return all text found in the PDF at *path*.

    The text layer is read with PyPDF2 first. PyPDF2 cannot read scanned
    (image-only) files, so when it yields nothing but whitespace the file is
    run through textract's tesseract OCR instead.

    Args:
        path: Filesystem path of the PDF to read.

    Returns:
        The extracted text as ``str``. It likely contains a lot of spaces and
        possibly junk such as stray newlines.

    Raises:
        OSError: If *path* cannot be opened.
        ImportError: If the OCR fallback is needed and textract is missing.
    """
    # NOTE(review): PdfFileReader / numPages / getPage / extractText are the
    # pre-3.0 PyPDF2 names -- confirm against the installed PyPDF2 version.
    with open(path, 'rb') as pdf_file:
        reader = PyPDF2.PdfFileReader(pdf_file)
        # Pages are read lazily, so extraction must finish before the file closes.
        text = join_page_text(
            reader.getPage(index) for index in range(reader.numPages)
        )

    # Whitespace-only output means there was no usable text layer.
    if text.strip():
        return text

    # Imported here because OCR is only needed for scanned/image based PDFs.
    import textract

    return ocr_output_to_text(
        textract.process(path, method='tesseract', language='eng')
    )


# Write a for-loop to open many files (leave a comment if you'd like to learn how).
filename = 'SampleISDA.pdf'

if __name__ == "__main__":
    # Now we have a text variable that contains all the text derived from our
    # PDF file. Type print(text) to see what it contains.
    text = extract_pdf_text(filename)

############################################
################## RegEx Module

# Longest stretch of text (in characters) tolerated between the phrases of a
# single clause. This is a heuristic: large enough to span wording such as
# '"Specified Entity" means in relation to Party A for the purpose of ...',
# small enough not to pair phrases from unrelated clauses.
MAX_CLAUSE_GAP = 200
_GAP = r".{0,%d}?" % MAX_CLAUSE_GAP

# Each rule is (CSV column, pattern, (value, message) on match,
# (value, message) on miss). The phrases must appear IN ORDER and close
# together; joining them with "|" would match when any one of them appears
# anywhere in the document. "\s*" tolerates the line breaks and dropped
# spaces that PDF text extraction tends to produce.
# NOTE(review): the miss values mirror what the printed messages assert
# (anything that is not USD is reported as British Pound, anything that is
# not New York as English) -- confirm that assumption for your documents.
SCHEDULE_RULES = (
    (
        'Specififed Entity Party A',  # spelling matches the CSV header below
        re.compile(
            r"Specified\s*Entity" + _GAP + r"Party\s*A\b" + _GAP
            + r"Not\s*[Aa]pplicable",
            re.DOTALL,
        ),
        ('Not applicable',
         "Schedule (a): Specified Entity not applicable to Party A"),
        ('Applicable', "Specified Entity is applicable to Party A"),
    ),
    (
        'Termination Currency',
        re.compile(
            r"Termination\s*Currency" + _GAP + r"United\s*States\s*Dollar",
            re.DOTALL,
        ),
        ('USD', "Schedule (g): Termination Currency is USD"),
        ('GBP', "Termination Currency is British Pound"),
    ),
    (
        'Governing Law',
        re.compile(r"Governing\s*Law" + _GAP + r"New\s*York", re.DOTALL),
        ('New York', "Schedule (h): Governing Law is New York"),
        ('English', "Governing law is English"),
    ),
)


def classify_schedule(text):
    """Classify the ISDA Schedule terms this script knows how to detect.

    Prints one line per rule and returns the results, so every term has a
    value whether or not its pattern matched.

    Args:
        text: Extracted document text. ``bytes`` (raw OCR output) is decoded
            as UTF-8; ``None`` or ``""`` is treated as "nothing matched".

    Returns:
        Dict mapping each rule's CSV column name to the detected value.
    """
    if isinstance(text, bytes):
        text = text.decode("utf-8", errors="replace")
    haystack = text or ""

    found = {}
    for term, pattern, on_match, on_miss in SCHEDULE_RULES:
        value, message = on_match if pattern.search(haystack) else on_miss
        print(message)
        found[term] = value
    return found


####################################
############# CSV Module

# Header row of the output file. Spellings and the repeated/trailing entries
# are kept exactly as they were so the emitted header does not change.
ISDA_terms = ['Specififed Entity Party A', 'Specified Entity Party B', 'Specified Transaction', 'Certain Events of Credit Default', 'Automatic Early Termination', 'Payments on Early Termination', 'Termination Currency', 'Additional Termination Events', 'Payer Representations', 'Payee Representations', 'Addresess for Notices CPTY', 'Process Agent', 'Offices', 'Multibranch Party', 'Calculation Agent', 'Credit Support Document', 'Credit Support Provider', 'Governing Law', 'Netting of Payments', 'Afffiliate', 'Definitions', 'Downgrade Provisions', 'Termination Currency', 'Governing Law', 'Thursday', 'Friday']


def build_row(found, terms=None):
    """Lay out detected values under their matching header columns.

    Args:
        found: Dict of column name -> value, as returned by
            ``classify_schedule``.
        terms: Header columns to align against; defaults to ``ISDA_terms``.

    Returns:
        A list as long as *terms*. Columns with no detected value are left
        empty; a column name that occurs twice gets the same value twice.
    """
    columns = ISDA_terms if terms is None else terms
    return [found.get(term, '') for term in columns]


def write_csv(path, header, row):
    """Write *header* and *row* as a two-line comma-separated file at *path*."""
    # newline='' lets the csv module control line endings itself; without it
    # Windows output gets a blank line after every row.
    with open(path, 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',')
        csv_writer.writerow(header)
        csv_writer.writerow(row)


if __name__ == "__main__":
    # `text` is the document text produced by the PDF extraction section.
    values = build_row(classify_schedule(text))
    write_csv('ISDAOutputFile.csv', ISDA_terms, values)

Clone this wiki locally