-
Notifications
You must be signed in to change notification settings - Fork 6
/
pdftotext.py
55 lines (32 loc) · 817 Bytes
/
pdftotext.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pdfplumber
def get_text(filename):
    """Return the text of a PDF as one whitespace-normalised string.

    Every page of the document is run through ``extract_text()``; the
    per-page results are joined with a single space, and every run of
    whitespace (including newlines) is then collapsed to one space.

    Args:
        filename: Path or file object accepted by ``pdfplumber.open``.

    Returns:
        A single-line string with the words of all pages separated by
        single spaces. Empty if no page yields any text.
    """
    with pdfplumber.open(filename) as pdf:
        # Some pdfplumber releases return None instead of "" for a page
        # without extractable text; coalesce so the join below cannot
        # raise TypeError on such pages.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # NOTE(review): an earlier revision looped over the tokens and ran
    # `del word` on those failing str.isalnum(). That only unbound the loop
    # variable and never removed a token, so punctuation-bearing words have
    # always been kept. The output is deliberately left unchanged here;
    # confirm with callers before introducing a real filter.
    tokens = " ".join(page_texts).split()
    return " ".join(tokens)
def get_percentages(list1):
    """Flatten the trailing items of each row into one list.

    With ``n = len(list1)``, the first row contributes its last ``n - 1``
    items, the second row its last ``n - 2`` items, and so on until a row
    would contribute fewer than one item. For a square matrix this is the
    part of each row to the right of the main diagonal.

    Args:
        list1: Sequence of sliceable rows.

    Returns:
        A new list with the selected items in row order.
    """
    take = len(list1) - 1
    collected = []
    for row in list1:
        # Once the tail length drops below one, no later row contributes.
        if take < 1:
            break
        collected.extend(row[-take:])
        take -= 1
    return collected
def assign_comparison(n):
    """List every pair of distinct 1-based indices up to ``n``.

    Args:
        n: Largest index to include.

    Returns:
        A list of two-item lists ``[i, j]`` with ``1 <= i < j <= n``,
        ordered by ``i`` and then by ``j``. Empty when ``n`` is below 2.
    """
    # Starting the inner range at first + 1 yields exactly the pairs
    # where the first index is smaller than the second.
    return [
        [first, second]
        for first in range(1, n + 1)
        for second in range(first + 1, n + 1)
    ]