forked from schedutron/CPAP
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Complete Exercise 4-5: Threads, Files and Regex
- Loading branch information
1 parent
f36e041
commit 5f89546
Showing
4 changed files
with
72 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,3 +14,5 @@ email | |
my_mbox.txt | ||
learn/ | ||
Chap4/bytes_file | ||
Chap4/links.html | ||
Chap4/links_v2.html |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/usr/bin/env python3 | ||
import linecache, re, sys | ||
from threading import Lock, Thread | ||
|
||
|
||
def render_HTML(start, end):
    """Append an HTML link for every "From:" header in a range of lines.

    Reads lines ``start`` .. ``end - 1`` of the module-level ``filename``
    through ``linecache`` and, for each line matched by the module-level
    ``EMAIL_PATT``, appends ``<p><a href='http://www.DOMAIN'>SENDER</a></p>``
    to ``links_v2.html``.  Intended to run as a thread target; the
    module-level ``lock`` serialises output between threads.

    Args:
        start: First line number to inspect (passed to linecache.getline).
        end: Line number at which to stop (exclusive).
    """
    import html  # Local import: only this function needs HTML escaping.

    with open("links_v2.html", "a") as page:
        for lineno in range(start, end):
            match = EMAIL_PATT.match(linecache.getline(filename, lineno))
            if not match:
                continue
            sender = match.group(1)
            addr = match.group(2)
            # The domain is whatever follows the last '@'.  Skip malformed
            # addresses instead of letting an IndexError kill this thread.
            _, at_sign, domain = addr.rpartition('@')
            if not at_sign or not domain:
                continue
            site = 'http://www.' + domain
            # Header text comes from the mailbox, so escape it before
            # embedding it in markup.
            entry = "<p><a href='{0}'>{1}</a></p>\n".format(
                html.escape(site, quote=True), html.escape(sender))
            # Every thread has its own buffered file object.  Flushing while
            # the lock is held pushes the whole entry to the file before
            # another thread can write, so entries are never split mid-line.
            with lock:
                page.write(entry)
                page.flush()
|
||
|
||
# Usage: <script> FILE NTHREADS
#
# Scans FILE for "From: NAME <ADDRESS>" lines using NTHREADS worker threads
# and writes one link per match to links_v2.html.
#
# render_HTML() reads `filename`, `EMAIL_PATT` and `lock` as module globals,
# so all three must be bound before the first worker thread is started.
if len(sys.argv) < 3:
    sys.exit("Usage: {0} FILE NTHREADS".format(sys.argv[0]))

filename = sys.argv[1]
try:
    nthreads = int(sys.argv[2])
except ValueError:
    sys.exit("NTHREADS must be an integer.")
if nthreads < 1:
    # Zero would divide by zero below; negative counts are meaningless.
    sys.exit("NTHREADS must be at least 1.")

EMAIL_PATT = re.compile(r"From: (.+) <(\S+)>")
lock = Lock()

with open(filename) as f:
    # Assumes 1st line to be # of lines in the file.
    try:
        nlines = int(f.readline().strip())
    except ValueError:
        sys.exit("The first line of the file must be its line count.")
if nlines < nthreads:
    # sys.exit() rather than quit(): quit() is an interactive helper
    # installed by the site module and is not guaranteed to exist.
    sys.exit("Too many threads for processing file. Use less threads.")

with open("links_v2.html", "w") as f:
    f.write("<html>\n")

# linecache line numbers are 1-based, so scanning starts at line 1.
# NOTE(review): it is unclear whether the count on line 1 includes line 1
# itself.  Scanning through line nlines + 1 covers both readings; the count
# line never matches EMAIL_PATT and linecache returns '' past end of file.
first_line = 1
stop_line = nlines + 2  # exclusive
span = stop_line - first_line
# Exactly nthreads contiguous, non-empty ranges; the integer arithmetic
# spreads any remainder across the ranges instead of spawning an extra thread.
bounds = [first_line + span * k // nthreads for k in range(nthreads + 1)]

threads = []
for start, end in zip(bounds, bounds[1:]):
    thread = Thread(target=render_HTML, args=(start, end))
    thread.start()
    threads.append(thread)

# The closing tag may only be written once every worker has finished.
for thread in threads:
    thread.join()

with open("links_v2.html", "a") as f:
    f.write("</html>")
print("HTML file written. Open links_v2.html to view the contents.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/usr/bin/env python3 | ||
import email, mailbox, re | ||
|
||
# Writes links.html: one "<p><a href='http://www.DOMAIN'>NAME</a></p>" line
# per message in the mbox, built from each message's From header.
import html
from email.utils import parseaddr

mbox = mailbox.mbox("../Chap3/my_mbox.txt")
with open("links.html", "w") as page:
    page.write("<html>\n")
    for mail in mbox:
        mail_str = str(mail)
        # The headers are re-parsed starting at the first 'X-Received'
        # (presumably because text before it confuses the parser for this
        # mailbox -- TODO confirm).  When the marker is absent, fall back to
        # the message as mailbox parsed it rather than slicing from index -1.
        marker = mail_str.find('X-Received')
        sender = None
        if marker != -1:
            sender = email.message_from_string(mail_str[marker:])['From']
        if sender is None:
            sender = mail['From']
        if sender is None:
            continue  # No From header at all: nothing to link.

        # parseaddr splits 'Name <user@host>' into its parts.  Splitting the
        # raw header on '@' left the closing '>' glued to the domain.
        name, addr = parseaddr(str(sender))
        _, at_sign, domain = addr.rpartition('@')
        if not at_sign or not domain:
            continue  # Not a usable address.
        site = 'http://www.' + domain
        # Quote the href and escape both values: header text is untrusted.
        page.write("<p><a href='{0}'>{1}</a></p>\n".format(
            html.escape(site, quote=True), html.escape(name or addr)))
    page.write("</html>")
print("HTML file written. Open links.html to view the contents.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters