Complete Exercise 4-4-b: multithreaded file reading; used time CLT in…

…stead of timeit
banrieen · Aug 8, 2017 · f36e041 · f36e041
1 parent 4158419
commit f36e041
Show file tree

Hide file tree

Showing 2 changed files with 58 additions and 0 deletions.
diff --git a/Chap4/mt_bytes_count.py b/Chap4/mt_bytes_count.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+"""
+Script to count occurrences of a particular byte in a file.
+
+There was no noticeable difference between single-threaded and multithreaded
+runs. However, the multithreaded run did execute slightly faster (on the
+order of milliseconds). Also, more threads didn't necessarily mean better
+performance, e.g., a run with 20 threads executed slightly faster than a run
+with 100 threads.
+"""
+
+import os, sys
+from threading import Thread, Lock
+
+def count(filename, byte, start, end):
+    global global_count
+    number = 0
+    with open(filename, 'rb') as f:
+        f.seek(start)
+        for i in range(start, end):
+            c = f.read(1)
+            if c == byte:
+                number += 1
+    lock.acquire()
+    global_count += number
+    lock.release()
+
+lock = Lock()
+filename = sys.argv[1]
+byte = bytes(sys.argv[2], 'utf-8')
+global_count = 0
+filesize = os.stat(filename).st_size
+nthreads = int(sys.argv[3])
+
+if nthreads > filesize:
+    print("Too many threads for reading this file! Use less threads.")
+    quit()
+
+readsize = filesize // nthreads
+ranges = []
+threads = []
+i = 0
+while i < filesize:
+    ranges.append([i, i+readsize])
+    i += readsize
+if ranges[-1][1] > filesize:
+    ranges[-1][1] = filesize
+
+for pair in ranges:
+    thread = Thread(target=count, args=(filename, byte, pair[0], pair[1]))
+    thread.start()
+    threads.append(thread)
+
+for thread in threads:
+    thread.join()
+
+print(global_count)
diff --git a/README.md b/README.md
@@ -15,6 +15,7 @@
     * [Exercise 4-2: Utility of multithreading in Python (python_threads.md)][4-2]
     * [Exercise 4-3: Multithreading on Multicore System (threads_multicore.md)][4-3]
     * [Exercise 4-4-a: Simple Byte Count (bytes_count.py)][4-4-a]
+    * [Exercise 4-4-b: Multithreaded Byte Count (mt_bytes_count.py)][4-4-b]
 
 [chap4]: /Chap4
 [e4-10]: /Chap4/mtsleepF.py
@@ -25,3 +26,4 @@
 [4-2]: /Chap4/python_threads.md
 [4-3]: /Chap4/threads_multicore.md
 [4-4-a]: /Chap4/bytes_count.py
+[4-4-b]: /Chap4/mt_bytes_count.py