Merge pull request #9 from silshack/gh-pages

Update Repository
silshack · Sep 30, 2013 · 409422d · 409422d
2 parents 95fa70e + 8b0ed06
commit 409422d
Show file tree

Hide file tree

Showing 128 changed files with 118,936 additions and 18,802 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1 +1 @@
-site*
+_site/*
diff --git a/LICENSE.md b/LICENSE.md
@@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2013 Elliott Hauser
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
@@ -1,5 +1,4 @@
 SILShack, Fall 2013
-===================
 
 This is the source code for the collaboratively edited blog/site for the Fall 2013 session of UNC INLS 560, Programming for Information Professionals.
 

diff --git a/_includes/add_names.py b/_includes/add_names.py
@@ -0,0 +1,22 @@
+def add_names(list_of_names, file):
+  """
+  Append each name in list_of_names to the end of a file, one name per line.
+
+  list_of_names -- an iterable of names; each one is formatted with %s
+  file          -- path of the file to append to
+  """
+  # 'a' mode appends to the file instead of overwriting it. The with-block
+  # closes the file, making the changes visible, even if a write fails.
+  with open(file, 'a') as names_file:
+    # Write every name followed by a newline so each lands on its own line.
+    for name in list_of_names:
+      names_file.write("%s\n" % (name,))
+
+
+# Exercise: make new_names customizable:
+new_names = ['John', 'Sarah', 'Taj']
+
+# Exercise: make the file name used here customizable:
+add_names(new_names, 'names.txt')
+
diff --git a/_includes/gerbal/execise1.txt b/_includes/gerbal/execise1.txt
diff --git a/_includes/gerbal/execise2.txt b/_includes/gerbal/execise2.txt
@@ -0,0 +1 @@
+There are 4311 unique words in this work
diff --git a/_includes/gerbal/execise3.txt b/_includes/gerbal/execise3.txt
@@ -0,0 +1,21 @@
+The twenty most common terms in this work are:
+('the', 1574)
+('and', 1061)
+('a', 705)
+('to', 679)
+('of', 662)
+('in', 520)
+('it', 516)
+('he', 485)
+('was', 427)
+('his', 420)
+('that', 338)
+('i', 337)
+('scrooge', 314)
+('with', 269)
+('you', 233)
+('as', 228)
+('said', 221)
+('had', 205)
+('him', 198)
+('for', 197)
diff --git a/_includes/gerbal/execise4.txt b/_includes/gerbal/execise4.txt
@@ -0,0 +1 @@
+['', 'neighbours', 'jacob', 'warnt', 'poulterers', 'laundress', 'mr', 'execrable', 'endeavoured', 'dunstan', 'london', 'ebenezer', 'damascus', 'accusatory', 'recognising', 'crusoe', 'hadnt', 'waistcoats', 'conducive', 'everyhow', 'ebook', 'ironmongery', 'whomsoever', 'o', 'mens', 'reverently', 'behindhand', 'recognised', 'abels', 'iv', 'ii', 'im', 'youre', 'jose', 'schoolmaster', 'countrys', 'theyre', 'christmas', 'dilber', 'recollect', 'waistcoat', 'petrification', 'outstretched', 'undisturbed', 'blindmans', 'laocon', 'd', 'elses', 'munifi', 'wouldnt', 'december', 'sunday', 'frousy', 'marleys', 'browed', 'drowsiness', 'undigested', 'pastrycooks', 'spanish', 'unavailing', 'solemnised', 'despairingly', 'caroline', 'unaltered', 'scrutinise', 'lifes', 'flutterings', 'imploringly', 'foldings', 'defenceless', 'fezziwig', 'youd', 'pleasantry', 'endeavouring', 'hed', 'scro', 'broadwise', 'curiously', 'administered', 'menendez', 'recognise', 'ali', 'childs', 'overflowings', 'christian', '1843', 'fragrance', 'miserys', 'der', 'v', 'almshouse', 'robinson', 'shouldnt', 'a', 'isnt', 'norfolk', 'yo', 'fezziwigs', 'excrescence', 'robert', 'friday', 'dont', 'cornhill', 'rustlings', 'fred', 'i', 'wasnt', 'unconstrained', 'charitable', 'undertakers', 'weathercock', 'laundresss', 'belshazzars', 'c', 'coverley', 'hilli', 'whos', 'camden', 'ful', 'cratchits', 'belinda', 'tim', 'endeavour', 'passionless', 'wilkins', 'irresistibly', 'saturdays', 'penetrated', 'instalments', 'mrs', 'neighbouring', 'lamplighter', 'cratchit', 'oclock', 'skreeks', 'havent', 'tims', 'thats', 'wailings', 'lamplight', 'iii', 'didnt', 'orson', 'plentys', 'childrens', 'whitechapel', 'abrahams', 'marley', 'residuary', 'delicately', 'oge', 'uncurtained', 'uncared', 'martha', 'sheba', 'fruiterers', 'monstrous', 'youll', 'severally', 'observable', 'thankee', 'theyve', 'britain', 'ive', 'repleted', 'charles', 'pauls', 'hasnt', 'sprinklings', 'delightful', 'gutenberg', 'submissively', 'theyd', 'couldnt', 
'wheresoever', 'demeanour', 'inasmuch', 'shufflings', 'shant', 'dowerless', 'monday', 'unwatched']
diff --git a/_includes/gerbal/extra1.py b/_includes/gerbal/extra1.py
@@ -0,0 +1,102 @@
+import re,string,operator,os
+
+def process_file(filename, guten):
+    """
+    Build a word histogram (word -> count) from the lines of a text file.
+
+    filename -- path of the file to read
+    guten    -- truthy if the file has a Project Gutenberg style header and
+                footer; lines are then skipped until a start marker is seen
+                and skipped again from the end marker onwards
+
+    Returns the histogram dictionary filled in by process_line().
+    """
+    hist = dict()
+    # While header is True we are outside the body of the work and skip lines.
+    header = bool(guten)
+    # The with-block closes the file even if processing a line fails.
+    with open(filename) as fp:
+        for line in fp:
+            # The footer starts here: stop counting before this line is processed.
+            if line.startswith("*** END OF THIS PROJ"):
+                header = True
+            if not header:
+                process_line(line, hist)
+            # Checked after the processing step so that, while skipping, the
+            # start marker line itself is not counted. The original author
+            # notes a marker here is only for the Shakespeare folios
+            # file "00ws110.tt".
+            if line.startswith(("*END*THE SMALL PRINT", "*** START OF THIS PR")):
+                header = False
+    return hist
+
+def process_line(line, hist):
+    """
+    Count the words of one line of text into the histogram hist (in place).
+
+    line -- a string of text
+    hist -- dictionary mapping word -> count; it is updated, nothing is returned
+
+    Hyphens separate words, words are lower-cased and stripped of punctuation,
+    and tokens with nothing left after cleaning (e.g. "***") are not counted.
+    """
+    line = line.replace('-', ' ')  # split hyphenated words into their parts
+
+    for word in line.split():
+        word = word.strip(string.punctuation + string.whitespace)
+        word = word.lower()
+        # Drop remaining non-word characters and underscores inside the word,
+        # such as apostrophes or special characters not in string.punctuation.
+        word = re.sub(r'[\W_]+', '', word)
+        if not word:
+            # Pure-punctuation tokens would otherwise be counted as the word ''.
+            continue
+
+        hist[word] = hist.get(word, 0) + 1
+
+def top_20(hist):
+    """
+    Return a text report of the (up to) twenty most frequent words in hist.
+
+    hist -- dictionary mapping word -> count
+
+    The report is a heading line followed by one "(word, count)" tuple per
+    line, highest count first; every line ends with a newline.
+    """
+    # Sort the (word, count) pairs by count, highest first.
+    hist_sorted = sorted(hist.items(), key=operator.itemgetter(1), reverse=True)
+    lines = ["The twenty most common terms in this work are:"]
+    # Slicing copes with histograms that hold fewer than twenty words.
+    lines.extend(str(pair) for pair in hist_sorted[:20])
+    return "\n".join(lines) + "\n"
+
+def compare_lists(list1, list2):
+    """
+    Flag which words of list1 are missing from list2.
+
+    list1 -- iterable of words (e.g. a histogram dictionary)
+    list2 -- container of known words, tested with "in"
+
+    Returns a dictionary mapping every word of list1 to True if it is not
+    in list2 and to False if it is.
+    """
+    new_list = dict()
+    for word1 in list1:
+        # "in" replaces dict.has_key(), which no longer exists in Python 3.
+        new_list[word1] = word1 not in list2
+    return new_list
+
+def return_true_words(dic):
+    """
+    Return a list of the keys of dic whose values are truthy.
+    """
+    return [key for key in dic if dic[key]]
+
+def exercise1(book):
+    '''
+    Return the cleaned words of the file book as one long string, each word
+    followed by a single space.
+
+    process_file() got rather complicated and too sophisticated for this
+    problem, so the cleaning is simply repeated here: every line of the file
+    is handled the same way (hyphens split words, punctuation is stripped,
+    words are lower-cased).
+    '''
+    words = []
+    # The with-block closes the file even if reading fails.
+    with open(book) as fp:
+        for line in fp:
+            line = line.replace('-', ' ')
+            for word in line.split():
+                word = word.strip(string.punctuation + string.whitespace)
+                word = word.lower()
+                # The regular expression cleans up weird characters not
+                # included in string.punctuation.
+                word = re.sub(r'[\W_]+', '', word)
+                words.append(word)
+    # Join once at the end instead of growing the string word by word.
+    return "".join(word + " " for word in words)
+
+def exercise2(book):
+    """Return a sentence giving the number of distinct words counted in book."""
+    unique_words = len(process_file(book, True))
+    return "There are %d unique words in this work" % unique_words
+
+def exercise3(book):
+    """Return the top_20() report for the word histogram of book."""
+    return top_20(process_file(book, True))
+
+def exercise4(book):
+    """Return the list of words counted in book that are not found in "words.txt"."""
+    book_hist = process_file(book, True)
+    known_words = process_file("words.txt", False)
+    flagged = compare_lists(book_hist, known_words)
+    return return_true_words(flagged)  # long
+
+def writeitallout():
+    """
+    Run exercise1 to exercise4 on 'pg46.txt' and write each result, converted
+    with str(), to the file "execise<N>.txt".
+
+    The output file names keep the existing "execise" spelling so the files
+    already written under that name keep being updated.
+    """
+    # Referencing the functions directly avoids calling eval() on a name
+    # assembled from strings.
+    exercises = (exercise1, exercise2, exercise3, exercise4)
+    for i, exercise in enumerate(exercises, 1):
+        filename = "execise%d.txt" % i
+        # Report the name of the file that is really written.
+        print("Writing exercise %d to file %s" % (i, filename))
+        outtext = str(exercise('pg46.txt'))
+        # Compute the text before opening the file so a failing exercise does
+        # not leave a truncated output file; the with-block always closes it.
+        with open(filename, "w") as output:
+            output.write(outtext)
+
+writeitallout()
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		['', 'neighbours', 'jacob', 'warnt', 'poulterers', 'laundress', 'mr', 'execrable', 'endeavoured', 'dunstan', 'london', 'ebenezer', 'damascus', 'accusatory', 'recognising', 'crusoe', 'hadnt', 'waistcoats', 'conducive', 'everyhow', 'ebook', 'ironmongery', 'whomsoever', 'o', 'mens', 'reverently', 'behindhand', 'recognised', 'abels', 'iv', 'ii', 'im', 'youre', 'jose', 'schoolmaster', 'countrys', 'theyre', 'christmas', 'dilber', 'recollect', 'waistcoat', 'petrification', 'outstretched', 'undisturbed', 'blindmans', 'laocon', 'd', 'elses', 'munifi', 'wouldnt', 'december', 'sunday', 'frousy', 'marleys', 'browed', 'drowsiness', 'undigested', 'pastrycooks', 'spanish', 'unavailing', 'solemnised', 'despairingly', 'caroline', 'unaltered', 'scrutinise', 'lifes', 'flutterings', 'imploringly', 'foldings', 'defenceless', 'fezziwig', 'youd', 'pleasantry', 'endeavouring', 'hed', 'scro', 'broadwise', 'curiously', 'administered', 'menendez', 'recognise', 'ali', 'childs', 'overflowings', 'christian', '1843', 'fragrance', 'miserys', 'der', 'v', 'almshouse', 'robinson', 'shouldnt', 'a', 'isnt', 'norfolk', 'yo', 'fezziwigs', 'excrescence', 'robert', 'friday', 'dont', 'cornhill', 'rustlings', 'fred', 'i', 'wasnt', 'unconstrained', 'charitable', 'undertakers', 'weathercock', 'laundresss', 'belshazzars', 'c', 'coverley', 'hilli', 'whos', 'camden', 'ful', 'cratchits', 'belinda', 'tim', 'endeavour', 'passionless', 'wilkins', 'irresistibly', 'saturdays', 'penetrated', 'instalments', 'mrs', 'neighbouring', 'lamplighter', 'cratchit', 'oclock', 'skreeks', 'havent', 'tims', 'thats', 'wailings', 'lamplight', 'iii', 'didnt', 'orson', 'plentys', 'childrens', 'whitechapel', 'abrahams', 'marley', 'residuary', 'delicately', 'oge', 'uncurtained', 'uncared', 'martha', 'sheba', 'fruiterers', 'monstrous', 'youll', 'severally', 'observable', 'thankee', 'theyve', 'britain', 'ive', 'repleted', 'charles', 'pauls', 'hasnt', 'sprinklings', 'delightful', 'gutenberg', 'submissively', 'theyd', 'couldnt', 
'wheresoever', 'demeanour', 'inasmuch', 'shufflings', 'shant', 'dowerless', 'monday', 'unwatched']