-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from silshack/gh-pages
Update Repository
- Loading branch information
Showing
128 changed files
with
118,936 additions
and
18,802 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
site* | ||
_site/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
The MIT License (MIT) | ||
|
||
Copyright (c) 2013 Elliott Hauser | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
this software and associated documentation files (the "Software"), to deal in | ||
the Software without restriction, including without limitation the rights to | ||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | ||
the Software, and to permit persons to whom the Software is furnished to do so, | ||
subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | ||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | ||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
def add_names(list_of_names, file):
    """Append every name in ``list_of_names`` to ``file``, one per line.

    Args:
        list_of_names: Iterable of names. Each item is formatted with ``%s``
            and followed by a newline.
        file: Path of the file to append to (opened in ``'a'`` mode).
    """
    # 'a' mode appends to the end of the file. The ``with`` block closes the
    # file (making the changes visible) even if a write fails part-way.
    with open(file, 'a') as names_file:
        # This assumes one name per line.
        for name in list_of_names:
            names_file.write("%s\n" % (name,))
|
||
|
||
# Exercise: make new_names customizable:
new_names = ['John', 'Sarah', 'Taj']

# Exercise: make the file name used here customizable:
add_names(new_names, 'names.txt')
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
There are 4311 unique words in this work |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
The twenty most common terms in this work are: | ||
('the', 1574) | ||
('and', 1061) | ||
('a', 705) | ||
('to', 679) | ||
('of', 662) | ||
('in', 520) | ||
('it', 516) | ||
('he', 485) | ||
('was', 427) | ||
('his', 420) | ||
('that', 338) | ||
('i', 337) | ||
('scrooge', 314) | ||
('with', 269) | ||
('you', 233) | ||
('as', 228) | ||
('said', 221) | ||
('had', 205) | ||
('him', 198) | ||
('for', 197) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
['', 'neighbours', 'jacob', 'warnt', 'poulterers', 'laundress', 'mr', 'execrable', 'endeavoured', 'dunstan', 'london', 'ebenezer', 'damascus', 'accusatory', 'recognising', 'crusoe', 'hadnt', 'waistcoats', 'conducive', 'everyhow', 'ebook', 'ironmongery', 'whomsoever', 'o', 'mens', 'reverently', 'behindhand', 'recognised', 'abels', 'iv', 'ii', 'im', 'youre', 'jose', 'schoolmaster', 'countrys', 'theyre', 'christmas', 'dilber', 'recollect', 'waistcoat', 'petrification', 'outstretched', 'undisturbed', 'blindmans', 'laocon', 'd', 'elses', 'munifi', 'wouldnt', 'december', 'sunday', 'frousy', 'marleys', 'browed', 'drowsiness', 'undigested', 'pastrycooks', 'spanish', 'unavailing', 'solemnised', 'despairingly', 'caroline', 'unaltered', 'scrutinise', 'lifes', 'flutterings', 'imploringly', 'foldings', 'defenceless', 'fezziwig', 'youd', 'pleasantry', 'endeavouring', 'hed', 'scro', 'broadwise', 'curiously', 'administered', 'menendez', 'recognise', 'ali', 'childs', 'overflowings', 'christian', '1843', 'fragrance', 'miserys', 'der', 'v', 'almshouse', 'robinson', 'shouldnt', 'a', 'isnt', 'norfolk', 'yo', 'fezziwigs', 'excrescence', 'robert', 'friday', 'dont', 'cornhill', 'rustlings', 'fred', 'i', 'wasnt', 'unconstrained', 'charitable', 'undertakers', 'weathercock', 'laundresss', 'belshazzars', 'c', 'coverley', 'hilli', 'whos', 'camden', 'ful', 'cratchits', 'belinda', 'tim', 'endeavour', 'passionless', 'wilkins', 'irresistibly', 'saturdays', 'penetrated', 'instalments', 'mrs', 'neighbouring', 'lamplighter', 'cratchit', 'oclock', 'skreeks', 'havent', 'tims', 'thats', 'wailings', 'lamplight', 'iii', 'didnt', 'orson', 'plentys', 'childrens', 'whitechapel', 'abrahams', 'marley', 'residuary', 'delicately', 'oge', 'uncurtained', 'uncared', 'martha', 'sheba', 'fruiterers', 'monstrous', 'youll', 'severally', 'observable', 'thankee', 'theyve', 'britain', 'ive', 'repleted', 'charles', 'pauls', 'hasnt', 'sprinklings', 'delightful', 'gutenberg', 'submissively', 'theyd', 'couldnt', 
'wheresoever', 'demeanour', 'inasmuch', 'shufflings', 'shant', 'dowerless', 'monday', 'unwatched'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import re,string,operator,os | ||
|
||
def process_file(filename, guten):
    """Build a word-frequency histogram from the text file ``filename``.

    Args:
        filename: Path of the text file to read.
        guten: Truthy when the file begins with Project Gutenberg
            boilerplate: lines are then skipped until a start marker is
            seen. When falsy, counting begins with the first line. In both
            cases an end-marker line switches skipping back on.

    Returns:
        dict mapping each word (as normalised by ``process_line``) to the
        number of times it was counted.
    """
    # Line prefixes that delimit the actual text. According to the original
    # author's note, the "SMALL PRINT" marker is only needed for the
    # Shakespeare folios file.
    start_markers = ("*END*THE SMALL PRINT", "*** START OF THIS PR")
    end_marker = "*** END OF THIS PROJ"

    hist = dict()
    # True while we are inside header/footer boilerplate (lines are skipped).
    header = bool(guten)
    # ``with`` guarantees the file is closed, even if processing raises.
    with open(filename) as fp:
        for line in fp:
            # Checked first so the end-marker line itself is not counted.
            if line.startswith(end_marker):
                header = True
            if not header:
                process_line(line, hist)
            # Checked last so the start-marker line itself is not counted.
            if line.startswith(start_markers):
                header = False
    return hist
|
||
def process_line(line, hist):
    """Count the words of ``line`` into the histogram ``hist`` (in place).

    Hyphens are treated as word separators. Each whitespace-separated token
    is stripped of surrounding punctuation, lower-cased, and has any
    remaining non-word characters and underscores removed. Tokens that end
    up empty (pure punctuation) are ignored.

    Args:
        line: One line of text.
        hist: dict mapping word -> count; updated in place.
    """
    strip_chars = string.punctuation + string.whitespace

    # Replace hyphens first so "well-known" counts as "well" and "known".
    for token in line.replace('-', ' ').split():
        word = token.strip(strip_chars).lower()
        # Remove the few special characters not covered by string.punctuation
        # (and punctuation inside the word, e.g. the apostrophe in "don't").
        word = re.sub(r'[\W_]+', '', word)
        if not word:
            # The token was nothing but punctuation; an empty string is not
            # a word and must not appear in the histogram.
            continue
        hist[word] = hist.get(word, 0) + 1
|
||
def top_20(hist):
    """Return a printable report of the (up to) twenty most frequent words.

    Args:
        hist: dict mapping word -> count.

    Returns:
        A string consisting of a heading line followed by one
        ``(word, count)`` tuple per line, highest count first. If ``hist``
        holds fewer than twenty words, all of them are listed.
    """
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    ranked = sorted(hist.items(), key=operator.itemgetter(1), reverse=True)
    lines = ["The twenty most common terms in this work are:"]
    # Slicing never raises, unlike indexing 0..19 on a shorter list.
    lines.extend(str(entry) for entry in ranked[:20])
    return "\n".join(lines) + "\n"
|
||
def compare_lists(list1, list2):
    """Flag which entries of ``list1`` are missing from ``list2``.

    Args:
        list1: Iterable of words (iterating a dict yields its keys).
        list2: Container supporting ``in`` (dict, set, list, ...).

    Returns:
        dict mapping every word of ``list1`` to ``True`` when it is NOT in
        ``list2`` and ``False`` when it is.
    """
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    return {word: word not in list2 for word in list1}
|
||
def return_true_words(dic):
    """Return a list of the keys of ``dic`` whose values are truthy."""
    return [key for key in dic if dic[key]]
|
||
def exercise1(book):
    """Return the whole text of ``book`` as one normalised string.

    process_file() is more sophisticated than this problem needs, so the
    word clean-up is deliberately repeated here: every line of the file is
    processed (no header/footer skipping), hyphens become separators, and
    each token is stripped of surrounding punctuation, lower-cased and
    cleaned of remaining non-word characters.

    Args:
        book: Path of the text file to read.

    Returns:
        The cleaned words in file order, each one (including the last)
        followed by a single space.
    """
    strip_chars = string.punctuation + string.whitespace
    cleaned = []
    # ``with`` guarantees the file is closed, even if processing raises.
    with open(book) as fp:
        for line in fp:
            for token in line.replace('-', ' ').split():
                word = token.strip(strip_chars).lower()
                # The regular expression cleans up weird characters that
                # are not included in string.punctuation.
                cleaned.append(re.sub(r'[\W_]+', '', word))
    # Build the result once instead of growing a string word by word.
    return "".join(word + " " for word in cleaned)
|
||
def exercise2(book):
    """Return a sentence stating how many distinct words ``book`` contains.

    The book is read with process_file() in Gutenberg mode (``guten=True``).
    """
    unique_count = len(process_file(book, True))
    return "There are %d unique words in this work" % unique_count
|
||
def exercise3(book):
    """Return the top_20() report for ``book``.

    The book is read with process_file() in Gutenberg mode (``guten=True``).
    """
    return top_20(process_file(book, True))
|
||
def exercise4(book):
    """Return the words of ``book`` that do not occur in "words.txt".

    ``book`` is read in Gutenberg mode; "words.txt" is read as a plain file.
    The result is a (potentially long) list.
    """
    book_words = process_file(book, True)
    known_words = process_file("words.txt", False)
    flagged = compare_lists(book_words, known_words)
    return return_true_words(flagged)
|
||
def writeitallout(book='pg46.txt'):
    """Run exercises 1-4 on ``book`` and write each result to its own file.

    The result of exercise N is converted with ``str()`` and written to
    "exerciseN.txt" in the current directory, replacing any existing file.

    Args:
        book: Path of the text file handed to every exercise function.
            Defaults to 'pg46.txt', the file the script originally
            hard-coded.
    """
    # Reference the functions directly rather than eval()-ing a built-up
    # name: same dispatch, without executing a string as code.
    exercises = (exercise1, exercise2, exercise3, exercise4)
    for i, exercise in enumerate(exercises, 1):
        # Compute first, so a failing exercise does not leave behind an
        # empty, truncated output file.
        outtext = str(exercise(book))
        # Parenthesised form prints the same text on Python 2 and 3.
        print("Writing exercise %d to file exercise%d.txt" % (i, i))
        # The file name now matches the announced one (it was previously
        # misspelled "execise%d.txt"). ``with`` closes the file on error.
        with open("exercise%d.txt" % i, "w") as output:
            output.write(outtext)
|
||
# Module-level call: the four exercise output files are written whenever
# this file is executed.
writeitallout()
|
Oops, something went wrong.