-
Notifications
You must be signed in to change notification settings - Fork 0
/
ebooks.py
108 lines (94 loc) · 3.73 KB
/
ebooks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import random
import re
import sys
import twitter
import markov
from local_settings import *
from get_askreddit_data import get_titles
try:
# Python 3
from html.entities import name2codepoint as n2c
from urllib.request import urlopen
except ImportError:
# Python 2
from htmlentitydefs import name2codepoint as n2c
from urllib2 import urlopen
chr = unichr
from local_settings import *
def connect():
    """Build the python-twitter API client used by the bot.

    The four OAuth credentials are the ``MY_*`` constants star-imported from
    ``local_settings``. The client is created with ``tweet_mode='extended'``.

    Returns:
        The ``twitter.Api`` instance constructed from those settings.
    """
    credentials = {
        'consumer_key': MY_CONSUMER_KEY,
        'consumer_secret': MY_CONSUMER_SECRET,
        'access_token_key': MY_ACCESS_TOKEN_KEY,
        'access_token_secret': MY_ACCESS_TOKEN_SECRET,
    }
    client = twitter.Api(tweet_mode='extended', **credentials)
    return client
def entity(text):
    """Decode one HTML entity reference into the character it stands for.

    Accepts a complete reference including the leading ``&`` and trailing
    ``;``: decimal (``&#233;``), hexadecimal (``&#xe9;``) or named
    (``&eacute;``).

    Args:
        text: The entity reference to decode.

    Returns:
        The decoded character, or ``text`` unchanged when the reference is
        malformed, out of range, or uses an unknown entity name.
    """
    if text[:2] == "&#":
        # Numeric character reference: "&#x..." is hex, anything else decimal.
        try:
            if text[:3] == "&#x":
                return chr(int(text[3:-1], 16))
            return chr(int(text[2:-1]))
        except (ValueError, OverflowError):
            # Non-numeric digits or a code point chr() cannot represent:
            # leave the reference as it was rather than crashing.
            return text

    name = text[1:-1]
    if name == "apos":
        # Kept from the original: "apos" is looked up as "lsquo"
        # (presumably because the name table has no "apos" entry - confirm).
        name = "lsquo"
    try:
        # The table lookup is what can raise KeyError, so it must be inside
        # the try; unknown names fall through to returning the input.
        return chr(n2c[name])
    except KeyError:
        return text
def filter_status(text):
    """Normalise a status string before it is used as source text.

    Steps, in order:
      1. collapse every run of whitespace into a single space;
      2. drop double quotes and parentheses;
      3. decode HTML entity references (named and numeric) via ``entity``;
      4. replace the accented e (U+00E9) with a plain ``e``.

    Args:
        text: The raw status text.

    Returns:
        The cleaned-up text.
    """
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'["()]', '', text)
    # Decode in a single pass so already-decoded output is never re-scanned
    # (e.g. "&amp;gt;" becomes "&gt;", not ">"). The optional "#" lets
    # numeric references such as "&#39;" reach entity() as well.
    text = re.sub(r'&#?\w+;', lambda match: entity(match.group(0)), text)
    text = re.sub(r'\xe9', 'e', text)
    return text
# Candidate statuses with this many characters or more are rejected.
_MAX_STATUS_LENGTH = 210
# Number of candidate sentences to try before giving up on this run.
_MAX_ATTEMPTS = 10
# Matches a title that already ends in sentence punctuation or a quote mark.
_ENDS_WITH_PUNCTUATION = re.compile(r'[.!?"\']$')


def _is_too_similar(candidate, sources):
    """Return True if candidate, minus its last character, occurs in any source."""
    # The final character is dropped before comparing, as the original did.
    fragment = candidate[:-1]
    return any(fragment in source for source in sources)


def _rejection_reason(candidate, sources):
    """Return the message explaining why candidate is unusable, or None if it is fine."""
    if not candidate:
        return "Status is empty, sorry."
    if len(candidate) >= _MAX_STATUS_LENGTH:
        return "TOO LONG: " + candidate
    if _is_too_similar(candidate, sources):
        return "TOO SIMILAR. Generating a new status."
    return None


def main():
    """Generate one Markov-chain status from the source titles and post it.

    Flow:
      * Unless DEBUG is set, roll ``random.randint(0, ODDS - 1)`` and only
        continue when the roll is 0 (ODDS of 0 disables the roll).
      * Fetch the source titles with ``get_titles()`` and feed them to a
        ``markov.MarkovChainer(ORDER)``, appending "?" to any title that does
        not already end in punctuation or a quote.
      * Try up to ``_MAX_ATTEMPTS`` generated sentences and keep the first one
        that is non-empty, shorter than ``_MAX_STATUS_LENGTH`` and not
        contained in any source title. Each rejection is printed.
      * Post the status when not in DEBUG and ENABLE_TWITTER_POSTING is set,
        then print it.

    Exits via ``sys.exit()`` when the roll fails or no titles are found.
    """
    guess = 0
    if ODDS and not DEBUG:
        guess = random.randint(0, ODDS - 1)
    if guess:
        print(str(guess) + " No, sorry, not this time.")
        sys.exit()

    source_titles = get_titles()
    if not source_titles:
        print("No statuses found!")
        sys.exit()

    mine = markov.MarkovChainer(ORDER)
    for status in source_titles:
        if not _ENDS_WITH_PUNCTUATION.search(status):
            # Keep every training sentence terminated like a question.
            status += "?"
        mine.add_text(status)

    # Every candidate, including retries, goes through the same checks, so
    # an empty, over-long or copied sentence can never reach the posting step.
    ebook_status = None
    for _ in range(_MAX_ATTEMPTS):
        candidate = mine.generate_sentence()
        reason = _rejection_reason(candidate, source_titles)
        if reason is None:
            ebook_status = candidate
            break
        print(reason)

    if ebook_status is None:
        # All attempts were rejected; the reasons were printed above.
        return

    if not DEBUG and ENABLE_TWITTER_POSTING:
        # Only build the API client when something is actually posted.
        connect().PostUpdate(ebook_status)
    print(ebook_status)


if __name__ == "__main__":
    main()