Language_Processor.py
import nltk
from nltk import word_tokenize, sent_tokenize, pos_tag
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords #This gives us a corpus of stop words
from nltk.stem import PorterStemmer #This will remove suffixes from words
from nltk.corpus import wordnet #This will give us synonyms and antonyms of words
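
#One-time setup: NLTK needs these resources downloaded before the calls below
#will work. Uncomment on a fresh install (resource names can vary by NLTK version):
#nltk.download('punkt') #Tokenizer models for word_tokenize/sent_tokenize
#nltk.download('stopwords') #The stop word corpus
#nltk.download('averaged_perceptron_tagger') #The model behind pos_tag
#nltk.download('vader_lexicon') #The lexicon behind SentimentIntensityAnalyzer
#nltk.download('wordnet') #The WordNet corpus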
#stopWords = set(stopwords.words('english')) #Building a list of stop words
#words = word_tokenize(test)
#wordsFiltered = []
#for w in words: #Goes through our list of words
# if w not in stopWords: #See if they are not a stop word
# wordsFiltered.append(w) #If not, append to the list
#print(wordsFiltered)#This shows all words that were NOT stopwords
#tokens = word_tokenize(test) #Tokenizes the individual words
#print(pos_tag(tokens))
#tokens = sent_tokenize(test) #Tokenizes the full sentence
#print(pos_tag(tokens))
#sia = SentimentIntensityAnalyzer() #VADER instance
#print(sia.polarity_scores(test)) #Using the VADER instance to judge intent
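
#wordnet sketch (illustrative only; not used by give_context below):
#syns = wordnet.synsets("good") #All WordNet synsets for the word "good"
#print([lemma.name() for lemma in syns[0].lemmas()]) #Synonyms in the first synset
#print([ant.name() for lemma in syns[0].lemmas() for ant in lemma.antonyms()]) #Antonyms, if any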
def give_context(raw_text):
    #context = [] #This will store all the meaningful context
    stopWords = set(stopwords.words('english')) #Building a set of stop words
    words = word_tokenize(raw_text) #Gets a list of all individual words in the raw input text string
    wordsFiltered = "" #Will store all the non-stop words
    stemmer = PorterStemmer() #Gets an instance of the stemmer (currently unused)
    sia = SentimentIntensityAnalyzer() #Instance of VADER
    scores = sia.polarity_scores(raw_text) #Dict of neg/neu/pos/compound scores
    scores.pop('compound') #compound is an aggregate score, not an emotion
    sentiment = max(scores, key=scores.get) #Gets the label of the most prominent emotion
    #context.append(sentiment)
    for w in words: #Goes through our list of words
        if w not in stopWords: #See if it is not a stop word
            wordsFiltered = wordsFiltered + " " + w #If not, append it to the string
    #context.append(wordsFiltered)
    raw_tags_stored = "" #Will store the tags for the raw text
    tagged = word_tokenize(raw_text) #Tokenize the raw text
    tagged = pos_tag(tagged) #Tag the tokens
    for tags in tagged:
        raw_tags_stored = raw_tags_stored + " " + tags[1] #Builds a string of tags that lines up with the raw words
    tags_stored = "" #Will store the tags for the filtered text
    tagged = word_tokenize(wordsFiltered) #Tokenize the filtered string
    tagged = pos_tag(tagged) #Tag the tokens
    for tags in tagged:
        tags_stored = tags_stored + " " + tags[1] #Builds a string of tags that lines up with the filtered words
    context = [] #This will store all the meaningful context
    context.append(raw_text) #The first element is the original string
    context.append(raw_tags_stored) #The second element is the tags of the raw string
    context.append(sentiment) #The third element is the dominant VADER emotion of the string
    context.append(wordsFiltered) #The fourth element is a string with all stop words removed
    context.append(tags_stored) #The fifth element is a string of the parts of speech/tags of the non-stop words [PATTERNS]
    return context
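
#Illustrative call (exact tags and emotion depend on NLTK's models):
#give_context("The pizza was very good") returns a five-element list:
# [the original string,
#  the POS tags of the raw words,
#  the dominant emotion ('neg', 'neu', or 'pos'),
#  the non-stop words joined into one string,
#  the POS tags of those non-stop words]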
print ("Please type a sentence to find out what the context is [DEBUGGING PURPOSES ONLY]")
print ("Type: exit() when finished")
text = ""
while (1):
text = input("> ")
if (text == "exit()"):
break
print(give_context(text))