-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwc.py
35 lines (26 loc) · 891 Bytes
/
wc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from pymongo import MongoClient
from PIL import Image
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from nlp.stopword import StopwordsRemover
# Script: count how often each non-stopword term occurs across all documents
# of the `novinky` collection and render the counts as a word-cloud image.

# Term -> number of occurrences over every document read from the collection.
dictionary = {}
stopwords = StopwordsRemover()

# NOTE(review): the connection URI embeds credentials directly in the source
# file; consider loading it from configuration or the environment instead.
# The client is used as a context manager so the connection is closed once
# the documents have been consumed, even if iteration raises.
with MongoClient("mongodb+srv://lda-nlp:[email protected]/myFirstDatabase?retryWrites=true&w=majority") as client:
    db = client['topicmodeling']
    collection = db['novinky']

    # An empty filter selects every document in the collection.
    cursor = collection.find({})

    # The cursor must be iterated while the client is still open.
    for document in cursor:
        # Each document is expected to carry an iterable of terms under
        # the "terms" key; a document without it raises KeyError.
        for term in document["terms"]:
            if not stopwords.is_stopword(term):
                dictionary[term] = dictionary.get(term, 0) + 1

# Dump the raw frequency table to stdout for inspection.
print(dictionary)

# Render the frequencies as a 1920x1080 image on a white background,
# limited to at most 200 words, and save it next to the script's CWD.
wordcloud = WordCloud(
    background_color="white",
    width=1920,
    height=1080,
    max_words=200,
    normalize_plurals=False,
).generate_from_frequencies(dictionary)
wordcloud.to_file('wordcloud.png')