-
Notifications
You must be signed in to change notification settings - Fork 1
/
demo.py
40 lines (31 loc) · 1.69 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
def demo(path='Restaurant_Reviews.tsv', *, language='english',
         words_to_be_included=None, words_to_be_excluded=None,
         no_reviews=1000, maximum_features=1500, test_set_size=0.2,
         number_of_trees=1000):
    """Run the review-classification demo end to end.

    Loads a tab-separated reviews file, hands it to the helpers in the
    project's ``review_classification`` module to build a corpus and a
    Bag of Words, splits the result into training and test sets, and
    evaluates three classifiers (Naive Bayes, Random Forest, Logistic
    Regression) through that same module.

    Calling ``demo()`` with no arguments uses the values that were
    previously hard-coded in the body.

    Args:
        path: Path of the tab-separated reviews file.
        language: Language of the reviews; the accepted values are listed
            in the project's README.md.
        words_to_be_included: Words that should be part of the reviews.
            Defaults to ``['not', 'nor', 'no']``.
        words_to_be_excluded: Words that should not be part of the
            reviews. Defaults to ``['opinion']``.
        no_reviews: Number of reviews, forwarded to ``clean_reviews``.
        maximum_features: Maximum number of elements the Bag of Words
            may contain.
        test_set_size: Share of the data held out as the test set,
            forwarded as ``test_size`` to ``train_test_split``.
        number_of_trees: Number of trees, forwarded to ``random_forest``.

    Returns:
        A dict mapping ``'naive_bayes'``, ``'random_forest'`` and
        ``'logistic_regression'`` to the ``(accuracy, score)`` pair
        returned by the corresponding ``review_classification`` helper.
        NOTE(review): the exact meaning of each value is defined by
        ``review_classification`` -- confirm there.
    """
    import csv

    import pandas as pd
    # sklearn.cross_validation was deprecated in scikit-learn 0.18 and
    # removed in 0.20; model_selection is its replacement. The fallback
    # keeps the demo working on installations older than 0.18.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    # Imported here rather than relying on the module-level name bound in
    # the __main__ guard, so demo() also works when this file is imported.
    import review_classification

    # Fresh lists on every call: avoids sharing mutable default arguments.
    if words_to_be_included is None:
        words_to_be_included = ['not', 'nor', 'no']
    if words_to_be_excluded is None:
        words_to_be_excluded = ['opinion']

    # Importing the dataset.
    # delimiter='\t' selects TAB as the delimiter instead of the classical ",".
    # csv.QUOTE_NONE (== 3) ignores the "" present in the reviews.
    dataset = pd.read_csv(path, delimiter='\t', quoting=csv.QUOTE_NONE)

    corpus = review_classification.clean_reviews(
        no_reviews, language, words_to_be_excluded, words_to_be_included,
        dataset)
    X, y = review_classification.bag_of_words(dataset, corpus,
                                              maximum_features)

    # Splitting the dataset into the Training set and Test set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_set_size)

    # Naive Bayes
    accuracy_bayes, score_bayes = review_classification.naive_bayes(
        X_train, y_train, X_test, y_test)

    # Random Forest
    accuracy_random_forest, score_random_forest = (
        review_classification.random_forest(
            number_of_trees, X_train, y_train, X_test, y_test))

    # Logistic Regression
    accuracy_LR, score_LR = review_classification.logistic_regression(
        X_train, y_train, X_test, y_test)

    # Hand the results back instead of discarding them; callers that
    # ignored the previous None return value are unaffected.
    return {
        'naive_bayes': (accuracy_bayes, score_bayes),
        'random_forest': (accuracy_random_forest, score_random_forest),
        'logistic_regression': (accuracy_LR, score_LR),
    }
if __name__ == "__main__":
    # Script entry point: bind review_classification at module level, then
    # run the demo. Nothing here executes when this file is imported.
    import review_classification
    demo()