forked from thiagodepaulo/total
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_text_classification.py
36 lines (30 loc) · 1.18 KB
/
run_text_classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 6 15:41:19 2017
@author: thiagodepaulo
"""
from text_classifier_task import experiment, create_pipes
from preprocessor import Preprocessor
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from util import Loader
from imbhn import IMBHN
import logging
# Parameters
s_dataset = '/exp/datasets/docs_rotulados/SyskillWebert-Parsed'

# Root logger at INFO level; it is also handed to experiment() below.
# NOTE: the module-level logging.info() calls are kept on purpose. Unlike
# logger.info(), they install a default handler on the root logger when none
# is configured, so the INFO messages are actually emitted.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Load dataset
logging.info("loading dataset")
l = Loader()
d = l.from_files(s_dataset)
logging.info("done")

# Candidate values handed to experiment() under the 'preprocessor' and 'clf'
# keys (how they are combined is decided inside experiment()).
parameters = {
    'preprocessor': [None, Preprocessor(lang='english')],
    'clf': [
        SVC(),
        MultinomialNB(),
        MultinomialNB(alpha=0.01),
        BernoulliNB(alpha=.01),
        IMBHN(),
    ],
}

# Repeated 10x10 stratified splitter. Not used by the experiment() call below;
# kept so it can be swapped in for `cv`.
rcv = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=0)

# Plain 10-fold stratified splitter, without shuffling.
# `random_state` is intentionally not passed: with shuffle=False it has no
# effect, and scikit-learn >= 0.24 raises ValueError when it is set anyway.
# Omitting it yields the same folds older releases produced for
# StratifiedKFold(n_splits=10, random_state=0).
cv = StratifiedKFold(n_splits=10)

experiment(
    d,
    create_pipes(cache=True),
    parameters,
    logger=logger,
    cv=cv,
    scoring=['accuracy'],
)