-
Notifications
You must be signed in to change notification settings - Fork 0
/
Constants.java
143 lines (118 loc) · 3.98 KB
/
Constants.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/*
* Constants and default values used in the application.
*
* Copyright (C) 2013 Lisa Vitolo <[email protected]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the Creative Commons
* Attribution-NonCommercial-ShareAlike 3.0 license.
* You should have received a copy of the license with this product.
* Otherwise, visit http://creativecommons.org/licenses/by-nc-sa/3.0/
*/
import libsvm.svm_parameter;
/**
 * Central holder for the constants and default values used in the application.
 *
 * <p>Fields declared {@code final} never change. The Google corpus folder, the
 * similarity model path, the LSA matrix path and the LSA vector size are only
 * defaults: they can be replaced at runtime through the setters below. All state
 * is static, and the setters perform no synchronization and no validation of
 * their arguments.
 *
 * <p>Every getter that returns an array hands out a fresh copy, so callers may
 * modify the returned array without affecting the values stored here.
 */
public class Constants
{
    private static final int FEATURE_SIZE = 19;

    /*
     * NOTE(review): "the" is listed twice. The duplicate is kept so the length and
     * contents of the array returned by getStopWords() stay exactly as before.
     */
    private static final String[] STOP_WORDS = {"i", "a", "about", "an", "are", "as", "at", "be", "by", "for", "from",
                                                "how", "in", "is", "it", "of", "on", "or", "that", "the", "this", "to",
                                                "was", "what", "when", "where", "who", "will", "with", "the", "'s", "did",
                                                "have", "has", "had", "were", "'ll"};

    private static final String WORD_FREQUENCIES_PATH = "word-frequencies.txt";

    /*
     * These paths and parameters are all configurable through a Properties file, so
     * setters are also provided and the fields are not declared final. The values
     * below are the defaults.
     */
    private static String googleCorpusFolder = "googlebooks/";
    private static String similarityModelPath = "similarityModel.txt";
    private static String lsaMatrixPath = "lsa_matrix.txt";
    private static int lsaVectorSize = 100;

    /* Constants for CrossValidation.java */
    private static final int CROSS_VALIDATION_FOLD = 10;
    private static final double[] C_VALUES = {1, 2, 5, 10, 20, 50, 100, 200, 500, 1000};
    private static final double[] P_VALUES = {1, 0.5, 0.2, 0.1, 0.05, 0.02, 0.01};
    private static final double[] G_VALUES = {2, 1, 0.5, 0.2, 0.1, 0.05, 0.02, 0.01, 0.005, 0.002};

    /* Optimal parameters for the SV regressor */
    private static final double C = 1;
    private static final double P = 0.02;
    private static final double G = 2;

    /**
     * @return the fixed feature count ({@code FEATURE_SIZE}, currently 19)
     */
    public static int getFeatureNumber()
    {
        return FEATURE_SIZE;
    }

    /**
     * Returns the stop-word list.
     *
     * @return a new array holding the stop words; modifying it does not affect
     *         later calls
     */
    public static String[] getStopWords()
    {
        /* Defensive copy: the backing array is shared static state. */
        return STOP_WORDS.clone();
    }

    /**
     * @return the currently configured Google corpus folder
     *         (default {@code "googlebooks/"})
     */
    public static String getGoogleCorpusFolder()
    {
        return googleCorpusFolder;
    }

    /**
     * @return the fixed path of the word-frequencies file
     */
    public static String getWordFrequenciesPath()
    {
        return WORD_FREQUENCIES_PATH;
    }

    /**
     * @return the currently configured similarity model path
     *         (default {@code "similarityModel.txt"})
     */
    public static String getSimilarityModelPath()
    {
        return similarityModelPath;
    }

    /**
     * @return the currently configured LSA matrix path
     *         (default {@code "lsa_matrix.txt"})
     */
    public static String getLSAMatrixPath()
    {
        return lsaMatrixPath;
    }

    /**
     * @return the currently configured LSA vector size (default 100)
     */
    public static int getLSAVectorSize()
    {
        return lsaVectorSize;
    }

    /**
     * Builds a new {@code svm_parameter} configured for epsilon-SVR with an RBF
     * kernel.
     *
     * <p>Only {@code svm_type}, {@code kernel_type}, {@code cache_size} and
     * {@code eps} are assigned here; every other field keeps whatever value the
     * {@code svm_parameter} constructor gives it. NOTE(review): C, p and gamma are
     * presumably filled in by the caller (see {@link #getBestC()},
     * {@link #getBestP()}, {@link #getBestGamma()}) - confirm against the call
     * sites.
     *
     * @return a freshly allocated parameter object; a new instance on every call
     */
    public static svm_parameter getSVMParameters()
    {
        svm_parameter param = new svm_parameter();

        /* These parameters are taken from the Takelab implementation. */
        param.svm_type = svm_parameter.EPSILON_SVR;
        param.kernel_type = svm_parameter.RBF;
        param.cache_size = 10.0;
        param.eps = 0.001;

        return param;
    }

    /**
     * @return the number of folds used for cross validation (currently 10)
     */
    public static int getValidationFold()
    {
        return CROSS_VALIDATION_FOLD;
    }

    /**
     * @return a new array with the candidate C values used by cross validation
     */
    public static double[] getCValues()
    {
        /* Defensive copy: the backing array is shared static state. */
        return C_VALUES.clone();
    }

    /**
     * @return a new array with the candidate P values used by cross validation
     */
    public static double[] getPValues()
    {
        /* Defensive copy: the backing array is shared static state. */
        return P_VALUES.clone();
    }

    /**
     * @return a new array with the candidate gamma values used by cross validation
     */
    public static double[] getGammaValues()
    {
        /* Defensive copy: the backing array is shared static state. */
        return G_VALUES.clone();
    }

    /**
     * @return the C value recorded as optimal for the SV regressor
     */
    public static double getBestC()
    {
        return C;
    }

    /**
     * @return the P value recorded as optimal for the SV regressor
     */
    public static double getBestP()
    {
        return P;
    }

    /**
     * @return the gamma value recorded as optimal for the SV regressor
     */
    public static double getBestGamma()
    {
        return G;
    }

    /**
     * Replaces the Google corpus folder.
     *
     * @param f the new folder; stored as given, without validation
     */
    public static void setGoogleCorpusFolder(String f)
    {
        googleCorpusFolder = f;
    }

    /**
     * Replaces the LSA matrix path together with the vector size that goes with it.
     *
     * @param matrixFile the new matrix path; stored as given, without validation
     * @param vectorSize the new LSA vector size; stored as given, without validation
     */
    public static void setLsaMatrixPath(String matrixFile, int vectorSize)
    {
        lsaMatrixPath = matrixFile;
        lsaVectorSize = vectorSize;
    }

    /**
     * Replaces the similarity model path.
     *
     * @param f the new path; stored as given, without validation
     */
    public static void setSimilarityModelPath(String f)
    {
        similarityModelPath = f;
    }
}