|
1 | 1 | package zingg.common.core.data;
|
2 | 2 |
|
3 | 3 | import zingg.common.core.model.InputDataModel;
|
4 |
| -import zingg.common.core.model.Event; |
5 |
| -import zingg.common.core.model.EventPair; |
6 |
| -import zingg.common.core.model.Statement; |
7 |
| -import zingg.common.core.model.PostStopWordProcess; |
8 |
| -import zingg.common.core.model.PriorStopWordProcess; |
| 4 | +import zingg.common.core.preprocess.model.PostStopWordProcess; |
| 5 | +import zingg.common.core.preprocess.model.PriorStopWordProcess; |
9 | 6 |
|
10 | 7 | import java.util.ArrayList;
|
11 | 8 | import java.util.List;
|
12 | 9 |
|
13 | 10 | public class EventTestData {
|
14 |
| - public static List<Event> createSampleEventData() { |
15 |
| - |
16 |
| - int row_id = 1; |
17 |
| - List<Event> sample = new ArrayList<Event>(); |
18 |
| - sample.add(new Event(row_id++, 1942, "quit India", "Mahatma Gandhi")); |
19 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
20 |
| - sample.add(new Event(row_id++, 1930, "Civil Disob", "India")); |
21 |
| - sample.add(new Event(row_id++, 1942, "quit India", "Mahatma Gandhi")); |
22 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
23 |
| - sample.add(new Event(row_id++, 1930, "Civil Disobidience", "India")); |
24 |
| - sample.add(new Event(row_id++, 1942, "quit Hindustan", "Mahatma Gandhi")); |
25 |
| - sample.add(new Event(row_id++, 1919, "JW", "Amritsar")); |
26 |
| - sample.add(new Event(row_id++, 1930, "Civil Dis", "India")); |
27 |
| - sample.add(new Event(row_id++, 1942, "quit Nation", "Mahatma")); |
28 |
| - sample.add(new Event(row_id++, 1919, "JallianWal", "Punjb")); |
29 |
| - sample.add((new Event(row_id++, 1942, "quit N", "Mahatma"))); |
30 |
| - sample.add((new Event(row_id++, 1919, "JallianWal", "Punjb"))); |
31 |
| - sample.add(new Event(row_id++, 1942, "quit ", "Mahatm")); |
32 |
| - sample.add(new Event(row_id++, 1942, "quit Ntn", "Mahama")); |
33 |
| - sample.add(new Event(row_id++, 1942, "quit Natin", "Mahaatma")); |
34 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
35 |
| - sample.add(new Event(row_id++, 1930, "Civil Disob", "India")); |
36 |
| - sample.add(new Event(row_id++, 1942, "quit India", "Mahatma Gandhi")); |
37 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
38 |
| - sample.add(new Event(row_id++, 1930, "Civil Disobidience", "India")); |
39 |
| - sample.add(new Event(row_id++, 1942, "Quit Bharat", "Mahatma Gandhi")); |
40 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
41 |
| - sample.add(new Event(row_id++, 1930, "Civil Disobidence", "India")); |
42 |
| - sample.add(new Event(row_id++, 1942, "quit Hindustan", "Mahatma Gandhi")); |
43 |
| - sample.add(new Event(row_id++, 1919, "JW", "Amritsar")); |
44 |
| - sample.add(new Event(row_id++, 1930, "Civil Dis", "India")); |
45 |
| - sample.add(new Event(row_id++, 1942, "quit Nation", "Mahatma")); |
46 |
| - sample.add(new Event(row_id++, 1919, "JallianWal", "Punjb")); |
47 |
| - sample.add(new Event(row_id++, 1942, "quit N", "Mahatma")); |
48 |
| - sample.add(new Event(row_id++, 1919, "JallianWal", "Punjb")); |
49 |
| - sample.add(new Event(row_id++, 1942, "quit ", "Mahatm")); |
50 |
| - sample.add(new Event(row_id++, 1942, "quit Ntn", "Mahama")); |
51 |
| - sample.add(new Event(row_id++, 1942, "quit Natin", "Mahaatma")); |
52 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
53 |
| - sample.add(new Event(row_id++, 1930, "Civil Disob", "India")); |
54 |
| - sample.add(new Event(row_id++, 1942, "quit India", "Mahatma Gandhi")); |
55 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
56 |
| - sample.add(new Event(row_id++, 1930, "Civil Disobidience", "India")); |
57 |
| - sample.add(new Event(row_id++, 1942, "Quit Bharat", "Mahatma Gandhi")); |
58 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
59 |
| - sample.add(new Event(row_id++, 1930, "Civil Disobidence", "India")); |
60 |
| - sample.add(new Event(row_id++, 1942, "quit Hindustan", "Mahatma Gandhi")); |
61 |
| - sample.add(new Event(row_id++, 1919, "JW", "Amritsar")); |
62 |
| - sample.add(new Event(row_id++, 1930, "Civil Dis", "India")); |
63 |
| - sample.add(new Event(row_id++, 1942, "quit Nation", "Mahatma")); |
64 |
| - sample.add(new Event(row_id++, 1919, "JallianWal", "Punjb")); |
65 |
| - sample.add(new Event(row_id++, 1942, "quit N", "Mahatma")); |
66 |
| - sample.add(new Event(row_id++, 1919, "JallianWal", "Punjb")); |
67 |
| - sample.add(new Event(row_id++, 1942, "quit ", "Mahatm")); |
68 |
| - sample.add(new Event(row_id++, 1942, "quit Ntn", "Mahama")); |
69 |
| - sample.add(new Event(row_id++, 1942, "quit Natin", "Mahaatma")); |
70 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
71 |
| - sample.add(new Event(row_id++, 1930, "Civil Disob", "India")); |
72 |
| - sample.add(new Event(row_id++, 1942, "quit India", "Mahatma Gandhi")); |
73 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
74 |
| - sample.add(new Event(row_id++, 1930, "Civil Disobidience", "India")); |
75 |
| - sample.add(new Event(row_id++, 1942, "Quit Bharat", "Mahatma Gandhi")); |
76 |
| - sample.add(new Event(row_id++, 1919, "JallianWala", "Punjab")); |
77 |
| - sample.add(new Event(row_id++, 1930, "Civil Disobidence", "India")); |
78 |
| - sample.add(new Event(row_id++, 1942, "quit Hindustan", "Mahatma Gandhi")); |
79 |
| - sample.add(new Event(row_id++, 1919, "JW", "Amritsar")); |
80 |
| - sample.add(new Event(row_id++, 1930, "Civil Dis", "India")); |
81 |
| - sample.add(new Event(row_id++, 1942, "quit Nation", "Mahatma")); |
82 |
| - sample.add(new Event(row_id++, 1919, "JallianWal", "Punjb")); |
83 |
| - sample.add(new Event(row_id++, 1942, "quit N", "Mahatma")); |
84 |
| - sample.add(new Event(row_id++, 1919, "JallianWal", "Punjb")); |
85 |
| - sample.add(new Event(row_id++, 1942, "quit ", "Mahatm")); |
86 |
| - sample.add(new Event(row_id++, 1942, "quit Ntn", "Mahama")); |
87 |
| - sample.add(new Event(row_id, 1942, "quit Natin", "Mahaatma")); |
88 |
| - |
89 |
| - return sample; |
90 |
| - } |
91 |
| - |
92 |
| - public static List<EventPair> createSampleClusterEventData() { |
93 |
| - |
94 |
| - int row_id = 1; |
95 |
| - List<EventPair> sample = new ArrayList<EventPair>(); |
96 |
| - sample.add(new EventPair(row_id++, 1942, "quit Nation", "Mahatma",1942, "quit Nation", "Mahatma", 1L)); |
97 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
98 |
| - sample.add(new EventPair(row_id++, 1942, "quit N", "Mahatma", 1942, "quit N", "Mahatma", 1L)); |
99 |
| - sample.add(new EventPair(row_id++, 1942, "quit N", "Mahatma",1942, "quit N", "Mahatma", 1L)); |
100 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
101 |
| - sample.add(new EventPair(row_id++, 1942, "quit ", "Mahatm", 1942, "quit ", "Mahatm", 1L)); |
102 |
| - sample.add(new EventPair(row_id++, 1942, "quit Ntn", "Mahama", 1942, "quit Ntn", "Mahama", 1L)); |
103 |
| - sample.add(new EventPair(row_id++, 1942, "quit Ntn", "Mahama", 1942, "quit Ntn", "Mahama", 1L)); |
104 |
| - sample.add(new EventPair(row_id++, 1942, "quit Natin", "Mahaatma", 1942, "quit Natin", "Mahaatma", 1L)); |
105 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
106 |
| - sample.add(new EventPair(row_id++, 1942, "quit N", "Mahatma", 1942, "quit N", "Mahatma", 1L)); |
107 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
108 |
| - sample.add(new EventPair(row_id++, 1942, "quit ", "Mahatm", 1942, "quit ", "Mahatm", 1L)); |
109 |
| - sample.add(new EventPair(row_id++, 1942, "quit Ntn", "Mahama", 1942, "quit Ntn", "Mahama", 1L)); |
110 |
| - sample.add(new EventPair(row_id++, 1942, "quit Natin", "Mahaatma", 1942, "quit Natin", "Mahaatma", 1L)); |
111 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
112 |
| - sample.add(new EventPair(row_id++, 1942, "quit N", "Mahatma", 1942, "quit N", "Mahatma", 1L)); |
113 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
114 |
| - sample.add(new EventPair(row_id++, 1942, "quit ", "Mahatm", 1942, "quit ", "Mahatm", 1L)); |
115 |
| - sample.add(new EventPair(row_id++, 1942, "quit Ntn", "Mahama", 1942, "quit Ntn", "Mahama", 1L)); |
116 |
| - sample.add(new EventPair(row_id++, 1942, "quit Natin", "Mahaatma", 1942, "quit Natin", "Mahaatma", 1L)); |
117 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
118 |
| - sample.add(new EventPair(row_id++, 1942, "quit N", "Mahatma", 1942, "quit N", "Mahatma", 1L)); |
119 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
120 |
| - sample.add(new EventPair(row_id++, 1942, "quit ", "Mahatm", 1942, "quit ", "Mahatm", 1L)); |
121 |
| - sample.add(new EventPair(row_id++, 1942, "quit Ntn", "Mahama", 1942, "quit Ntn", "Mahama", 1L)); |
122 |
| - sample.add(new EventPair(row_id++, 1942, "quit Natin", "Mahaatma", 1942, "quit Natin", "Mahaatma", 1L)); |
123 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
124 |
| - sample.add(new EventPair(row_id++, 1942, "quit N", "Mahatma", 1942, "quit N", "Mahatma", 1L)); |
125 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
126 |
| - sample.add(new EventPair(row_id++, 1942, "quit ", "Mahatm", 1942, "quit ", "Mahatm", 1L)); |
127 |
| - sample.add(new EventPair(row_id++, 1942, "quit Ntn", "Mahama", 1942, "quit Ntn", "Mahama", 1L)); |
128 |
| - sample.add(new EventPair(row_id++, 1942, "quit Natin", "Mahaatma", 1942, "quit Natin", "Mahaatma", 1L)); |
129 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
130 |
| - sample.add(new EventPair(row_id++, 1942, "quit N", "Mahatma", 1942, "quit N", "Mahatma", 1L)); |
131 |
| - sample.add(new EventPair(row_id++, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
132 |
| - sample.add(new EventPair(row_id++, 1942, "quit ", "Mahatm", 1942, "quit ", "Mahatm", 1L)); |
133 |
| - sample.add(new EventPair(row_id++, 1942, "quit Ntn", "Mahama", 1942, "quit Ntn", "Mahama", 1L)); |
134 |
| - sample.add(new EventPair(row_id++, 1942, "quit Natin", "Mahaatma", 1942, "quit Natin", "Mahaatma", 1L)); |
135 |
| - sample.add(new EventPair(row_id, 1919, "JallianWal", "Punjb", 1919, "JallianWal", "Punjb", 2L)); |
136 |
| - |
137 |
| - return sample; |
138 |
| - } |
139 |
| - |
140 |
| - public static List<Statement> getData1Original() { |
141 |
| - |
142 |
| - List<Statement> sample = new ArrayList<Statement>(); |
143 |
| - sample.add(new Statement("the zingg is a spark application")); |
144 |
| - sample.add(new Statement("it is very popular in data science")); |
145 |
| - sample.add(new Statement("it is written in java and scala")); |
146 |
| - sample.add(new Statement("best of luck to zingg")); |
147 |
| - |
148 |
| - return sample; |
149 |
| - } |
150 |
| - |
151 |
| - public static List<Statement> getData1Expected() { |
152 |
| - |
153 |
| - List<Statement> sample = new ArrayList<Statement>(); |
154 |
| - sample.add(new Statement("zingg spark application")); |
155 |
| - sample.add(new Statement("very popular in data science")); |
156 |
| - sample.add(new Statement("written in java and scala")); |
157 |
| - sample.add(new Statement("best luck to zingg")); |
158 |
| - |
159 |
| - return sample; |
160 |
| - } |
161 |
| - |
162 |
| - public static List<PriorStopWordProcess> getData2Original() { |
163 |
| - |
164 |
| - List<PriorStopWordProcess> sample = new ArrayList<PriorStopWordProcess>(); |
165 |
| - sample.add(new PriorStopWordProcess("10", "the zingg is a spark application", "two", |
166 |
| - "Yes. a good application", "test")); |
167 |
| - sample.add(new PriorStopWordProcess("20", "it is very popular in data science", "Three", "true indeed", |
168 |
| - "test")); |
169 |
| - sample.add(new PriorStopWordProcess("30", "it is written in java and scala", "four", "", "test")); |
170 |
| - sample.add(new PriorStopWordProcess("40", "best of luck to zingg mobile/t-mobile", "Five", "thank you", "test")); |
171 |
| - |
172 |
| - return sample; |
173 |
| - } |
174 |
| - |
175 |
| - public static List<PriorStopWordProcess> getData2Expected() { |
176 |
| - |
177 |
| - List<PriorStopWordProcess> sample = new ArrayList<PriorStopWordProcess>(); |
178 |
| - sample.add(new PriorStopWordProcess("10", "zingg spark application", "two", "Yes. a good application", "test")); |
179 |
| - sample.add(new PriorStopWordProcess("20", "very popular data science", "Three", "true indeed", "test")); |
180 |
| - sample.add(new PriorStopWordProcess("30", "written java scala", "four", "", "test")); |
181 |
| - sample.add(new PriorStopWordProcess("40", "best luck to zingg ", "Five", "thank you", "test")); |
182 |
| - |
183 |
| - return sample; |
184 |
| - } |
185 | 11 |
|
186 | 12 | public static List<PriorStopWordProcess> getDataInputPreProcessed() {
|
187 | 13 |
|
|
0 commit comments