-
Notifications
You must be signed in to change notification settings - Fork 5
/
nlp_form.html
265 lines (262 loc) · 12.3 KB
/
nlp_form.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
<html>
<head>
<title>Coptic NLP Service</title>
<link rel="stylesheet" href="css/global.css" type="text/css" charset="utf-8"/>
<link rel="stylesheet" href="css/nlp.css" type="text/css" charset="utf-8"/>
<link rel="stylesheet" href="css/pagination.css" type="text/css" charset="utf-8"/>
<link rel="stylesheet" href="css/font-awesome-4.2.0/css/font-awesome.min.css"/>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=800">
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
<link rel="icon" href="favicon.ico" type="image/x-icon">
<script>var __adobewebfontsappname__="dreamweaver"</script>
<link rel="stylesheet" href="https://use.edgefonts.net/c/dbcb1c/1w;asul,2,WXx:W:n4/l" media="all">
<!--<script src="http://use.edgefonts.net/asul:n4:default.js" type="text/javascript"></script>-->
</head>
<body class="home">
<div id="wrapper">
<header id="header">
**navbar**
</header>
<form id="nlp_form" class="nlp_form" method="post" action="/coptic-nlp/**action_dest**">
<h2 class="nlp_title">Coptic NLP Service</h2>
**access_message**
<div>
<h3 class="nlp_title">Input:</h3>
<input type="radio" name="lb" value="line" >My data contains meaningful linebreaks
<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
This inserts &lt;line&gt;..&lt;/line&gt; tags around each line of text.<br/>
If you already have &lt;lb/&gt; tags or your data is already tokenized, you
probably want to ignore line breaks.
<br/>
</span>
</a>
</input>
<br/>
<input type="radio" name="lb" value="noline"**noline_checked**>Ignore linebreaks in my data</input>
<br/>
<h3 class="nlp_title">Output:</h3>
<table>
<tr><td colspan="2" style="padding-bottom: 10px"><input type="checkbox" id="old_tok" name="old_tok" value="old_tok" onclick="toggle_laytonize(false);"**old_checked**>Use old finite state tokenizer
<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
Less accurate, provided for reproducing older results. Not compatible with detokenization.<br/>
</span>
</a></input><br/></td></tr>
<tr><td colspan="2" style="padding-bottom: 10px"><input type="checkbox" id="detokenize" name="detokenize" value="detokenize" onclick="toggle_laytonize(true);"**detokenize_checked**>Re-merge bound groups
<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
Regularizes bound group spaces if input does not follow Layton's guidelines<br/>
(a.k.a. 'Laytonization'; increases accuracy on Till-segmented text and OCR)
</span>
</a></input><br/>
<ul>
<input type="radio" id="laytonize1" name="laytonize" value="conservative"**laytonize_conservative_checked**>Conservative merging<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
Only re-bind items known to appear unbound in other segmentations <br/>(e.g. well edited text following Till)<br/>
<div style="font-family: Antinoou; text-align:right; margin-bottom: 0px; font-weight: bold"><br/>ϩⲙ ⲡⲏⲓ --> ϩⲙ|ⲡ|ⲏⲓ</div>
</span>
</a></input><br/>
<input type="radio" id="laytonize2" name="laytonize" value="aggressive"**laytonize_aggressive_checked**>Aggressive merging<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
Re-bind all items that are unlikely to appear unbound <br/>(better for messy data/OCR)<br/>
<div style="font-family: Antinoou; text-align:right; margin-bottom: 0px; font-weight: bold"><br/>ⲁ ϥⲥⲱⲧⲙ --> ⲁ|ϥ|ⲥⲱⲧⲙ</div>
</span>
</a></input><br/>
<input type="radio" id="laytonize3" name="laytonize" value="smart"**laytonize_smart_checked**>Smart merging<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
Re-bind items using a context sensitive machine learning binder <br/>(trained on editions by E.A.W. Budge)<br/>
<div style="font-family: Antinoou; text-align:right; margin-bottom: 0px; font-weight: bold"><br/>ⲉ ⲃⲟⲗ ⲙ ⲡⲏⲓ --> ⲉⲃⲟⲗ ⲙ|ⲡ|ⲏⲓ</div>
</span>
</a></input><br/>
<input type="checkbox" id="segment_merged" name="segment_merged" value="segment_merged"**segment_merged_checked**>Segment at merge point
<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
If bound groups are merged, assume a morpheme boundary <br/>
(recommended if base segmentation is reliable)
</span>
</a></input><br/>
</ul>
</td></tr>
<tr><td>
<input type="radio" name="sgml_mode" value="sgml" onclick="disable_checkboxes(false);"**sgml_checked**>SGML pipeline</input><br/>
<ul>
<input type="checkbox" id="milestone" name="milestone" value="milestone"**milestone_checked**>Stretch milestones
<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
This setting replaces unary XML elements with binary ones. For example for
milestone page break elements: (&lt;pb/&gt; → &lt;pb&gt; ... &lt;/pb&gt;)
<br/>
</span>
</a>
</input><br/>
<input type="checkbox" id="tok" name="tok" value="tok"**tok_checked**>Tokenize <span style="color: gray; font-size:small"><tt>[stk-2.0.0]</tt></span></input>
<ul style="padding-left: 20px;">
<input type="radio" name="tok_mode" value="auto"**auto_checked**>Automatic</input><br/>
<input type="radio" name="tok_mode" value="from_pipes"**pipes_checked**>From pipes in input</input>
</ul>
<input type="checkbox" id="norm" name="norm" value="norm"**norm_checked**>Normalize
<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
Disable to remove norm_group attribute from output.<br/>
Diacritic stripping will still be done for processing norm units.
<br/>
</span>
</a>
</input><br/>
<input type="checkbox" id="tag" name="tag" value="tag"**tag_checked**>Tag</input><span style="color: gray; font-size:small"><tt>[marm-3.0.0]</tt></span><br/>
<input type="checkbox" id="lemma" name="lemma" value="lemma"**lemma_checked**>Lemmatize</input><br/>
<input type="checkbox" id="lang" name="lang" value="lang"**lang_checked**>Language of origin</input><br/>
<input type="checkbox" id="mwe" name="mwe" value="mwe"**mwe_checked**>MWE recognition
<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
Enable to automatically recognize multiword expressions (MWEs), e.g. ϭⲱⲗⲡ ⲉⲃⲟⲗ.<br/>
Known MWEs are retrieved from the Coptic Dictionary Online.
<br/>
</span>
</a>
</input><br/>
<input type="checkbox" id="parse" name="parse" value="parse"**parse_checked**>Parse</input><br/>
<input type="checkbox" id="entities" name="entities" value="entities"**entities_checked**>Entity recognition
<a href="#" class="tooltip2">
<i class="fa fa-info-circle" style="display: inline-block"></i>
<span>
<img class="callout" src="img/callout.gif" />
Identify sequences of words referring to people, places and more.
<br/>
</span>
</a>
</input><br/>
</ul>
</td>
<td style="vertical-align: top; padding-left: 20px">
<input type="radio" name="sgml_mode" value="pipes" onclick="disable_checkboxes(true);"**justpipes_checked**>Just piped and dashed morphemes
</input>
</td>
</tr>
</table>
</div>
<div>
<textarea class="anti nlp_input" id="data" name="data" type="textarea">**data**</textarea>
</div>
<div><button class="nlp" type="submit" onclick="isValidForm()">Process</button></div>
<div>
<p>Result:</p>
<textarea class="anti nlp_input" id="result" type="textarea">**processed**</textarea>
</div>
<div id="entity_container">
</div>
<div id="tree_container">
<div id="holder0" class="svgholder" nr="0"><svg id="svg0"></svg></div>
</div>
<div id="pagination"></div>
</form>
</div>
<script>
// Always cancel the browser's native form submission: the handler returns
// false unconditionally, so the page is never posted by the form itself.
document.getElementById('nlp_form').onsubmit = function suppressNativeSubmit() {
    return false;
};
function isValidForm(){
if (document.getElementById("data").value.indexOf("|") > -1 && document.getElementsByName("tok_mode")[0].checked && document.getElementsByName("sgml_mode")[0].checked){
// Auto mode selected but input contains pipes
var r = confirm("Your input contains pipes ('|') but you selected Automatic tokenization. Really proceed?");
if (r == false) {
return false;
}
}
**access_js**
}
</script>
<div id="bottomcontent">
<footer id="footer">
**footer**
</footer>
</div>
<script>
/**
 * Enable or disable all option controls of the SGML pipeline column.
 *
 * Sets the `disabled` property of the nine pipeline checkboxes and of every
 * "tok_mode" radio button to `val`. When `val` is loosely equal to false
 * (controls are being re-enabled), all nine checkboxes are also checked.
 *
 * @param {boolean} val - true to disable the controls, false to enable
 *                        (and check) them.
 */
function disable_checkboxes(val){
    // ids of the pipeline checkboxes handled by this function
    var checkbox_ids = ["milestone", "tok", "tag", "lemma", "lang", "norm", "mwe", "parse", "entities"];
    var i;

    for (i = 0; i < checkbox_ids.length; i++){
        document.getElementById(checkbox_ids[i]).disabled = val;
    }

    // Iterate by index: for...in over a NodeList also visits "length", "item",
    // "entries", etc., which would get a stray .disabled property written
    // onto them (and would throw in strict mode).
    var radios = document.getElementsByName("tok_mode");
    for (i = 0; i < radios.length; i++){
        radios[i].disabled = val;
    }

    if (val == false){
        for (i = 0; i < checkbox_ids.length; i++){
            document.getElementById(checkbox_ids[i]).checked = true;
        }
    }
}
// Initial state: if the page was rendered with the "pipes" output mode
// selected, disable the SGML pipeline options straight away.
// querySelector returns null when no "sgml_mode" radio is checked, so guard
// against that instead of throwing a TypeError. Wrapped in a function so the
// helper variable does not become a global.
(function(){
    var selected_mode = document.querySelector('input[name="sgml_mode"]:checked');
    if (selected_mode !== null && selected_mode.value == "pipes"){
        disable_checkboxes(true);
    }
})();
/**
 * Click handler shared by the "old_tok" and "detokenize" checkboxes, which
 * are mutually exclusive.
 *
 * Unchecks the other checkbox, then synchronises the dependent controls with
 * the resulting state of "detokenize":
 *  - "norm" is disabled exactly while "detokenize" is checked;
 *  - the three "laytonize" radios and "segment_merged" are enabled while it
 *    is checked (defaulting to conservative merging with segment_merged on),
 *    and disabled and cleared otherwise.
 *
 * @param {boolean} laytonize_on - true when called from the "detokenize"
 *        checkbox, false when called from the "old_tok" checkbox.
 */
function toggle_laytonize(laytonize_on){
    var detokenize = document.getElementById("detokenize");
    if (laytonize_on){
        document.getElementById("old_tok").checked = false;
    }
    else{
        detokenize.checked = false;
    }

    // The click may have *unchecked* "detokenize", so use the checkbox's real
    // state rather than the argument; otherwise "norm" stays disabled after
    // the user turns re-merging off again.
    var merging = detokenize.checked;
    document.getElementById("norm").disabled = merging;

    var conservative = document.getElementById("laytonize1");
    var aggressive = document.getElementById("laytonize2");
    var smart = document.getElementById("laytonize3");
    var segment_merged = document.getElementById("segment_merged");

    if (merging){
        conservative.disabled = false;
        conservative.checked = true;
        aggressive.disabled = false;
        // the smart option belongs to the same radio group as the other two
        smart.disabled = false;
        segment_merged.disabled = false;
        segment_merged.checked = true;
    }
    else{
        conservative.disabled = true;
        conservative.checked = false;
        aggressive.disabled = true;
        aggressive.checked = false;
        smart.disabled = true;
        smart.checked = false;
        segment_merged.disabled = true;
        segment_merged.checked = false;
    }
}
</script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://cdn.jsdelivr.net/bxslider/4.2.12/jquery.bxslider.min.js"></script>
<script src="jquery-1.12.4.js"></script>
<script src="render_entities.js"></script>
<script src="raphael.js"></script>
<script src="arborator.draw.js"></script>
<script src="pagination.min.js"></script>
<script src="q_nlp.js"></script>
<script>$(".m-tools").addClass('on');</script>
</div>
</body>
</html>