forked from nitotm/efficient-language-detector-js
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo.html
65 lines (54 loc) · 2.93 KB
/
demo.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Demo</title>
</head>
<body>
<main>
<h1>Demo</h1>
<div id="demo">See results at the console.</div>
</main>
<!-- <script src="minified/eld.M60.min.js" charset="utf-8"></script> -->
<script type="module">
import { eld } from './src/languageDetector.js';

// NOTE(review): 'L' presumably selects which Ngrams database init() loads — confirm in src/languageDetector.js
await eld.init('L');

// detect() expects a UTF-8 string and returns an object shaped like
//   { language: string, getScores(): Object, isReliable(): boolean }
// where 'language' is an ISO 639-1 code, or an empty string.
const detection = eld.detect('Hola, cómo te llamas?');
console.log(detection);
// e.g. { language: 'es', getScores(): {'es': 0.5, 'et': 0.2}, isReliable(): true }

// Reading just the detected language code
const { language } = eld.detect('Hola, cómo te llamas?');
console.log(language);
// 'es'

/*
 * There are 2 options to reduce the set of languages to be detected; each only needs to be executed once.
 * This is the complete list of languages for ELD v1, using ISO 639-1 codes:
 * ['am', 'ar', 'az', 'be', 'bg', 'bn', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fr',
 * 'gu', 'he', 'hi', 'hr', 'hu', 'hy', 'is', 'it', 'ja', 'ka', 'kn', 'ko', 'ku', 'lo', 'lt', 'lv', 'ml', 'mr',
 * 'ms', 'nl', 'no', 'or', 'pa', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq', 'sr', 'sv', 'ta', 'te', 'th', 'tl',
 * 'tr', 'uk', 'ur', 'vi', 'yo', 'zh']
 */
const langSubset = ['en', 'es', 'fr', 'it', 'nl', 'de'];

// Option 1. With dynamicLangSubset(), the detector executes normally, and the excluded languages are filtered
// out at the end. It returns the validated languages of the subset.
const validatedSubset = eld.dynamicLangSubset(langSubset);
console.log(validatedSubset);
// { 9: "de", 11: "en", 12: "es", 17: "fr", 25: "it", 37: "nl" }

// Passing false removes the subset again
eld.dynamicLangSubset(false);

/*
 Option 2. The optimal way to regularly use the same subset is saveSubset().
 saveSubset() downloads a new database of Ngrams containing only the subset languages. (Web browser only.)
 It is not included in the minified version.
 saveSubset() is also NOT included for Node.js; you would need to use a browser to download the Ngrams file.
*/
// eld.saveSubset(langSubset)

// Any Ngrams database saved at src/ngrams/, including subsets, can be loaded dynamically.
// loadNgrams() returns true on success and will not remove any active dynamic subset.
await eld.loadNgrams('ngramsL60.js');
// eld.loadNgrams('ngramsL60.js').then((loaded) => { if (loaded) { /* code */ } })

// To change the database preloaded on import, edit the file at src/languageDetector.js: loadNgrams('ngramsM60.js')
// If you believe eld should change this database preload behavior, comment at GitHub /discussions

// Finally, we can get the current status of eld: languages, database type and subset
const status = eld.info();
console.log(status);
</script>
</body>
</html>