# Cargo.toml (repository forked from pemistahl/lingua-rs)
# Copyright © 2020-today Peter M. Stahl [email protected]
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Workspace root: every directory matched by the glob below is a member crate.
[workspace]
members = [
    "language-models/*",
]
# Crate metadata. Keys follow the Cargo convention: `name` and `version`
# first, the remaining keys alphabetically, `description` last.
[package]
name = "lingua"
version = "1.5.0"
# NOTE(review): "[email protected]" looks like an e-mail obfuscation placeholder
# left behind by a web export rather than a real address — restore the
# author's actual address before publishing.
authors = ["Peter M. Stahl <[email protected]>"]
categories = ["text-processing"]
documentation = "https://docs.rs/lingua"
edition = "2021"
homepage = "https://github.com/pemistahl/lingua-rs"
keywords = [
    "language-processing",
    "language-detection",
    "language-recognition",
    "nlp",
]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/pemistahl/lingua-rs"
# TOML trims the newline right after the opening quotes; the newline before
# the closing quotes is part of the value.
description = """
An accurate natural language detection library, suitable for long and short text alike
"""
# Overrides the optimization level used by the `dev` profile (plain
# `cargo build` / `cargo test`).
# NOTE(review): presumably raised to keep debug runs over the language models
# acceptably fast — confirm the motivation.
[profile.dev]
opt-level = 1
# Library target, emitted in two forms: `cdylib` (dynamic library for loading
# from other languages) and `rlib` (regular Rust library for crates depending
# on this one).
# NOTE(review): `cdylib` is presumably needed for the wasm-bindgen build
# declared in the wasm-only dependency table — confirm.
[lib]
crate-type = ["cdylib", "rlib"]
# Binary target `accuracy_reports`. It is only built when the
# `accuracy-reports` feature is enabled, since that feature pulls in the
# optional crates listed under it in [features].
[[bin]]
name = "accuracy_reports"
required-features = ["accuracy-reports"]
# Benchmark target `benchmark`. `harness = false` turns off the built-in
# libtest harness, so the benchmark file must provide its own `main`.
# NOTE(review): presumably that `main` comes from `criterion`, which is listed
# in the non-wasm dev-dependencies — confirm.
[[bench]]
name = "benchmark"
harness = false
# Dependencies that apply to every target (target-specific ones live in the
# `[target.'cfg(...)'.dependencies]` tables).
[dependencies]
brotli = "3.3.4"
fraction = "0.13.1"
include_dir = "0.7.3"
itertools = "0.10.5"
maplit = "1.0.2"
once_cell = "1.17.2"
regex = "1.8.3"
serde = { version = "1.0.163", features = ["derive"] }
serde_json = "1.0.96"
strum = "0.24.1"
strum_macros = "0.24.3"
# Language model crates, one per supported language. Each is optional and is
# switched on by the language-named feature in [features] (all of them are
# part of `default`). Every entry gives both a `path` (the workspace member
# used for local builds) and a `version` (used once the crate is published).
lingua-afrikaans-language-model = { path = "language-models/af", version = "1.1.0", optional = true }
lingua-albanian-language-model = { path = "language-models/sq", version = "1.1.0", optional = true }
lingua-arabic-language-model = { path = "language-models/ar", version = "1.1.0", optional = true }
lingua-armenian-language-model = { path = "language-models/hy", version = "1.1.0", optional = true }
lingua-azerbaijani-language-model = { path = "language-models/az", version = "1.1.0", optional = true }
lingua-basque-language-model = { path = "language-models/eu", version = "1.1.0", optional = true }
lingua-belarusian-language-model = { path = "language-models/be", version = "1.1.0", optional = true }
lingua-bengali-language-model = { path = "language-models/bn", version = "1.1.0", optional = true }
lingua-bokmal-language-model = { path = "language-models/nb", version = "1.1.0", optional = true }
lingua-bosnian-language-model = { path = "language-models/bs", version = "1.1.0", optional = true }
lingua-bulgarian-language-model = { path = "language-models/bg", version = "1.1.0", optional = true }
lingua-catalan-language-model = { path = "language-models/ca", version = "1.1.0", optional = true }
lingua-chinese-language-model = { path = "language-models/zh", version = "1.1.0", optional = true }
lingua-croatian-language-model = { path = "language-models/hr", version = "1.1.0", optional = true }
lingua-czech-language-model = { path = "language-models/cs", version = "1.1.0", optional = true }
lingua-danish-language-model = { path = "language-models/da", version = "1.1.0", optional = true }
lingua-dutch-language-model = { path = "language-models/nl", version = "1.1.0", optional = true }
lingua-english-language-model = { path = "language-models/en", version = "1.1.0", optional = true }
lingua-esperanto-language-model = { path = "language-models/eo", version = "1.1.0", optional = true }
lingua-estonian-language-model = { path = "language-models/et", version = "1.1.0", optional = true }
lingua-finnish-language-model = { path = "language-models/fi", version = "1.1.0", optional = true }
lingua-french-language-model = { path = "language-models/fr", version = "1.1.0", optional = true }
lingua-ganda-language-model = { path = "language-models/lg", version = "1.1.0", optional = true }
lingua-georgian-language-model = { path = "language-models/ka", version = "1.1.0", optional = true }
lingua-german-language-model = { path = "language-models/de", version = "1.1.0", optional = true }
lingua-greek-language-model = { path = "language-models/el", version = "1.1.0", optional = true }
lingua-gujarati-language-model = { path = "language-models/gu", version = "1.1.0", optional = true }
lingua-hebrew-language-model = { path = "language-models/he", version = "1.1.0", optional = true }
lingua-hindi-language-model = { path = "language-models/hi", version = "1.1.0", optional = true }
lingua-hungarian-language-model = { path = "language-models/hu", version = "1.1.0", optional = true }
lingua-icelandic-language-model = { path = "language-models/is", version = "1.1.0", optional = true }
lingua-indonesian-language-model = { path = "language-models/id", version = "1.1.0", optional = true }
lingua-irish-language-model = { path = "language-models/ga", version = "1.1.0", optional = true }
lingua-italian-language-model = { path = "language-models/it", version = "1.1.0", optional = true }
lingua-japanese-language-model = { path = "language-models/ja", version = "1.1.0", optional = true }
lingua-kazakh-language-model = { path = "language-models/kk", version = "1.1.0", optional = true }
lingua-korean-language-model = { path = "language-models/ko", version = "1.1.0", optional = true }
lingua-latin-language-model = { path = "language-models/la", version = "1.1.0", optional = true }
lingua-latvian-language-model = { path = "language-models/lv", version = "1.1.0", optional = true }
lingua-lithuanian-language-model = { path = "language-models/lt", version = "1.1.0", optional = true }
lingua-macedonian-language-model = { path = "language-models/mk", version = "1.1.0", optional = true }
lingua-malay-language-model = { path = "language-models/ms", version = "1.1.0", optional = true }
lingua-maori-language-model = { path = "language-models/mi", version = "1.1.0", optional = true }
lingua-marathi-language-model = { path = "language-models/mr", version = "1.1.0", optional = true }
lingua-mongolian-language-model = { path = "language-models/mn", version = "1.1.0", optional = true }
lingua-nynorsk-language-model = { path = "language-models/nn", version = "1.1.0", optional = true }
lingua-persian-language-model = { path = "language-models/fa", version = "1.1.0", optional = true }
lingua-polish-language-model = { path = "language-models/pl", version = "1.1.0", optional = true }
lingua-portuguese-language-model = { path = "language-models/pt", version = "1.1.0", optional = true }
lingua-punjabi-language-model = { path = "language-models/pa", version = "1.1.0", optional = true }
lingua-romanian-language-model = { path = "language-models/ro", version = "1.1.0", optional = true }
lingua-russian-language-model = { path = "language-models/ru", version = "1.1.0", optional = true }
lingua-serbian-language-model = { path = "language-models/sr", version = "1.1.0", optional = true }
lingua-shona-language-model = { path = "language-models/sn", version = "1.1.0", optional = true }
lingua-slovak-language-model = { path = "language-models/sk", version = "1.1.0", optional = true }
lingua-slovene-language-model = { path = "language-models/sl", version = "1.1.0", optional = true }
lingua-somali-language-model = { path = "language-models/so", version = "1.1.0", optional = true }
lingua-sotho-language-model = { path = "language-models/st", version = "1.1.0", optional = true }
lingua-spanish-language-model = { path = "language-models/es", version = "1.1.0", optional = true }
lingua-swahili-language-model = { path = "language-models/sw", version = "1.1.0", optional = true }
lingua-swedish-language-model = { path = "language-models/sv", version = "1.1.0", optional = true }
lingua-tagalog-language-model = { path = "language-models/tl", version = "1.1.0", optional = true }
lingua-tamil-language-model = { path = "language-models/ta", version = "1.1.0", optional = true }
lingua-telugu-language-model = { path = "language-models/te", version = "1.1.0", optional = true }
lingua-thai-language-model = { path = "language-models/th", version = "1.1.0", optional = true }
lingua-tsonga-language-model = { path = "language-models/ts", version = "1.1.0", optional = true }
lingua-tswana-language-model = { path = "language-models/tn", version = "1.1.0", optional = true }
lingua-turkish-language-model = { path = "language-models/tr", version = "1.1.0", optional = true }
lingua-ukrainian-language-model = { path = "language-models/uk", version = "1.1.0", optional = true }
lingua-urdu-language-model = { path = "language-models/ur", version = "1.1.0", optional = true }
lingua-vietnamese-language-model = { path = "language-models/vi", version = "1.1.0", optional = true }
lingua-welsh-language-model = { path = "language-models/cy", version = "1.1.0", optional = true }
lingua-xhosa-language-model = { path = "language-models/xh", version = "1.1.0", optional = true }
lingua-yoruba-language-model = { path = "language-models/yo", version = "1.1.0", optional = true }
lingua-zulu-language-model = { path = "language-models/zu", version = "1.1.0", optional = true }
# Dependencies used only when the target family is NOT wasm.
# `ahash` is taken with its default features here; the wasm table declares it
# again with a restricted feature set. The four optional crates (`cld2`,
# `indoc`, `titlecase`, `whatlang`) are enabled together by the
# `accuracy-reports` feature.
[target.'cfg(not(target_family = "wasm"))'.dependencies]
ahash = "0.8.3"
cld2 = { version = "1.0.2", optional = true }
indoc = { version = "2.0.1", optional = true }
rayon = "1.7.0"
titlecase = { version = "2.2.0", optional = true }
whatlang = { version = "0.16.2", optional = true }
# Dependencies used only when the target family is wasm.
# `ahash` is declared without its default features; only `std` and
# `compile-time-rng` are enabled.
# NOTE(review): presumably because ahash's default runtime randomness source
# is unavailable on wasm — confirm.
[target.'cfg(target_family = "wasm")'.dependencies]
ahash = { version = "0.8.3", default-features = false, features = ["std", "compile-time-rng"] }
serde-wasm-bindgen = "0.5.0"
wasm-bindgen = "0.2.86"
# Dependencies for tests, examples and benchmarks on every target; they are
# not propagated to crates that depend on this one.
# `indoc` is unconditional here, independent of the optional `indoc` entry in
# the non-wasm dependency table.
[dev-dependencies]
float-cmp = "0.9.0"
indoc = "2.0.1"
rstest = "0.17.0"
tempfile = "3.5.0"
# Dev-only dependency for non-wasm targets.
# NOTE(review): presumably the benchmarking library behind the `benchmark`
# target, which disables the default harness — confirm.
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
criterion = "0.5.1"
# Dev-only dependency for wasm targets.
[target.'cfg(target_family = "wasm")'.dev-dependencies]
wasm-bindgen-test = "0.3.36"
# Cargo features.
# `default` enables every language listed below. A dependent crate can turn
# default features off and name only the languages it needs, so that just the
# corresponding optional language model crates are pulled in.
[features]
default = [
"afrikaans", "albanian", "arabic", "armenian", "azerbaijani", "basque",
"belarusian", "bengali", "bokmal", "bosnian", "bulgarian", "catalan",
"chinese", "croatian", "czech", "danish", "dutch", "english", "esperanto",
"estonian", "finnish", "french", "ganda", "georgian", "german", "greek",
"gujarati", "hebrew", "hindi", "hungarian", "icelandic", "indonesian",
"irish", "italian", "japanese", "kazakh", "korean", "latin", "latvian",
"lithuanian", "macedonian", "malay", "maori", "marathi", "mongolian",
"nynorsk", "persian", "polish", "portuguese", "punjabi", "romanian",
"russian", "serbian", "shona", "slovak", "slovene", "somali", "sotho",
"spanish", "swahili", "swedish", "tagalog", "tamil", "telugu", "thai",
"tsonga", "tswana", "turkish", "ukrainian", "urdu", "vietnamese",
"welsh", "xhosa", "yoruba", "zulu"
]
# Not part of `default`. Enables the optional non-wasm crates and is required
# by the `accuracy_reports` binary target.
accuracy-reports = ["cld2", "indoc", "titlecase", "whatlang"]
# One feature per language; each enables the optional
# `lingua-<language>-language-model` dependency of the same language.
afrikaans = ["lingua-afrikaans-language-model"]
albanian = ["lingua-albanian-language-model"]
arabic = ["lingua-arabic-language-model"]
armenian = ["lingua-armenian-language-model"]
azerbaijani = ["lingua-azerbaijani-language-model"]
basque = ["lingua-basque-language-model"]
belarusian = ["lingua-belarusian-language-model"]
bengali = ["lingua-bengali-language-model"]
bokmal = ["lingua-bokmal-language-model"]
bosnian = ["lingua-bosnian-language-model"]
bulgarian = ["lingua-bulgarian-language-model"]
catalan = ["lingua-catalan-language-model"]
chinese = ["lingua-chinese-language-model"]
croatian = ["lingua-croatian-language-model"]
czech = ["lingua-czech-language-model"]
danish = ["lingua-danish-language-model"]
dutch = ["lingua-dutch-language-model"]
english = ["lingua-english-language-model"]
esperanto = ["lingua-esperanto-language-model"]
estonian = ["lingua-estonian-language-model"]
finnish = ["lingua-finnish-language-model"]
french = ["lingua-french-language-model"]
ganda = ["lingua-ganda-language-model"]
georgian = ["lingua-georgian-language-model"]
german = ["lingua-german-language-model"]
greek = ["lingua-greek-language-model"]
gujarati = ["lingua-gujarati-language-model"]
hebrew = ["lingua-hebrew-language-model"]
hindi = ["lingua-hindi-language-model"]
hungarian = ["lingua-hungarian-language-model"]
icelandic = ["lingua-icelandic-language-model"]
indonesian = ["lingua-indonesian-language-model"]
irish = ["lingua-irish-language-model"]
italian = ["lingua-italian-language-model"]
japanese = ["lingua-japanese-language-model"]
kazakh = ["lingua-kazakh-language-model"]
korean = ["lingua-korean-language-model"]
latin = ["lingua-latin-language-model"]
latvian = ["lingua-latvian-language-model"]
lithuanian = ["lingua-lithuanian-language-model"]
macedonian = ["lingua-macedonian-language-model"]
malay = ["lingua-malay-language-model"]
maori = ["lingua-maori-language-model"]
marathi = ["lingua-marathi-language-model"]
mongolian = ["lingua-mongolian-language-model"]
nynorsk = ["lingua-nynorsk-language-model"]
persian = ["lingua-persian-language-model"]
polish = ["lingua-polish-language-model"]
portuguese = ["lingua-portuguese-language-model"]
punjabi = ["lingua-punjabi-language-model"]
romanian = ["lingua-romanian-language-model"]
russian = ["lingua-russian-language-model"]
serbian = ["lingua-serbian-language-model"]
shona = ["lingua-shona-language-model"]
slovak = ["lingua-slovak-language-model"]
slovene = ["lingua-slovene-language-model"]
somali = ["lingua-somali-language-model"]
sotho = ["lingua-sotho-language-model"]
spanish = ["lingua-spanish-language-model"]
swahili = ["lingua-swahili-language-model"]
swedish = ["lingua-swedish-language-model"]
tagalog = ["lingua-tagalog-language-model"]
tamil = ["lingua-tamil-language-model"]
telugu = ["lingua-telugu-language-model"]
thai = ["lingua-thai-language-model"]
tsonga = ["lingua-tsonga-language-model"]
tswana = ["lingua-tswana-language-model"]
turkish = ["lingua-turkish-language-model"]
ukrainian = ["lingua-ukrainian-language-model"]
urdu = ["lingua-urdu-language-model"]
vietnamese = ["lingua-vietnamese-language-model"]
welsh = ["lingua-welsh-language-model"]
xhosa = ["lingua-xhosa-language-model"]
yoruba = ["lingua-yoruba-language-model"]
zulu = ["lingua-zulu-language-model"]