-
-
Notifications
You must be signed in to change notification settings - Fork 299
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of https://github.com/explosion/spacy-models
- Loading branch information
Showing
16 changed files
with
627 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
{ | ||
"accuracy": { | ||
"ents_f": 83.8971731371, | ||
"ents_p": 84.725523799, | ||
"ents_r": 83.084863007, | ||
"las": 90.3313400502, | ||
"tags_acc": 97.4595722142, | ||
"token_acc": 99.4792618308, | ||
"uas": 92.1905835918 | ||
}, | ||
"author": "Explosion AI", | ||
"description": "German multi-task CNN trained on the TIGER and WikiNER corpora. Assigns context-specific token vectors, POS tags, dependency parse and named entities. Supports identification of PER, LOC, ORG and MISC entities.", | ||
"email": "contact@explosion.ai", | ||
"lang": "de", | ||
"license": "MIT", | ||
"name": "core_news_md", | ||
"notes": "Because the model is trained on Wikipedia, it may perform inconsistently on many genres, such as social media text. The NER accuracy refers to the \"silver standard\" annotations in the WikiNER corpus. Accuracy on these annotations tends to be higher than correct human annotations.", | ||
"parent_package": "spacy-nightly", | ||
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner" | ||
], | ||
"sources": [ | ||
"TIGER Corpus", | ||
"Wikipedia" | ||
], | ||
"spacy_version": ">=2.1.0a4", | ||
"speed": { | ||
"cpu": 6860.2096209861, | ||
"gpu": null, | ||
"nwords": 696811 | ||
}, | ||
"url": "https://explosion.ai", | ||
"vectors": { | ||
"keys": 276087, | ||
"name": "de_model.vectors", | ||
"vectors": 20000, | ||
"width": 300 | ||
}, | ||
"version": "2.1.0a6", | ||
"size": "210 MB", | ||
"checksum": "4338d30cbf5f8c2c25d05e2d830d5d2783024c16f436fabcb397b61fa3a40a92" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
{ | ||
"accuracy": { | ||
"ents_f": 83.3361633136, | ||
"ents_p": 84.2093028404, | ||
"ents_r": 82.4809445848, | ||
"las": 89.5520616147, | ||
"tags_acc": 97.2045485755, | ||
"token_acc": 99.4792618308, | ||
"uas": 91.623693173 | ||
}, | ||
"author": "Explosion AI", | ||
"description": "German multi-task CNN trained on the TIGER and WikiNER corpora. Assigns context-specific token vectors, POS tags, dependency parse and named entities. Supports identification of PER, LOC, ORG and MISC entities.", | ||
"email": "contact@explosion.ai", | ||
"lang": "de", | ||
"license": "MIT", | ||
"name": "core_news_sm", | ||
"notes": "Because the model is trained on Wikipedia, it may perform inconsistently on many genres, such as social media text. The NER accuracy refers to the \"silver standard\" annotations in the WikiNER corpus. Accuracy on these annotations tends to be higher than correct human annotations.", | ||
"parent_package": "spacy-nightly", | ||
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner" | ||
], | ||
"sources": [ | ||
"TIGER Corpus", | ||
"Wikipedia" | ||
], | ||
"spacy_version": ">=2.1.0a4", | ||
"speed": { | ||
"cpu": 7351.1354011755, | ||
"gpu": null, | ||
"nwords": 696811 | ||
}, | ||
"url": "https://explosion.ai", | ||
"version": "2.1.0a6", | ||
"size": "10 MB", | ||
"checksum": "60c70639a46b0888154815ebb932bbfe3366134be41b959b62047698bd654f45" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
{ | ||
"accuracy": { | ||
"ents_f": 81.0020876827, | ||
"ents_p": 80.4979253112, | ||
"ents_r": 81.512605042, | ||
"las": 85.1989785061, | ||
"tags_acc": 96.5910179067, | ||
"token_acc": 100.0, | ||
"uas": 88.3592253671 | ||
}, | ||
"author": "Giannis Daras", | ||
"description": "Greek pipeline with word vectors, POS tags, dependencies and named entities. Word vectors use Facebook's FastText Common Crawl vectors, pruned to a vocabulary of 20,000 items. Words outside the most frequent were mapped to the nearest neighbouring vector within the 20,000 rows retained. Syntax (dependencies and POS tags) trained from the Universal Dependencies conversion of the Greek Dependency Treebank (v2.2). Named entity annotations were created by Giannis Daras using Prodigy, using the OntoNotes 5 annotation schema.", | ||
"email": "[email protected]", | ||
"lang": "el", | ||
"license": "CC BY-NC 4.0", | ||
"name": "core_news_md", | ||
"parent_package": "spacy-nightly", | ||
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner" | ||
], | ||
"sources": [ | ||
"Common Crawl", | ||
"Greek Dependency Treebank", | ||
"Daras GSOC 2018" | ||
], | ||
"spacy_version": ">=2.1.0a4", | ||
"speed": { | ||
"cpu": 7654.0669709891, | ||
"gpu": null, | ||
"nwords": 6191 | ||
}, | ||
"url": "https://github.com/eellak/gsoc2018-spacy", | ||
"vectors": { | ||
"keys": 1999938, | ||
"name": "el_model.vectors", | ||
"vectors": 20000, | ||
"width": 300 | ||
}, | ||
"version": "2.1.0a6", | ||
"size": "126 MB", | ||
"checksum": "bbbc474cc51dec46018abf06f6b8b61f3f35756d389fee8053bb533e1ec610ee" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
{ | ||
"accuracy": { | ||
"ents_f": 73.1006160164, | ||
"ents_p": 71.4859437751, | ||
"ents_r": 74.7899159664, | ||
"las": 81.4743896601, | ||
"tags_acc": 94.7716173513, | ||
"token_acc": 100.0, | ||
"uas": 84.9742034998 | ||
}, | ||
"author": "Giannis Daras", | ||
"description": "Greek pipeline with word vectors, POS tags, dependencies and named entities. Word vectors use Facebook's FastText Common Crawl vectors, pruned to a vocabulary of 20,000 items. Words outside the most frequent were mapped to the nearest neighbouring vector within the 20,000 rows retained. Syntax (dependencies and POS tags) trained from the Universal Dependencies conversion of the Greek Dependency Treebank (v2.2). Named entity annotations were created by Giannis Daras using Prodigy, using the OntoNotes 5 annotation schema.", | ||
"email": "[email protected]", | ||
"lang": "el", | ||
"license": "CC BY-NC 4.0", | ||
"name": "core_news_sm", | ||
"parent_package": "spacy-nightly", | ||
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner" | ||
], | ||
"sources": [ | ||
"Greek Dependency Treebank", | ||
"Daras GSOC 2018" | ||
], | ||
"spacy_version": ">=2.1.0a4", | ||
"speed": { | ||
"cpu": 8658.590425769, | ||
"gpu": null, | ||
"nwords": 6191 | ||
}, | ||
"url": "https://github.com/eellak/gsoc2018-spacy", | ||
"version": "2.1.0a6", | ||
"size": "10 MB", | ||
"checksum": "4ca49e6fafabff31df82e53df79f545f6bf78b12fa22146e867ce757aeb55ee4" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
{ | ||
"accuracy": { | ||
"ents_f": 86.6234201991, | ||
"ents_p": 86.6767388507, | ||
"ents_r": 86.5701671045, | ||
"las": 90.2009214654, | ||
"tags_acc": 97.0161369547, | ||
"token_acc": 99.0646795541, | ||
"uas": 91.9664476128 | ||
}, | ||
"author": "Explosion AI", | ||
"description": "English multi-task CNN trained on OntoNotes, with GloVe vectors trained on Common Crawl. Assigns word vectors, context-specific token vectors, POS tags, dependency parse and named entities.", | ||
"email": "contact@explosion.ai", | ||
"lang": "en", | ||
"license": "MIT", | ||
"name": "core_web_lg", | ||
"parent_package": "spacy-nightly", | ||
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner" | ||
], | ||
"sources": [ | ||
"OntoNotes 5", | ||
"Common Crawl" | ||
], | ||
"spacy_version": ">=2.1.0a4", | ||
"speed": { | ||
"cpu": 7064.0263326354, | ||
"gpu": null, | ||
"nwords": 291344 | ||
}, | ||
"url": "https://explosion.ai", | ||
"vectors": { | ||
"keys": 684830, | ||
"name": "en_model.vectors", | ||
"vectors": 684831, | ||
"width": 300 | ||
}, | ||
"version": "2.1.0a6", | ||
"size": "788 MB", | ||
"checksum": "6ee2325f253b8f74693c07311071eab99e504acfc37f8da7a6a88a53fb0496f9" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
{ | ||
"accuracy": { | ||
"ents_f": 86.403312444, | ||
"ents_p": 86.5049577055, | ||
"ents_r": 86.3019057732, | ||
"las": 90.1602551636, | ||
"tags_acc": 96.9558151118, | ||
"token_acc": 99.0646795541, | ||
"uas": 91.9405655607 | ||
}, | ||
"author": "Explosion AI", | ||
"description": "English multi-task CNN trained on OntoNotes, with GloVe vectors trained on Common Crawl. Assigns word vectors, context-specific token vectors, POS tags, dependency parse and named entities.", | ||
"email": "contact@explosion.ai", | ||
"lang": "en", | ||
"license": "MIT", | ||
"name": "core_web_md", | ||
"parent_package": "spacy-nightly", | ||
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner" | ||
], | ||
"sources": [ | ||
"OntoNotes 5", | ||
"Common Crawl" | ||
], | ||
"spacy_version": ">=2.1.0a4", | ||
"speed": { | ||
"cpu": 7641.3076324931, | ||
"gpu": null, | ||
"nwords": 291344 | ||
}, | ||
"url": "https://explosion.ai", | ||
"vectors": { | ||
"keys": 684830, | ||
"name": "en_model.vectors", | ||
"vectors": 20000, | ||
"width": 300 | ||
}, | ||
"version": "2.1.0a6", | ||
"size": "91 MB", | ||
"checksum": "ea971369a13056cee2bddaaf1c5b342b16bc0a0f45228abde4b4c4635f469f1f" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{ | ||
"accuracy": { | ||
"ents_f": 85.494302433, | ||
"ents_p": 85.6597845601, | ||
"ents_r": 85.3294584474, | ||
"las": 89.6400460442, | ||
"tags_acc": 96.7982889278, | ||
"token_acc": 99.0646795541, | ||
"uas": 91.5273154342 | ||
}, | ||
"author": "Explosion AI", | ||
"description": "English multi-task CNN trained on OntoNotes. Assigns context-specific token vectors, POS tags, dependency parse and named entities.", | ||
"email": "contact@explosion.ai", | ||
"lang": "en", | ||
"license": "MIT", | ||
"name": "core_web_sm", | ||
"parent_package": "spacy-nightly", | ||
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner" | ||
], | ||
"sources": [ | ||
"OntoNotes 5" | ||
], | ||
"spacy_version": ">=2.1.0a4", | ||
"speed": { | ||
"cpu": 7435.3852845254, | ||
"gpu": null, | ||
"nwords": 291344 | ||
}, | ||
"url": "https://explosion.ai", | ||
"version": "2.1.0a6", | ||
"size": "10 MB", | ||
"checksum": "927785b2aabb43d888437295a11b071798570dbd8c67cf80c611bc1c6927898c" | ||
} |
Oops, something went wrong.