Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
honnibal committed Feb 7, 2019
2 parents 9609fbe + 6abb7b7 commit 0923175
Show file tree
Hide file tree
Showing 16 changed files with 627 additions and 15 deletions.
48 changes: 33 additions & 15 deletions compatibility.json
Original file line number Diff line number Diff line change
Expand Up @@ -464,23 +464,41 @@
"de_core_news_md": ["1.0.0"],
"en_vectors_glove_md": ["1.0.0"]
},
"2.1.0a7": {
"en_vectors_web_lg": ["2.0.0"],
"en_core_web_sm": ["2.1.0a7"],
"en_core_web_md": ["2.1.0a7"],
"en_core_web_lg": ["2.1.0a7"],
"de_core_news_sm": ["2.1.0a7"],
"de_core_news_md": ["2.1.0a7"],
"es_core_news_sm": ["2.1.0a7"],
"es_core_news_md": ["2.1.0a7"],
"pt_core_news_sm": ["2.1.0a7"],
"fr_core_news_sm": ["2.1.0a7"],
"fr_core_news_md": ["2.1.0a7"],
"el_core_news_sm": ["2.1.0a7"],
"el_core_news_md": ["2.1.0a7"],
"it_core_news_sm": ["2.1.0a7"],
"nl_core_news_sm": ["2.1.0a7"],
"xx_ent_wiki_sm": ["2.1.0a7"]
},
"2.1.0a6": {
"en_vectors_web_lg": ["2.0.0"],
"en_core_web_sm": ["2.1.0a5"],
"en_core_web_md": ["2.1.0a5"],
"en_core_web_lg": ["2.1.0a5"],
"de_core_news_sm": ["2.1.0a5"],
"de_core_news_md": ["2.1.0a5"],
"es_core_news_sm": ["2.1.0a5"],
"es_core_news_md": ["2.1.0a5"],
"pt_core_news_sm": ["2.1.0a5"],
"fr_core_news_sm": ["2.1.0a5"],
"fr_core_news_md": ["2.1.0a5"],
"el_core_news_sm": ["2.1.0a5"],
"el_core_news_md": ["2.1.0a5"],
"it_core_news_sm": ["2.1.0a5"],
"nl_core_news_sm": ["2.1.0a5"],
"xx_ent_wiki_sm": ["2.1.0a5"]
"en_core_web_sm": ["2.1.0a5", "2.1.0a6"],
"en_core_web_md": ["2.1.0a5", "2.1.0a6"],
"en_core_web_lg": ["2.1.0a5", "2.1.0a6"],
"de_core_news_sm": ["2.1.0a5", "2.1.0a6"],
"de_core_news_md": ["2.1.0a5", "2.1.0a6"],
"es_core_news_sm": ["2.1.0a5", "2.1.0a6"],
"es_core_news_md": ["2.1.0a5", "2.1.0a6"],
"pt_core_news_sm": ["2.1.0a5", "2.1.0a6"],
"fr_core_news_sm": ["2.1.0a5", "2.1.0a6"],
"fr_core_news_md": ["2.1.0a5", "2.1.0a6"],
"el_core_news_sm": ["2.1.0a5", "2.1.0a6"],
"el_core_news_md": ["2.1.0a5", "2.1.0a6"],
"it_core_news_sm": ["2.1.0a5", "2.1.0a6"],
"nl_core_news_sm": ["2.1.0a5", "2.1.0a6"],
"xx_ent_wiki_sm": ["2.1.0a5", "2.1.0a6"]
},
"2.1.0a5": {
"en_vectors_web_lg": ["2.0.0"],
Expand Down
44 changes: 44 additions & 0 deletions meta/de_core_news_md-2.1.0a6.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"accuracy": {
"ents_f": 83.8971731371,
"ents_p": 84.725523799,
"ents_r": 83.084863007,
"las": 90.3313400502,
"tags_acc": 97.4595722142,
"token_acc": 99.4792618308,
"uas": 92.1905835918
},
"author": "Explosion AI",
"description": "German multi-task CNN trained on the TIGER and WikiNER corpus. Assigns context-specific token vectors, POS tags, dependency parse and named entities. Supports identification of PER, LOC, ORG and MISC entities.",
"email": "contact@explosion.ai",
"lang": "de",
"license": "MIT",
"name": "core_news_md",
"notes": "Because the model is trained on Wikipedia, it may perform inconsistently on many genres, such as social media text. The NER accuracy refers to the \"silver standard\" annotations in the WikiNER corpus. Accuracy on these annotations tends to be higher than correct human annotations.",
"parent_package": "spacy-nightly",
"pipeline": [
"tagger",
"parser",
"ner"
],
"sources": [
"TIGER Corpus",
"Wikipedia"
],
"spacy_version": ">=2.1.0a4",
"speed": {
"cpu": 6860.2096209861,
"gpu": null,
"nwords": 696811
},
"url": "https://explosion.ai",
"vectors": {
"keys": 276087,
"name": "de_model.vectors",
"vectors": 20000,
"width": 300
},
"version": "2.1.0a6",
"size": "210 MB",
"checksum": "4338d30cbf5f8c2c25d05e2d830d5d2783024c16f436fabcb397b61fa3a40a92"
}
38 changes: 38 additions & 0 deletions meta/de_core_news_sm-2.1.0a6.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"accuracy": {
"ents_f": 83.3361633136,
"ents_p": 84.2093028404,
"ents_r": 82.4809445848,
"las": 89.5520616147,
"tags_acc": 97.2045485755,
"token_acc": 99.4792618308,
"uas": 91.623693173
},
"author": "Explosion AI",
"description": "German multi-task CNN trained on the TIGER and WikiNER corpus. Assigns context-specific token vectors, POS tags, dependency parse and named entities. Supports identification of PER, LOC, ORG and MISC entities.",
"email": "contact@explosion.ai",
"lang": "de",
"license": "MIT",
"name": "core_news_sm",
"notes": "Because the model is trained on Wikipedia, it may perform inconsistently on many genres, such as social media text. The NER accuracy refers to the \"silver standard\" annotations in the WikiNER corpus. Accuracy on these annotations tends to be higher than correct human annotations.",
"parent_package": "spacy-nightly",
"pipeline": [
"tagger",
"parser",
"ner"
],
"sources": [
"TIGER Corpus",
"Wikipedia"
],
"spacy_version": ">=2.1.0a4",
"speed": {
"cpu": 7351.1354011755,
"gpu": null,
"nwords": 696811
},
"url": "https://explosion.ai",
"version": "2.1.0a6",
"size": "10 MB",
"checksum": "60c70639a46b0888154815ebb932bbfe3366134be41b959b62047698bd654f45"
}
44 changes: 44 additions & 0 deletions meta/el_core_news_md-2.1.0a6.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"accuracy": {
"ents_f": 81.0020876827,
"ents_p": 80.4979253112,
"ents_r": 81.512605042,
"las": 85.1989785061,
"tags_acc": 96.5910179067,
"token_acc": 100.0,
"uas": 88.3592253671
},
"author": "Giannis Daras",
"description": "Greek pipeline with word vectors, POS tags, dependencies and named entities. Word vectors use Facebook's FastText Common Crawl vectors, pruned to a vocabulary of 20,000 items. Words outside the most frequent were mapped to the nearest neighbouring vector within the 20,000 rows retained. Syntax (dependencies and POS tags) trained from the Universal Dependencies conversion of the Greek Dependency Treebank (v2.2). Named entity annotations were created by Giannis Daras using Prodigy, using the OntoNotes 5 annotation schema.",
"email": "[email protected]",
"lang": "el",
"license": "CC BY-NC 4.0",
"name": "core_news_md",
"parent_package": "spacy-nightly",
"pipeline": [
"tagger",
"parser",
"ner"
],
"sources": [
"Common Crawl",
"Greek Dependency Treebank",
"Daras GSOC 2018"
],
"spacy_version": ">=2.1.0a4",
"speed": {
"cpu": 7654.0669709891,
"gpu": null,
"nwords": 6191
},
"url": "https://github.com/eellak/gsoc2018-spacy",
"vectors": {
"keys": 1999938,
"name": "el_model.vectors",
"vectors": 20000,
"width": 300
},
"version": "2.1.0a6",
"size": "126 MB",
"checksum": "bbbc474cc51dec46018abf06f6b8b61f3f35756d389fee8053bb533e1ec610ee"
}
37 changes: 37 additions & 0 deletions meta/el_core_news_sm-2.1.0a6.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"accuracy": {
"ents_f": 73.1006160164,
"ents_p": 71.4859437751,
"ents_r": 74.7899159664,
"las": 81.4743896601,
"tags_acc": 94.7716173513,
"token_acc": 100.0,
"uas": 84.9742034998
},
"author": "Giannis Daras",
"description": "Greek pipeline with word vectors, POS tags, dependencies and named entities. Word vectors use Facebook's FastText Common Crawl vectors, pruned to a vocabulary of 20,000 items. Words outside the most frequent were mapped to the nearest neighbouring vector within the 20,000 rows retained. Syntax (dependencies and POS tags) trained from the Universal Dependencies conversion of the Greek Dependency Treebank (v2.2). Named entity annotations were created by Giannis Daras using Prodigy, using the OntoNotes 5 annotation schema.",
"email": "[email protected]",
"lang": "el",
"license": "CC BY-NC 4.0",
"name": "core_news_sm",
"parent_package": "spacy-nightly",
"pipeline": [
"tagger",
"parser",
"ner"
],
"sources": [
"Greek Dependency Treebank",
"Daras GSOC 2018"
],
"spacy_version": ">=2.1.0a4",
"speed": {
"cpu": 8658.590425769,
"gpu": null,
"nwords": 6191
},
"url": "https://github.com/eellak/gsoc2018-spacy",
"version": "2.1.0a6",
"size": "10 MB",
"checksum": "4ca49e6fafabff31df82e53df79f545f6bf78b12fa22146e867ce757aeb55ee4"
}
43 changes: 43 additions & 0 deletions meta/en_core_web_lg-2.1.0a6.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"accuracy": {
"ents_f": 86.6234201991,
"ents_p": 86.6767388507,
"ents_r": 86.5701671045,
"las": 90.2009214654,
"tags_acc": 97.0161369547,
"token_acc": 99.0646795541,
"uas": 91.9664476128
},
"author": "Explosion AI",
"description": "English multi-task CNN trained on OntoNotes, with GloVe vectors trained on Common Crawl. Assigns word vectors, context-specific token vectors, POS tags, dependency parse and named entities.",
"email": "contact@explosion.ai",
"lang": "en",
"license": "MIT",
"name": "core_web_lg",
"parent_package": "spacy-nightly",
"pipeline": [
"tagger",
"parser",
"ner"
],
"sources": [
"OntoNotes 5",
"Common Crawl"
],
"spacy_version": ">=2.1.0a4",
"speed": {
"cpu": 7064.0263326354,
"gpu": null,
"nwords": 291344
},
"url": "https://explosion.ai",
"vectors": {
"keys": 684830,
"name": "en_model.vectors",
"vectors": 684831,
"width": 300
},
"version": "2.1.0a6",
"size": "788 MB",
"checksum": "6ee2325f253b8f74693c07311071eab99e504acfc37f8da7a6a88a53fb0496f9"
}
43 changes: 43 additions & 0 deletions meta/en_core_web_md-2.1.0a6.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"accuracy": {
"ents_f": 86.403312444,
"ents_p": 86.5049577055,
"ents_r": 86.3019057732,
"las": 90.1602551636,
"tags_acc": 96.9558151118,
"token_acc": 99.0646795541,
"uas": 91.9405655607
},
"author": "Explosion AI",
"description": "English multi-task CNN trained on OntoNotes, with GloVe vectors trained on Common Crawl. Assigns word vectors, context-specific token vectors, POS tags, dependency parse and named entities.",
"email": "contact@explosion.ai",
"lang": "en",
"license": "MIT",
"name": "core_web_md",
"parent_package": "spacy-nightly",
"pipeline": [
"tagger",
"parser",
"ner"
],
"sources": [
"OntoNotes 5",
"Common Crawl"
],
"spacy_version": ">=2.1.0a4",
"speed": {
"cpu": 7641.3076324931,
"gpu": null,
"nwords": 291344
},
"url": "https://explosion.ai",
"vectors": {
"keys": 684830,
"name": "en_model.vectors",
"vectors": 20000,
"width": 300
},
"version": "2.1.0a6",
"size": "91 MB",
"checksum": "ea971369a13056cee2bddaaf1c5b342b16bc0a0f45228abde4b4c4635f469f1f"
}
36 changes: 36 additions & 0 deletions meta/en_core_web_sm-2.1.0a6.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"accuracy": {
"ents_f": 85.494302433,
"ents_p": 85.6597845601,
"ents_r": 85.3294584474,
"las": 89.6400460442,
"tags_acc": 96.7982889278,
"token_acc": 99.0646795541,
"uas": 91.5273154342
},
"author": "Explosion AI",
"description": "English multi-task CNN trained on OntoNotes. Assigns context-specific token vectors, POS tags, dependency parse and named entities.",
"email": "contact@explosion.ai",
"lang": "en",
"license": "MIT",
"name": "core_web_sm",
"parent_package": "spacy-nightly",
"pipeline": [
"tagger",
"parser",
"ner"
],
"sources": [
"OntoNotes 5"
],
"spacy_version": ">=2.1.0a4",
"speed": {
"cpu": 7435.3852845254,
"gpu": null,
"nwords": 291344
},
"url": "https://explosion.ai",
"version": "2.1.0a6",
"size": "10 MB",
"checksum": "927785b2aabb43d888437295a11b071798570dbd8c67cf80c611bc1c6927898c"
}
Loading

0 comments on commit 0923175

Please sign in to comment.