-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from PyThaiNLP/fix-thainer
Update corpus version
- Loading branch information
Showing
4 changed files
with
201 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,134 +1,220 @@ | ||
{ | ||
"test": { | ||
"name": "test", | ||
"file_name": "test.txt", | ||
"version": "0.1", | ||
"download": "https://github.com/PyThaiNLP/pythainlp-corpus/releases/download/test-0.1/test.txt", | ||
"text": "It's test file.", | ||
"md5": "ff1f76282b7adcb310ffad3ecd867c3d", | ||
"about": "-", | ||
"homepage": "https://github.com/PyThaiNLP/pythainlp-corpus/", | ||
"authors": "Wannaphong Phatthiyaphaibun" | ||
"latest_version": "0.1", | ||
"description": "It's a test file.", | ||
"long_description": "A handy dummy corpus used for testing (like in unit testing) purpose.", | ||
"url": "https://github.com/PyThaiNLP/pythainlp-corpus/", | ||
"project_urls": { | ||
"project_page": "https://www.thainlp.org/", | ||
"source": "https://github.com/PyThaiNLP/pythainlp-corpus/" | ||
}, | ||
"license": "cc-by-sa-4.0", | ||
"authors": [ | ||
"Wannaphong Phatthiyaphaibun" | ||
], | ||
"author_email": "[email protected]", | ||
"versions": { | ||
"0.1": { | ||
"filename": "test.txt", | ||
"download_url": "https://github.com/PyThaiNLP/pythainlp-corpus/releases/download/test-0.1/test.txt", | ||
"md5": "ff1f76282b7adcb310ffad3ecd867c3d" | ||
} | ||
} | ||
}, | ||
"crfcut": { | ||
"name": "crfcut", | ||
"file_name": "sentenceseg-ted.model", | ||
"version": "0.1", | ||
"download": "https://github.com/vistec-AI/ted_crawler/blob/master/models/sentenceseg-ted.model?raw=true", | ||
"text": "Thai sentence segmentation with CRF trained on TED dataset", | ||
"md5": "-", | ||
"about": "-", | ||
"homepage": "https://github.com/vistec-AI/ted_crawler/", | ||
"authors": "Charin Polpanumas" | ||
"latest_version": "0.1", | ||
"description": "Thai sentence segmentation with CRF trained on TED dataset", | ||
"long_description": "-", | ||
"url": "https://github.com/vistec-AI/ted_crawler/", | ||
"authors": [ | ||
"Charin Polpanumas" | ||
], | ||
"author_email": "", | ||
"license": "cc-by-sa-4.0", | ||
"versions": { | ||
"0.1": { | ||
"filename": "sentenceseg-ted.model", | ||
"download_url": "https://github.com/vistec-AI/ted_crawler/blob/master/models/sentenceseg-ted.model?raw=true", | ||
"md5": "-" | ||
} | ||
} | ||
}, | ||
"g2p": { | ||
"name": "g2p", | ||
"file_name": "wiktionary-11-2-2020.tsv", | ||
"version": "0.1", | ||
"download": "https://raw.githubusercontent.com/PyThaiNLP/lexicon-thai/master/G2P/wiktionary-11-2-2020.tsv", | ||
"text": "Grapheme to Phoneme (G2P) ภาษาไทย", | ||
"md5": "-", | ||
"about": "ข้อมูลดึงมาจากวิกิพจนานุกรมภาษาไทย (Thai Wiktionary)", | ||
"homepage": "https://github.com/PyThaiNLP/lexicon-thai/tree/master/G2P", | ||
"authors": "Wannaphong Phatthiyaphaibun" | ||
"latest_version": "0.1", | ||
"description": "Grapheme to Phoneme (G2P) ภาษาไทย", | ||
"long_description": "ข้อมูลดึงมาจากวิกิพจนานุกรมภาษาไทย (Thai Wiktionary)", | ||
"url": "https://github.com/PyThaiNLP/lexicon-thai/tree/master/G2P", | ||
"authors": [ | ||
"Wannaphong Phatthiyaphaibun" | ||
], | ||
"author_email": "[email protected]", | ||
"license": "cc-by-sa-4.0", | ||
"versions": { | ||
"0.1": { | ||
"filename": "wiktionary-11-2-2020.tsv", | ||
"download_url": "https://raw.githubusercontent.com/PyThaiNLP/lexicon-thai/master/G2P/wiktionary-11-2-2020.tsv", | ||
"md5": "-" | ||
} | ||
} | ||
}, | ||
"thai-g2p": { | ||
"name": "thai-g2p", | ||
"file_name": "thaig2p-0.1.tar", | ||
"version": "0.1", | ||
"download": "https://github.com/PyThaiNLP/pythainlp-corpus/releases/download/thaig2p-v0.1/thaig2p-0.1.tar", | ||
"text": "Thai Grapheme to Phoneme (G2P)", | ||
"md5": "-", | ||
"about": "Thai Grapheme to Phoneme (G2P) in PyTorch", | ||
"homepage": "https://github.com/wannaphong/thai-g2p/", | ||
"authors": "Wannaphong Phatthiyaphaibun" | ||
"latest_version": "0.1", | ||
"description": "Thai Grapheme to Phoneme (G2P)", | ||
"long_description": "Thai Grapheme to Phoneme (G2P) in PyTorch", | ||
"url": "https://github.com/wannaphong/thai-g2p/", | ||
"authors": [ | ||
"Wannaphong Phatthiyaphaibun" | ||
], | ||
"author_email": "[email protected]", | ||
"license": "apache-2.0", | ||
"versions": { | ||
"0.1": { | ||
"filename": "thaig2p-0.1.tar", | ||
"download_url": "https://github.com/PyThaiNLP/pythainlp-corpus/releases/download/thaig2p-v0.1/thaig2p-0.1.tar", | ||
"md5": "-" | ||
} | ||
} | ||
}, | ||
"thai2fit_wv": { | ||
"name": "thai2fit_wv", | ||
"file_name": "thai2vec.bin", | ||
"version": "0.1", | ||
"download": "https://www.dropbox.com/s/yuq0gp1eges8j5n/thai2vec.bin?dl=1", | ||
"text": "thai2vec word embeddings", | ||
"md5": "-", | ||
"about": "-", | ||
"homepage": "https://github.com/cstorm125/thai2fit/", | ||
"authors": "Charin Polpanumas" | ||
"latest_version": "0.1", | ||
"description": "thai2vec word embeddings", | ||
"long_description": "-", | ||
"url": "https://github.com/cstorm125/thai2fit/", | ||
"authors": [ | ||
"Charin Polpanumas" | ||
], | ||
"author_email": "", | ||
"license": "", | ||
"versions": { | ||
"0.1": { | ||
"filename": "thai2vec.bin", | ||
"download_url": "https://www.dropbox.com/s/yuq0gp1eges8j5n/thai2vec.bin?dl=1", | ||
"md5": "-" | ||
} | ||
} | ||
}, | ||
"thai2rom-dataset": { | ||
"name": "thai2rom-dataset", | ||
"file_name": "thai2rom.csv", | ||
"version": "0.1", | ||
"download": "https://raw.githubusercontent.com/wannaphong/thai-romanization/master/dataset/data.csv", | ||
"text": "-", | ||
"md5": "-", | ||
"about": "-", | ||
"homepage": "https://github.com/wannaphongcom/thai-romanization/", | ||
"authors": "Wannaphong Phatthiyaphaibun" | ||
"latest_version": "0.1", | ||
"description": "-", | ||
"long_description": "-", | ||
"url": "https://github.com/wannaphongcom/thai-romanization/", | ||
"authors": [ | ||
"Wannaphong Phatthiyaphaibun" | ||
], | ||
"author_email": "[email protected]", | ||
"license": "apache-2.0", | ||
"versions": { | ||
"0.1": { | ||
"filename": "thai2rom.csv", | ||
"download_url": "https://raw.githubusercontent.com/wannaphong/thai-romanization/master/dataset/data.csv", | ||
"md5": "-" | ||
} | ||
} | ||
}, | ||
"thai2rom-pytorch": { | ||
"name": "thai2rom-pytorch", | ||
"file_name": "thai2rom-pytorch.tar", | ||
"version": "0.1", | ||
"download": "https://raw.githubusercontent.com/c4n/thai-romanization/master/notebook/thai2rom-pytorch.tar", | ||
"text": "-", | ||
"md5": "-", | ||
"about": "LSTM encoder-decoder model", | ||
"homepage": "https://github.com/c4n/thai-romanization/", | ||
"authors": "Can Udomcharoenchaikit" | ||
"latest_version": "0.1", | ||
"description": "-", | ||
"long_description": "LSTM encoder-decoder model", | ||
"url": "https://github.com/c4n/thai-romanization/", | ||
"authors": [ | ||
"Can Udomcharoenchaikit" | ||
], | ||
"author_email": "", | ||
"license": "apache-2.0", | ||
"versions": { | ||
"0.1": { | ||
"filename": "thai2rom-pytorch.tar", | ||
"download_url": "https://raw.githubusercontent.com/c4n/thai-romanization/master/notebook/thai2rom-pytorch.tar", | ||
"md5": "-" | ||
} | ||
} | ||
}, | ||
"thai2rom-pytorch-attn": { | ||
"name": "thai2rom-pytorch-attn", | ||
"file_name": "thai2rom-pytorch-attn-v0.1.tar", | ||
"version": "0.1", | ||
"download": "https://raw.githubusercontent.com/artificiala/thai-romanization/master/notebook/thai2rom-pytorch-attn-v0.1.tar", | ||
"text": "-", | ||
"md5": "-", | ||
"about": "LSTM encoder-decoder model with attention mechanism", | ||
"homepage": "https://github.com/artificiala/thai-romanization/", | ||
"authors": "Chakri Lowphansirikul" | ||
"latest_version": "0.1", | ||
"description": "-", | ||
"long_description": "LSTM encoder-decoder model with attention mechanism", | ||
"url": "https://github.com/artificiala/thai-romanization/", | ||
"authors": [ | ||
"Chakri Lowphansirikul" | ||
], | ||
"author_email": "", | ||
"license": "apache-2.0", | ||
"versions": { | ||
"0.1": { | ||
"filename": "thai2rom-pytorch-attn-v0.1.tar", | ||
"download_url": "https://raw.githubusercontent.com/artificiala/thai-romanization/master/notebook/thai2rom-pytorch-attn-v0.1.tar", | ||
"md5": "-" | ||
} | ||
} | ||
}, | ||
"thainer-1-3": { | ||
"thainer": { | ||
"name": "thainer", | ||
"file_name": "thainer.model", | ||
"version": "1.3", | ||
"download": "https://github.com/PyThaiNLP/pythainlp-corpus/releases/download/thainer-1.3/data.model", | ||
"text": "Thai Named Entity Recognition For PyThaiNLP", | ||
"md5": "-", | ||
"about": "-", | ||
"homepage": "https://github.com/wannaphong/thai-ner/", | ||
"authors": "Wannaphong Phatthiyaphaibun" | ||
}, | ||
"thainer-1-4": { | ||
"name": "thainer", | ||
"file_name": "thai-ner-1-4.crfsuite", | ||
"version": "1.4", | ||
"download": "https://github.com/PyThaiNLP/pythainlp-corpus/releases/download/thainer-1.4/thai-ner-1-4.crfsuite", | ||
"text": "Thai Named Entity Recognition For PyThaiNLP", | ||
"md5": "-", | ||
"about": "-", | ||
"homepage": "https://github.com/wannaphong/thai-ner/", | ||
"authors": "Wannaphong Phatthiyaphaibun" | ||
"latest_version": "1.4", | ||
"description": "Thai Named Entity Recognition For PyThaiNLP", | ||
"long_description": "-", | ||
"url": "https://github.com/wannaphong/thai-ner/", | ||
"authors": [ | ||
"Wannaphong Phatthiyaphaibun" | ||
], | ||
"author_email": "[email protected]", | ||
"license": "apache-2.0", | ||
"versions": { | ||
"1.3": { | ||
"filename": "thai-ner-1-3.crfsuite", | ||
"download_url": "https://github.com/PyThaiNLP/pythainlp-corpus/releases/download/thainer-1.3/data.model", | ||
"md5": "-" | ||
}, | ||
"1.4": { | ||
"filename": "thai-ner-1-4.crfsuite", | ||
"download_url": "https://github.com/PyThaiNLP/pythainlp-corpus/releases/download/thainer-1.4/thai-ner-1-4.crfsuite", | ||
"md5": "-" | ||
} | ||
} | ||
}, | ||
"wiki_itos_lstm": { | ||
"name": "wiki_itos_lstm", | ||
"file_name": "itos_lstm.pkl", | ||
"version": "0.32", | ||
"download": "https://www.dropbox.com/s/87p5ugshid4mbcm/thwiki_itos.pkl?dl=1", | ||
"text": "ULMFit index to text for LSTM", | ||
"md5": "-", | ||
"about": "-", | ||
"homepage": "https://github.com/cstorm125/thai2fit/", | ||
"authors": "Charin Polpanumas" | ||
"latest_version": "0.32", | ||
"description": "ULMFit index to text for LSTM", | ||
"long_description": "-", | ||
"url": "https://github.com/cstorm125/thai2fit/", | ||
"authors": [ | ||
"Charin Polpanumas" | ||
], | ||
"author_email": "", | ||
"license": "cc-by-sa-4.0", | ||
"versions": { | ||
"0.32": { | ||
"filename": "itos_lstm.pkl", | ||
"download_url": "https://www.dropbox.com/s/87p5ugshid4mbcm/thwiki_itos.pkl?dl=1", | ||
"md5": "-" | ||
} | ||
} | ||
}, | ||
"wiki_lm_lstm": { | ||
"name": "wiki_lm_lstm", | ||
"file_name": "thwiki_model_lstm.pth", | ||
"version": "0.32", | ||
"download": "https://www.dropbox.com/s/7za2o1nmq8s3fex/thwiki_lm.pth?dl=1", | ||
"text": "Wiki-pretrained ULMFit language model for LSTM", | ||
"md5": "-", | ||
"about": "-", | ||
"homepage": "https://github.com/cstorm125/thai2fit/", | ||
"authors": "Charin Polpanumas" | ||
"latest_version": "0.32", | ||
"description": "Wiki-pretrained ULMFit language model for LSTM", | ||
"long_description": "-", | ||
"url": "https://github.com/cstorm125/thai2fit/", | ||
"authors": [ | ||
"Charin Polpanumas" | ||
], | ||
"author_email": "", | ||
"license": "cc-by-sa-4.0", | ||
"versions": { | ||
"0.32": { | ||
"filename": "thwiki_model_lstm.pth", | ||
"download_url": "https://www.dropbox.com/s/7za2o1nmq8s3fex/thwiki_lm.pth?dl=1", | ||
"md5": "-" | ||
} | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters