Merge pull request #40 from vunb/dev
expose API to get vntk logger
vunb authored Jun 2, 2018
2 parents 4254239 + 74066b3 commit e49d71c
Showing 3 changed files with 77 additions and 37 deletions.
68 changes: 46 additions & 22 deletions README.md
@@ -18,33 +18,57 @@ If you are interested in contributing to **vntk**, or just hacking on it, then f

Jump to guide: [How to build an NLP API Server using Vntk](#nlp-api-server).

# Documentation

* [**CLI Utilities**](#cli-utilities)
* [1. Installation](#1-installation)
* [2. Usage Example](#2-usage-example)
* [**API Usage**](#api-usage)
* [1. Tokenizer](#1-tokenizer)
* [2. Word Segmentation](#2-word-segmentation)
* [3. POS Tagging](#3-pos-tagging)
* [4. Chunking](#4-chunking)
* [5. Named Entity Recognition](#5-named-entity-recognition)
* [PER LOC ORG](#ner-per-loc-org)
* [Date time](#ner-date-time)
* [Custom NER](#ner-custom)
* [6. Utility](#6-utility)
* [Dictionary](#dictionary)
* [Clean html](#clean-html)
* [7. TF-IDF](#7-tf-idf)
* [8. Classifiers](#8-classifiers)
* [Naive Bayes](#bayes-classifier)
* [fastText](#fasttext-classifier)
* [9. Language identification](#9-language-identification)
* [10. CRFSuite](#10-crfsuite)
* [**NLP API Server**](#nlp-api-server)
* [**Contributing**](#contributing)
* [**License**](#license)

# CLI Utilities

## 1. Installation

The Vntk CLI will install nice and easy with:

> npm install -g @vntk/cli
Then you need to pay attention to how to use these CLI utilities to preprocess text from files, especially Vietnamese text, as **described at the end of each API's usage section**. If you wish to improve the tool, please fork and make it better [here](https://github.com/vntk/vntk-cli).

## 2. Usage Example

After the CLI is installed, open your `Terminal` (or Command Prompt on Windows) and type the command you want to use.

For instance, the following command opens a file and processes it with the Word Tokenizer to tokenize each line in the file.

```bash
# Process a text file or a folder
$ vntk ws input.txt --output output.txt

# The output file will contain the tokenized lines.
```
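The same pattern works for the other commands documented below; for instance, the tokenizer has a `vntk tok` command (a sketch that reuses the `--output` flag shown above):

```bash
# Tokenize every line of input.txt and write the result to tokenized.txt
$ vntk tok input.txt --output tokenized.txt
```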

# API Usage

## 1. Tokenizer

@@ -68,8 +92,8 @@ Command line: `vntk tok <file_name.txt>`
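The body of the Tokenizer example is collapsed in this diff. A rough sketch of the API usage, assuming `vntk.tokenizer()` returns an object with a `tokenize(text)` method (matching the `exports.tokenizer` factory in `lib/vntk.js` below):

```js
// Sketch only: tokenize(text) is assumed to return an array of tokens.
var vntk = require('vntk');
var tokenizer = vntk.tokenizer();

console.log(tokenizer.tokenize('Giá khuyến mãi: 140.000đ / kg'));
```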

## 2. Word Segmentation

> Vietnamese Word Segmentation using Conditional Random Fields, called: `Word Tokenizer`.
> Word Tokenizer helps break text into arrays of words!
```js
var vntk = require('vntk');
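// The rest of this example is collapsed in the diff view; a minimal sketch of the likely
// continuation, assuming the CRF word tokenizer exposes a tag(text) method:
var tokenizer = vntk.wordTokenizer();

// tag() is assumed to join the syllables of each compound word with underscores.
console.log(tokenizer.tag('Chào mừng các bạn trẻ tới thành phố Hồ Chí Minh'));
```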
@@ -256,7 +280,7 @@ vntk clean <file_name1.txt>
[Term Frequency–Inverse Document Frequency (tf-idf)](http://en.wikipedia.org/wiki/Tf%E2%80%93idf) is implemented to determine how important a word (or words) is to a document relative to a corpus. See the following example.

```js
var vntk = require('vntk');
var tfidf = new vntk.TfIdf();

tfidf.addDocument('Đại tướng Trần Đại Quang - Ủy viên Bộ Chính trị, Bí thư Đảng ủy Công an Trung ương, Bộ trưởng Bộ Công an.');
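// The rest of this example is collapsed in the diff view. A hedged sketch of how the scores
// could be read back, assuming vntk's TfIdf follows the natural.js TfIdf API, where
// tfidfs(terms, callback) iterates over every document added so far:
tfidf.tfidfs('bộ công an', function (i, measure) {
    console.log('document #' + i + ' is ' + measure);
});
```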
44 changes: 30 additions & 14 deletions lib/vntk.js
@@ -11,17 +11,20 @@ const fs = require('fs')
const util = require('util')
// singleton instance

/**
* Regex Tokenizer
*/
exports.tokenizer = () => require('./tokenizer');

/**
* Word Segmentation
* It is also a Word Tokenizer, which uses a CRF model
* @param {String} modelFileName new custom model
*/
exports.wordTokenizer = (modelFileName) => {
    if (modelFileName && fs.existsSync(modelFileName)) {
        return require('./word_tokenizer').newModel(modelFileName)
    } else {
        return require('./word_tokenizer')
    }
}
@@ -31,9 +34,9 @@ exports.wordTokenizer = (modelFileName) => {
* @param {String} modelFileName new custom model
*/
exports.posTag = (modelFileName) => {
    if (modelFileName && fs.existsSync(modelFileName)) {
        return require('./pos_tag').newModel(modelFileName)
    } else {
        return require('./pos_tag')
    }
}
@@ -43,9 +46,9 @@ exports.posTag = (modelFileName) => {
* @param {String} modelFileName new custom model
*/
exports.chunking = (modelFileName) => {
    if (modelFileName && fs.existsSync(modelFileName)) {
        return require('./chunking').newModel(modelFileName)
    } else {
        return require('./chunking')
    }
};
@@ -55,9 +58,9 @@ exports.chunking = (modelFileName) => {
* @param {String} modelFileName new custom model
*/
exports.ner = (modelFileName) => {
    if (modelFileName && fs.existsSync(modelFileName)) {
        return require('./ner').newModel(modelFileName)
    } else {
        return require('./ner')
    }
};
@@ -67,9 +70,9 @@ exports.ner = (modelFileName) => {
* @param {String} modelFileName new custom model
*/
exports.langid = (modelFileName) => {
    if (modelFileName && fs.existsSync(modelFileName)) {
        return require('./langid').newModel(modelFileName)
    } else {
        return require('./langid')
    }
};
@@ -79,9 +82,9 @@ exports.langid = (modelFileName) => {
* @param {String} modelFileName path to new updated dictionary
*/
exports.dictionary = (modelFileName) => {
    if (modelFileName && fs.existsSync(modelFileName)) {
        return new require('@vntk/dictionary').Dictionary(modelFileName)
    } else {
        return require('@vntk/dictionary')
    }
}
@@ -100,10 +103,23 @@ exports.BayesClassifier = require('./classifiers').BayesClassifier;
exports.LogisticRegressionClassifier = require('./classifiers').LogisticRegressionClassifier;
exports.FastTextClassifier = require('./classifiers').FastTextClassifier;

/**
* Utilities
*/
exports.util = () => require('./util');

/**
* Get a new logger
* @param {String} name
*/
exports.logger = (name) => {
    return require('./logger')(name);
}

/**
* Deprecated
* Please use the lower camelCase API with a custom model.
*/
exports.Langid = util.deprecate(exports.langid, '`vntk.Langid()` is deprecated, please use `vntk.langid([custom_model])` instead.')
exports.getDictionary = util.deprecate(exports.dictionary, '`vntk.getDictionary()` is deprecated, please use `vntk.dictionary([custom_model])` instead.')
exports.wordSent = util.deprecate(exports.wordTokenizer, '`vntk.wordSent()` is deprecated, please use `vntk.wordTokenizer([custom_model])` instead.')
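
The newly exposed `vntk.logger(name)` is the point of this pull request. Below is a minimal usage sketch, not taken from the repository: it assumes the object returned by `./logger` is a standard leveled logger with an `info` method, and `./models/my-model.bin` is a purely hypothetical custom CRF model path used to illustrate the optional `modelFileName` parameter.

```js
const vntk = require('vntk');

// Newly exposed logger factory; the name tags the log output.
// The info() method is an assumption about the underlying logger implementation.
const logger = vntk.logger('my-app');
logger.info('vntk logger is ready');

// CRF-backed modules accept an optional custom model path; if the file does not
// exist, the bundled default model is used (see the existsSync checks above).
const ner = vntk.ner('./models/my-model.bin'); // hypothetical path
logger.info(ner.tag('Thủ tướng Nguyễn Xuân Phúc thăm Hà Nội.'));
```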
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "vntk",
"version": "1.4.0",
"version": "1.4.1",
"description": "Vietnamese NLP Toolkit for Node",
"main": "index.js",
"bin": {
