diff --git a/pom.xml b/pom.xml
index 2e4f6b13..06619eec 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,7 +5,7 @@
ansj_seg
jar
ansj_seg
- 5.1.1
+ 5.1.2
best java chinese word seg !
https://github.com/NLPchina/ansj_seg
diff --git a/src/main/java/org/ansj/dic/LearnTool.java b/src/main/java/org/ansj/dic/LearnTool.java
index 829003b9..d1e31455 100644
--- a/src/main/java/org/ansj/dic/LearnTool.java
+++ b/src/main/java/org/ansj/dic/LearnTool.java
@@ -7,9 +7,12 @@
import org.ansj.app.crf.SplitWord;
import org.ansj.domain.Nature;
import org.ansj.domain.NewWord;
+import org.ansj.domain.TermNatures;
import org.ansj.recognition.arrimpl.AsianPersonRecognition;
import org.ansj.recognition.arrimpl.ForeignPersonRecognition;
+import org.ansj.recognition.impl.NatureRecognition;
import org.ansj.util.Graph;
+import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.domain.SmartForest;
import org.nlpcn.commons.lang.util.CollectionUtil;
@@ -40,15 +43,22 @@ public class LearnTool {
*/
private final SmartForest sf = new SmartForest();
+ /**
+ * User-defined dictionaries; words already present in them are excluded from new-word learning.
+ */
+ private Forest[] forests;
+
/**
* 公司名称学习.
*
* @param graph
*/
- public void learn(Graph graph, SplitWord splitWord) {
+ public void learn(Graph graph, SplitWord splitWord, Forest... forests) {
this.splitWord = splitWord;
+ this.forests = forests;
+
// 亚洲人名识别
if (isAsianName) {
findAsianPerson(graph);
@@ -76,7 +86,12 @@ private void addListToTerm(List newWords) {
if (newWords.size() == 0)
return;
for (NewWord newWord : newWords) {
- addTerm(newWord);
+
+ TermNatures termNatures = new NatureRecognition(forests).getTermNatures(newWord.getName());
+
+ if (termNatures == TermNatures.NULL) {
+ addTerm(newWord);
+ }
}
}
@@ -93,12 +108,12 @@ public void addTerm(NewWord newWord) {
temp.update(newWord.getNature(), newWord.getAllFreq());
} else {
count++;
- if(splitWord==null){
+ if (splitWord == null) {
newWord.setScore(-1);
- }else{
- newWord.setScore(-splitWord.cohesion(newWord.getName()));
+ } else {
+ newWord.setScore(-splitWord.cohesion(newWord.getName()));
}
-
+
synchronized (sf) {
sf.add(newWord.getName(), newWord);
}
@@ -112,8 +127,7 @@ public SmartForest getForest() {
/**
* 返回学习到的新词.
*
- * @param num
- * 返回数目.0为全部返回
+ * @param num 返回数目.0为全部返回
* @return
*/
public List> getTopTree(int num) {
@@ -138,7 +152,7 @@ public List> getTopTree(int num, Nature nature) {
}
private void valueResult(SmartForest smartForest, HashMap hm, Nature nature) {
-
+
if (smartForest == null || smartForest.branches == null) {
return;
}
diff --git a/src/main/java/org/ansj/splitWord/analysis/NlpAnalysis.java b/src/main/java/org/ansj/splitWord/analysis/NlpAnalysis.java
index ea75d6e8..1c68da01 100644
--- a/src/main/java/org/ansj/splitWord/analysis/NlpAnalysis.java
+++ b/src/main/java/org/ansj/splitWord/analysis/NlpAnalysis.java
@@ -63,7 +63,7 @@ public List merger() {
graph.walkPath();
- learn.learn(graph, splitWord);
+ learn.learn(graph, splitWord ,forests);
// 姓名识别
if (graph.hasPerson && isNameRecognition) {