Skip to content

Commit

Permalink
修复learntool部分发现用户自定义词典中词语的问题
Browse files Browse the repository at this point in the history
  • Loading branch information
孙健 committed Mar 4, 2017
1 parent c9dab2c commit dd34a3b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 11 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<artifactId>ansj_seg</artifactId>
<packaging>jar</packaging>
<name>ansj_seg</name>
<version>5.1.1</version>
<version>5.1.2</version>
<description>best java chinese word seg ! </description>
<url>https://github.com/NLPchina/ansj_seg</url>
<licenses>
Expand Down
32 changes: 23 additions & 9 deletions src/main/java/org/ansj/dic/LearnTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
import org.ansj.app.crf.SplitWord;
import org.ansj.domain.Nature;
import org.ansj.domain.NewWord;
import org.ansj.domain.TermNatures;
import org.ansj.recognition.arrimpl.AsianPersonRecognition;
import org.ansj.recognition.arrimpl.ForeignPersonRecognition;
import org.ansj.recognition.impl.NatureRecognition;
import org.ansj.util.Graph;
import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.domain.SmartForest;
import org.nlpcn.commons.lang.util.CollectionUtil;

Expand Down Expand Up @@ -40,15 +43,22 @@ public class LearnTool {
*/
private final SmartForest<NewWord> sf = new SmartForest<NewWord>();

/**
* 学习新词时排除用户自定义词典中的词语
*/
private Forest[] forests;

/**
* 公司名称学习.
*
* @param graph
*/
public void learn(Graph graph, SplitWord splitWord) {
public void learn(Graph graph, SplitWord splitWord, Forest... forests) {

this.splitWord = splitWord;

this.forests = forests;

// 亚洲人名识别
if (isAsianName) {
findAsianPerson(graph);
Expand Down Expand Up @@ -76,7 +86,12 @@ private void addListToTerm(List<NewWord> newWords) {
if (newWords.size() == 0)
return;
for (NewWord newWord : newWords) {
addTerm(newWord);

TermNatures termNatures = new NatureRecognition(forests).getTermNatures(newWord.getName());

if (termNatures == TermNatures.NULL) {
addTerm(newWord);
}
}
}

Expand All @@ -93,12 +108,12 @@ public void addTerm(NewWord newWord) {
temp.update(newWord.getNature(), newWord.getAllFreq());
} else {
count++;
if(splitWord==null){
if (splitWord == null) {
newWord.setScore(-1);
}else{
newWord.setScore(-splitWord.cohesion(newWord.getName()));
} else {
newWord.setScore(-splitWord.cohesion(newWord.getName()));
}

synchronized (sf) {
sf.add(newWord.getName(), newWord);
}
Expand All @@ -112,8 +127,7 @@ public SmartForest<NewWord> getForest() {
/**
* 返回学习到的新词.
*
* @param num
* 返回数目.0为全部返回
* @param num 返回数目.0为全部返回
* @return
*/
public List<Entry<String, Double>> getTopTree(int num) {
Expand All @@ -138,7 +152,7 @@ public List<Entry<String, Double>> getTopTree(int num, Nature nature) {
}

private void valueResult(SmartForest<NewWord> smartForest, HashMap<String, Double> hm, Nature nature) {

if (smartForest == null || smartForest.branches == null) {
return;
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/ansj/splitWord/analysis/NlpAnalysis.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public List<Term> merger() {

graph.walkPath();

learn.learn(graph, splitWord);
learn.learn(graph, splitWord ,forests);

// 姓名识别
if (graph.hasPerson && isNameRecognition) {
Expand Down

0 comments on commit dd34a3b

Please sign in to comment.