Skip to content

Commit

Permalink
重构了同义词功能
Browse files Browse the repository at this point in the history
  • Loading branch information
孙健 committed Oct 25, 2016
1 parent c82e644 commit c2cfbc4
Show file tree
Hide file tree
Showing 23 changed files with 357 additions and 161 deletions.
20 changes: 0 additions & 20 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,26 +43,6 @@
<scope>compile</scope>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.21</version>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.21</version>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.16</version>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/ansj/app/crf/MakeTrainFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import java.util.List;

import org.ansj.app.crf.pojo.Element;
import org.ansj.util.MyStaticValue;
import org.nlpcn.commons.lang.util.IOUtil;
import org.nlpcn.commons.lang.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.nlpcn.commons.lang.util.logging.Log;

/**
* 生成crf 或者是 wapiti的训练语料工具.
Expand All @@ -22,7 +22,7 @@
*/
public class MakeTrainFile {

public static final Logger logger = LoggerFactory.getLogger(MakeTrainFile.class);
private static final Log logger = MyStaticValue.getLog();

public static void main(String[] args) {

Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/ansj/app/crf/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
import org.ansj.app.crf.model.CRFModel;
import org.ansj.app.crf.model.CRFppTxtModel;
import org.ansj.app.crf.model.WapitiCRFModel;
import org.ansj.util.MyStaticValue;
import org.nlpcn.commons.lang.tire.domain.SmartForest;
import org.nlpcn.commons.lang.util.MapCount;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.nlpcn.commons.lang.util.logging.Log;

public abstract class Model {

public final Logger logger = LoggerFactory.getLogger("CRF");
public static final Log logger = MyStaticValue.getLog() ;

protected String name;

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/ansj/app/crf/model/CRFModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public void loadModel(InputStream is) throws Exception {
featureTree.add(name, value);
}
} while (win == 0 || size == 0);
logger.info("load crf model ok ! use time :{}", System.currentTimeMillis() - start);
logger.info("load crf model ok ! use time :" + (System.currentTimeMillis() - start));
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/ansj/app/crf/model/CRFppTxtModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ public void loadModel(String modelPath) throws Exception {
for (int[] t1 : config.getTemplate()) {
sb.append(Arrays.toString(t1) + " ");
}
logger.info("load template ok template : {}", sb);
logger.info("load template ok template : "+ sb);
TreeMap<Integer, Pair<String, String>> featureNames = loadFeatureName(featureIndex, reader);
logger.info("load feature ok feature size : {}", featureNames.size());
logger.info("load feature ok feature size : "+ featureNames.size());
loadFeatureWeight(reader, statusCoven, featureNames);
logger.info("load crfpp model ok ! use time :{}", (System.currentTimeMillis() - start));
logger.info("load crfpp model ok ! use time : "+ (System.currentTimeMillis() - start));
}

/**
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/org/ansj/app/crf/model/WapitiCRFModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,20 @@ public void loadModel(String modelPath) throws Exception {
sb.append(Arrays.toString(t1) + " ");
}

logger.info("featureIndex is {}", featureIndex);
logger.info("load template ok template : {}", sb);
logger.info("featureIndex is "+ featureIndex);
logger.info("load template ok template : "+ sb);

int[] statusCoven = loadTagCoven(br);

List<Pair<String, String>> loadFeatureName = loadFeatureName(featureIndex, br);

logger.info("load feature ok feature size : {}", loadFeatureName.size());
logger.info("load feature ok feature size : "+ loadFeatureName.size());

featureTree = new SmartForest<float[]>();

loadFeatureWeight(br, statusCoven, loadFeatureName);

logger.info("load wapiti model ok ! use time :{}", (System.currentTimeMillis() - start));
logger.info("load wapiti model ok ! use time :"+ (System.currentTimeMillis() - start));

}

Expand Down Expand Up @@ -97,7 +97,7 @@ private void loadFeatureWeight(BufferedReader br, int[] statusCoven, List<Pair<S
for (Pair<String, String> pair : featureNames) {

if (temp == null) {
logger.warn("{}\t{} not have any weight ,so skip it !", pair.getValue0(), pair.getValue1());
logger.warn(pair.getValue0()+"\t"+pair.getValue1()+" not have any weight ,so skip it !");
continue;
}

Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/ansj/dic/DicReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.ansj.util.MyStaticValue;
import org.nlpcn.commons.lang.util.logging.Log;

/**
* 加载词典用的类
Expand All @@ -15,7 +15,7 @@
*/
public class DicReader {

public static final Logger logger = LoggerFactory.getLogger(DicReader.class);
private static final Log logger = MyStaticValue.getLog() ;

public static BufferedReader getReader(String name) {
// maven工程修改词典加载方式
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/org/ansj/domain/Term.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ public class Term implements Serializable{
private Nature nature = Nature.NULL;
//是否是一个新词
private boolean newWord ;
//同义词
private List<String> synonyms ;


private List<Term> subTerm = null;

Expand Down Expand Up @@ -272,5 +275,13 @@ public void updateTermNaturesAndNature(TermNatures termNatures) {
this.termNatures = termNatures;
this.nature = termNatures.nature ;
}

/**
 * Returns the synonym list currently attached to this term.
 *
 * @return the stored list reference itself (no copy is made). The field has no initializer
 *         at its declaration, so this is presumably {@code null} until
 *         {@link #setSynonyms(List)} has been called — TODO confirm against the constructors.
 */
public List<String> getSynonyms() {
return synonyms;
}

/**
 * Replaces the synonym list attached to this term.
 *
 * @param synonyms the list to store; the reference is kept as-is (no defensive copy and
 *                 no null check), so later changes made by the caller to that list are
 *                 visible through this term
 */
public void setSynonyms(List<String> synonyms) {
this.synonyms = synonyms;
}

}
5 changes: 4 additions & 1 deletion src/main/java/org/ansj/library/DATDictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@
import org.ansj.util.MyStaticValue;
import org.nlpcn.commons.lang.dat.DoubleArrayTire;
import org.nlpcn.commons.lang.dat.Item;
import org.nlpcn.commons.lang.util.logging.Log;

public class DATDictionary {

private static final Log logger = MyStaticValue.getLog();

/**
* 所有在词典中出现的词,并且承担简繁体转换的任务.
*/
Expand Down Expand Up @@ -61,7 +64,7 @@ private static DoubleArrayTire loadDAT() {
}
// 特殊字符标准化
IN_SYSTEM['%'] = '%';
MyStaticValue.LIBRARYLOG.info("init core library ok use time :{}", System.currentTimeMillis() - start);
logger.info("init core library ok use time : " + (System.currentTimeMillis() - start));
return dat;
} catch (InstantiationException e) {
MyStaticValue.LIBRARYLOG.warn("无法实例化", e);
Expand Down
5 changes: 2 additions & 3 deletions src/main/java/org/ansj/library/NatureLibrary.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
import org.ansj.domain.Term;
import org.ansj.util.MyStaticValue;
import org.nlpcn.commons.lang.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.nlpcn.commons.lang.util.logging.Log;

/**
* 这里封装了词性和词性之间的关系.以及词性的索引.这是个好东西. 里面数组是从ict里面找来的. 不是很新.没有语料无法训练
Expand All @@ -19,7 +18,7 @@
*/
public class NatureLibrary {

public static final Logger logger = LoggerFactory.getLogger(NatureLibrary.class);
private static final Log logger = MyStaticValue.getLog();

private static final int YI = 1;
private static final int FYI = -1;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/ansj/library/NgramLibrary.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public class NgramLibrary {
static {
long start = System.currentTimeMillis();
MyStaticValue.initBigramTables();
MyStaticValue.LIBRARYLOG.info("init ngram ok use time :{}", System.currentTimeMillis() - start);
MyStaticValue.LIBRARYLOG.info("init ngram ok use time :" + (System.currentTimeMillis() - start));
}

/**
Expand Down
8 changes: 4 additions & 4 deletions src/main/java/org/ansj/library/UserDefineLibrary.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,14 @@ private static void initAmbiguityLibrary() {
} catch (UnsupportedEncodingException e) {
LIBRARYLOG.warn("不支持的编码", e);
} catch (IOException e) {
LIBRARYLOG.warn("Init ambiguity library error :{}, path: {}", e.getMessage(), file.getPath());
LIBRARYLOG.warn("Init ambiguity library error :"+ e.getMessage()+", path: "+ file.getPath());
}
}

LIBRARYLOG.info("Init ambiguity library ok!");

} else {
LIBRARYLOG.warn("Init ambiguity library warning :{} because : file not found or failed to read !", MyStaticValue.ambiguityLibrary);
LIBRARYLOG.warn("Init ambiguity library warning :"+MyStaticValue.ambiguityLibrary+" because : file not found or failed to read !");
}

}
Expand Down Expand Up @@ -146,15 +146,15 @@ public static void loadLibrary(Forest forest, String path) {
} catch (UnsupportedEncodingException e) {
LIBRARYLOG.warn("不支持的编码", e);
} catch (IOException e) {
LIBRARYLOG.warn("Init user library error :{}, path: {}", e.getMessage(), file.getPath());
LIBRARYLOG.warn("Init user library error :"+e.getMessage()+", path: "+file.getPath());
}
}

LIBRARYLOG.info("Init user library ok!");


} else {
LIBRARYLOG.warn("Init user library error :{} because : not find that file !", path);
LIBRARYLOG.warn("Init user library error :"+path+" because : not find that file !");
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
import java.util.HashMap;

import org.ansj.util.MyStaticValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.nlpcn.commons.lang.util.logging.Log;

/**
* 机构名识别词典加载类
Expand All @@ -16,7 +15,7 @@
*/
public class CompanyAttrLibrary {

public static final Logger logger = LoggerFactory.getLogger(CompanyAttrLibrary.class);
private static final Log logger = MyStaticValue.getLog();

private static HashMap<String, int[]> cnMap = null;

Expand Down
5 changes: 2 additions & 3 deletions src/main/java/org/ansj/library/name/PersonAttrLibrary.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@

import org.ansj.domain.PersonNatureAttr;
import org.ansj.util.MyStaticValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.nlpcn.commons.lang.util.logging.Log;

/**
* 人名标注所用的词典就是简单的hashmap简单方便谁用谁知道,只在加载词典的时候用
Expand All @@ -20,7 +19,7 @@

public class PersonAttrLibrary {

public static final Logger logger = LoggerFactory.getLogger(PersonAttrLibrary.class);
private static final Log logger = MyStaticValue.getLog();

private HashMap<String, PersonNatureAttr> pnMap = null;

Expand Down
55 changes: 0 additions & 55 deletions src/main/java/org/ansj/recognition/arrimpl/SynonymsRecgnition.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import org.ansj.domain.TermNatures;
import org.ansj.library.UserDefineLibrary;
import org.ansj.recognition.TermArrRecognition;
import org.ansj.util.MyStaticValue;
import org.ansj.util.TermUtil;
import org.ansj.util.TermUtil.InsertTermType;
import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.domain.SmartForest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.nlpcn.commons.lang.util.logging.Log;

/**
* 用户自定义词典.又称补充词典
Expand All @@ -20,7 +20,7 @@
*/
public class UserDefineRecognition implements TermArrRecognition {

public final Logger logger = LoggerFactory.getLogger(getClass());
public static final Log logger = MyStaticValue.getLog();

private Term[] terms = null;

Expand Down Expand Up @@ -111,7 +111,7 @@ private int getInt(String str, int def) {
try {
return Integer.parseInt(str);
} catch (NumberFormatException e) {
logger.warn("{}不是一个数字", str, e);
logger.warn(str + "不是一个数字", e);
return def;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
*/
public class FilterRecognition implements Recognition {

/**
*
*/
private static final long serialVersionUID = 7041503137429986566L;

private Set<String> filter = new HashSet<String>();

private Set<String> natureFilter = new HashSet<String>();
Expand Down
Loading

0 comments on commit c2cfbc4

Please sign in to comment.