Skip to content

Commit

Permalink
代码异常处理完善及日志修正
Browse files Browse the repository at this point in the history
修改日志框架为slf4j,方便调整日志级别及新增性能
修改代码风格为java1.7改善流关闭代码块
完善异常处理并修正代码警告
  • Loading branch information
XYUU authored and XYUU committed Aug 9, 2016
1 parent 6096c0d commit abe1bea
Show file tree
Hide file tree
Showing 39 changed files with 368 additions and 552 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public AnsjTokenizer(Analysis analysis, Reader input, Set<String> filter, boolea

@Override
public boolean incrementToken() throws IOException {
// TODO Auto-generated method stub

clearAttributes();
int position = 0;
Term term = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public AnsjAnalysis() {

@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
// TODO Auto-generated method stub

return new AnsjTokenizer(new ToAnalysis(new BufferedReader(reader)), reader, filter, pstemming);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public AnsjIndexAnalysis() {

@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
// TODO Auto-generated method stub

return new AnsjTokenizer(new IndexAnalysis(new BufferedReader(reader)), reader, filter, pstemming);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public AnsjTokenizer(Analysis ta, Reader input, Set<String> filter, boolean pste

@Override
public boolean incrementToken() throws IOException {
// TODO Auto-generated method stub

clearAttributes();
int position = 0;
Term term = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public AnsjIndexAnalysis() {

@Override
protected TokenStreamComponents createComponents(String fieldName, final Reader reader) {
// TODO Auto-generated method stub

Tokenizer tokenizer = new AnsjTokenizer(new IndexAnalysis(reader), reader, filter, pstemming);
return new TokenStreamComponents(tokenizer);
}
Expand Down
7 changes: 6 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,12 @@
<version>4.8.1</version>
<scope>test</scope>
</dependency>


<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.21</version>
</dependency>
</dependencies>

<build>
Expand Down
52 changes: 9 additions & 43 deletions src/main/java/org/ansj/app/crf/MakeTrainFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;

import org.ansj.app.crf.pojo.Element;
import org.nlpcn.commons.lang.util.IOUtil;
import org.nlpcn.commons.lang.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* 生成crf 或者是 wapiti的训练语料工具.
Expand All @@ -18,6 +19,9 @@
*
*/
public class MakeTrainFile {

public static final Logger logger = LoggerFactory.getLogger(MakeTrainFile.class);

public static void main(String[] args) {

String inputPath = "corpus.txt";
Expand All @@ -30,70 +34,32 @@ public static void main(String[] args) {
}

if (StringUtil.isBlank(inputPath) || StringUtil.isBlank(outputPath)) {
System.out.println("org.ansj.app.crf.MakeTrainFile [inputPath] [outputPath]");
logger.info("org.ansj.app.crf.MakeTrainFile [inputPath] [outputPath]");
return;
}

BufferedReader reader = null;

FileOutputStream fos = null;

try {

reader = IOUtil.getReader(inputPath, "utf-8");

fos = new FileOutputStream(outputPath);

try (BufferedReader reader = IOUtil.getReader(inputPath, "utf-8");
FileOutputStream fos = new FileOutputStream(outputPath);) {
String temp = null;

int i = 0;

while ((temp = reader.readLine()) != null) {

StringBuilder sb = new StringBuilder("\n");

if (StringUtil.isBlank(temp)) {
continue;
}

if (i == 0) {
temp = StringUtil.trim(temp);
}

List<Element> list = Config.makeToElementList(temp, "\\s+");

for (Element element : list) {
sb.append(element.nameStr() + " " + Config.getTagName(element.getTag()));
sb.append("\n");
}

fos.write(sb.toString().getBytes(IOUtil.UTF8));

System.out.println(++i);
}

} catch (Exception e) {
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
} catch (Exception e1) {
e1.printStackTrace();
}
}

if (fos != null) {
try {
fos.flush();
fos.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
logger.info("发生异常", e);
}

}

}
90 changes: 31 additions & 59 deletions src/main/java/org/ansj/app/crf/Model.java
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
package org.ansj.app.crf;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectOutputStream;
import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Logger;
import java.util.zip.GZIPOutputStream;

import org.ansj.app.crf.model.CRFModel;
import org.ansj.app.crf.model.CRFppTxtModel;
import org.ansj.app.crf.model.WapitiCRFModel;
import org.nlpcn.commons.lang.tire.domain.SmartForest;
import org.nlpcn.commons.lang.util.MapCount;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class Model {

protected static final Logger LOG = Logger.getLogger("CRF");
public final Logger logger = LoggerFactory.getLogger("CRF");

protected String name;

Expand Down Expand Up @@ -49,42 +48,26 @@ public Model(String name) {
* @param modelPath path of the model file whose format is detected and loaded
* @return the loaded model
* @throws FileNotFoundException
* @throws IOException
* @throws ClassNotFoundException
* @throws Exception
*/
public static Model load(String name, String modelPath) throws Exception {
InputStream is = null;
try {

Model model = new CRFModel(name);

if (model.checkModel(modelPath)) {
model.loadModel(modelPath);
return model;
}

model = new CRFppTxtModel(name);

if (model.checkModel(modelPath)) {
model.loadModel(modelPath);
return model;
}

model = new WapitiCRFModel(name);

if (model.checkModel(modelPath)) {
model.loadModel(modelPath);
return model;
}
} finally {
if (is != null) {
is.close();
}
Model model = new CRFModel(name);
if (model.checkModel(modelPath)) {
model.loadModel(modelPath);
return model;
}
model = new CRFppTxtModel(name);

throw new Exception("I did not know waht type of model by file " + modelPath);

if (model.checkModel(modelPath)) {
model.loadModel(modelPath);
return model;
}
model = new WapitiCRFModel(name);
if (model.checkModel(modelPath)) {
model.loadModel(modelPath);
return model;
}
throw new Exception("I did not know what type of model by file " + modelPath);
}

/**
Expand Down Expand Up @@ -142,11 +125,12 @@ protected static void printFeatureTree(String cs, float[] tempW) {
if (tempW.length == 4) {
name = "U";
}

name += "*" + ((int) cs.charAt(cs.length() - 1) - Config.FEATURE_BEGIN + 1) + ":" + cs.substring(0, cs.length() - 1);
name += "*" + ((int) cs.charAt(cs.length() - 1) - Config.FEATURE_BEGIN + 1) + ":"
+ cs.substring(0, cs.length() - 1);
for (int i = 0; i < tempW.length; i++) {
if (tempW[i] != 0) {
System.out.println(name + "\t" + Config.getTagName(i / 4 - 1) + "\t" + Config.getTagName(i % 4) + "\t" + tempW[i]);
System.out.println(
name + "\t" + Config.getTagName(i / 4 - 1) + "\t" + Config.getTagName(i % 4) + "\t" + tempW[i]);
}

}
Expand All @@ -159,26 +143,17 @@ protected static void printFeatureTree(String cs, float[] tempW) {
* @throws IOException
* @throws FileNotFoundException
*/
public void writeModel(String path) throws FileNotFoundException, IOException {
ObjectOutputStream oos = null;
try {

oos = new ObjectOutputStream(new GZIPOutputStream(new FileOutputStream(new File(path))));

public void writeModel(String path) {
try (FileOutputStream fso = new FileOutputStream(path)) {
ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(fso));
oos.writeUTF(CRFModel.version);

oos.writeObject(status);

oos.writeObject(config.getTemplate());

Map<String, float[]> map = featureTree.toMap();

MapCount<Integer> mc = new MapCount<Integer>();

for (float[] v : map.values()) {
mc.add(v.length);
}

for (Entry<Integer, Double> entry : mc.get().entrySet()) {
int win = entry.getKey();
oos.writeInt(win);// 宽度
Expand All @@ -195,14 +170,11 @@ public void writeModel(String path) throws FileNotFoundException, IOException {
}
oos.writeInt(0);
oos.writeInt(0);

} catch (Exception e) {
e.printStackTrace();
} finally {
if (oos != null) {
oos.flush();
oos.close();
}
oos.flush();
} catch (FileNotFoundException e) {
logger.warn("文件没有找到",e);
} catch (IOException e) {
logger.warn("IO异常",e);
}
}
}
Loading

0 comments on commit abe1bea

Please sign in to comment.