Skip to content

Commit

Permalink
Merge branch 'TIKA-1917' of https://github.com/manalishah/tika into TIKA-1917
Browse files: browse the repository at this point in the history
  • Loading branch information
chrismattmann committed Apr 23, 2016
2 parents a353200 + f9a716a commit 9ecb183
Showing 1 changed file with 12 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import java.util.Map;
import java.util.HashMap;
import java.util.Properties;
import java.util.Iterator;
import java.util.Locale;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

Expand All @@ -38,7 +40,7 @@
* This class offers an implementation of {@link NERecogniser} based on
* ne_chunk() module of NLTK. This NER requires additional setup,
* due to Http requests to an endpoint server that runs NLTK.
* See <a href="http://wiki.apache.org/tika/TikaAndNER#NLTK">
* See <a href="http://wiki.apache.org/tika/TikaAndNLTK">TikaAndNLTK</a>
*
*/
public class NLTKNERecogniser implements NERecogniser {
Expand Down Expand Up @@ -71,9 +73,6 @@ public NLTKNERecogniser(){
this.restHostUrlStr = restHostUrlStr;
}




Response response = WebClient.create(restHostUrlStr).accept(MediaType.TEXT_HTML).get();
int responseCode = response.getStatus();
if(responseCode == 200){
Expand Down Expand Up @@ -127,14 +126,20 @@ public Map<String, Set<String>> recognise(String text) {
String result = response.readEntity(String.class);
JSONParser parser = new JSONParser();
JSONObject j = (JSONObject) parser.parse(result);
Set s = entities.put("NAMES", new HashSet((Collection) j.get("names")));
Iterator<?> keys = j.keySet().iterator();
while( keys.hasNext() ) {
String key = (String)keys.next();
if ( !key.equals("result") ) {
ENTITY_TYPES.add(key);
entities.put(key.toUpperCase(Locale.ENGLISH), new HashSet((Collection) j.get(key)));
}
}
}
}
catch (Exception e) {
LOG.debug(e.getMessage(), e);
}
ENTITY_TYPES.clear();
ENTITY_TYPES.addAll(entities.keySet());

return entities;
}

Expand Down

0 comments on commit 9ecb183

Please sign in to comment.