Skip to content

Commit

Permalink
Update the default standalone writer to Medtator format
Browse files Browse the repository at this point in the history
  • Loading branch information
hol7001 committed Aug 20, 2024
1 parent a1fc87c commit 38770ce
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/main/java/org/ohnlp/medtagger/fit/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
import org.ohnlp.medtagger.cr.FileSystemReader;
import org.ohnlp.medtagger.ie.cc.IETabDelimitedWriter;
import org.ohnlp.medtagger.ie.cc.MedTatorWriter;

import java.nio.file.Path;
import java.nio.file.Paths;
Expand Down Expand Up @@ -43,8 +43,8 @@ public static void main(String[] args) throws Exception {

AnalysisEngineDescription writer =
AnalysisEngineFactory.createEngineDescription(
IETabDelimitedWriter.class,
IETabDelimitedWriter.PARAM_OUTPUTDIR, outputDirPath.toString());
MedTatorWriter.class,
MedTatorWriter.PARAM_OUTPUTDIR, outputDirPath.toString());

SimpleCliPipeline.runPipeline(reader, descMedTaggerTAE, writer);

Expand Down
176 changes: 176 additions & 0 deletions src/main/java/org/ohnlp/medtagger/ie/cc/MedTatorWriter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
/*******************************************************************************
* Copyright: (c) 2013 Mayo Foundation for Medical Education and
* Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
* triple-shield Mayo logo are trademarks and service marks of MFMER.
*
* Except as contained in the copyright notice above, or as used to identify
* MFMER as the author of this software, the trade names, trademarks, service
* marks, or product names of the copyright holder shall not be used in
* advertising, promotion or otherwise in connection with this software without
* prior written authorization of the copyright holder.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package org.ohnlp.medtagger.ie.cc;


import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.ohnlp.medtagger.type.ConceptMention;
import org.ohnlp.typesystem.type.structured.Document;

import org.ohnlp.typesystem.type.textspan.Sentence;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

import java.net.MalformedURLException;
import java.net.URL;


/**
*
* Generate XML output format
* @author Hongfang Liu
*
*/
public class MedTatorWriter extends JCasAnnotator_ImplBase {

public static final String PARAM_OUTPUTDIR = "OutputDir";

private File mOutputDir;

private int mDocNum;

private DocumentBuilderFactory dbf;
private DocumentBuilder db;

/**
* initialize
*/
public void initialize(UimaContext aContext) throws ResourceInitializationException {

mDocNum = 0;
mOutputDir = new File((String) aContext.getConfigParameterValue(PARAM_OUTPUTDIR));
if (!mOutputDir.exists()) {
mOutputDir.mkdirs();
}
dbf = DocumentBuilderFactory.newInstance();
try {
db = dbf.newDocumentBuilder();
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}

// print out match and concept mention
public void printAnnotationsInline(JCas jcas) throws IOException {
JFSIndexRepository indexes = jcas.getJFSIndexRepository();
FSIterator<TOP> docIterator = indexes.getAllIndexedFS(Document.type);
File outFile = null;
File inFile = null;


if (docIterator.hasNext()) {
Document docAnn = (Document) docIterator.next();
String fileLoc =docAnn.getFileLoc();
try {
inFile = new File(new URL(fileLoc).getPath());
String outFileName = inFile.getName();
outFile = new File(mOutputDir, outFileName+".xml");
} catch (MalformedURLException e1) {
}
}
if (outFile == null) {
outFile = new File(mOutputDir, "doc" + mDocNum++);
}



String toprint = "";
int tagid = 0;
// document text
String doctext = jcas.getDocumentText();

org.w3c.dom.Document doc = db.newDocument();
Element rootElement = doc.createElement("MedTagger");
doc.appendChild(rootElement);
Node cdata = doc.createCDATASection(doctext);
Element textElement = (Element) doc.createElement("TEXT");
textElement.appendChild(cdata);
rootElement.appendChild(textElement);
Element tagsElement = (Element) doc.createElement("TAGS");


// get concept mention index
FSIterator<? extends Annotation> cmIter = jcas.getAnnotationIndex(ConceptMention.type).iterator();

while (cmIter.hasNext()) {
ConceptMention cm = (ConceptMention) cmIter.next();
Element tagElement = doc.createElement("CM");
tagElement.setAttribute("spans", cm.getBegin()+"~"+cm.getEnd());
tagElement.setAttribute("text", cm.getCoveredText());
tagElement.setAttribute("id", "P"+tagid);
tagElement.setAttribute("certainty", cm.getCertainty());
tagElement.setAttribute("status", cm.getStatus());
tagElement.setAttribute("experiencer", cm.getExperiencer());
tagElement.setAttribute("semG", cm.getSemGroup());
tagElement.setAttribute("normTarget", cm.getNormTarget());
tagsElement.appendChild(tagElement);
tagid++;
}
rootElement.appendChild(tagsElement);

try{
FileOutputStream output =
new FileOutputStream(outFile);
writeXml(doc, output);
} catch (TransformerException e) {
throw new RuntimeException(e);
}
}

private static void writeXml(org.w3c.dom.Document doc,
OutputStream output)
throws TransformerException {

TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
DOMSource source = new DOMSource((Node) doc);
StreamResult result = new StreamResult(output);
transformer.transform(source, result);
}
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
try {
printAnnotationsInline(jCas);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
47 changes: 47 additions & 0 deletions src/main/resources/org/ohnlp/medtagger/ie/ccs/MedTatorWriter.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?xml version="1.0" encoding="UTF-8"?>
<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<implementationName>org.ohnlp.medtagger.ie.cc.MedTatorWriter</implementationName>
<processingResourceMetaData>
<name>org.ohnlp.medtagger.ie.ccs.MedTatorWriter</name>
<description>Print out annotation in attribute and value format</description>
<version>1.0</version>
<vendor/>
<configurationParameters>
<configurationParameter>
<name>OutputDir</name>
<type>String</type>
<multiValued>false</multiValued>
<mandatory>true</mandatory>
</configurationParameter>
</configurationParameters>
<configurationParameterSettings>
<nameValuePair>
<name>OutputDir</name>
<value>
<string>data/output/</string>
</value>
</nameValuePair>
</configurationParameterSettings>
<typeSystemDescription>
<imports>
<import name="org.ohnlp.medtagger.ie.types.MedTaggerIETypes"/>
</imports>
</typeSystemDescription>
<typePriorities/>
<fsIndexCollection/>
<capabilities>
<capability>
<inputs/>
<outputs/>
<languagesSupported/>
</capability>
</capabilities>
<operationalProperties>
<modifiesCas>false</modifiesCas>
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
<outputsNewCASes>false</outputsNewCASes>
</operationalProperties>
</processingResourceMetaData>
<resourceManagerConfiguration/>
</casConsumerDescription>

0 comments on commit 38770ce

Please sign in to comment.