Skip to content

Commit

Permalink
Fix/free text (#54)
Browse files Browse the repository at this point in the history
* add newline to test

* fix: freetext translation

* feature: add FHIR dependency and update others

* feature: fix freetext translation, add HAPI FHIR validation and optimize build
  • Loading branch information
PierreDelpy authored Oct 29, 2024
1 parent 3903ade commit c23c39c
Show file tree
Hide file tree
Showing 11 changed files with 115 additions and 81 deletions.
5 changes: 5 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Exclude everything from the Docker build context by default ...
**
# ... then re-include only the paths listed below (negated patterns).
# Order matters: the exclusion above must come before these exceptions.
!pom.xml
!Dockerfile
!src/main/
!src/docker/
35 changes: 18 additions & 17 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
# ---- Build stage: compile the project and assemble the fat jar ----
# Maven image is pinned to an exact version (3.9.9).
FROM maven:3.9.9-eclipse-temurin-17 AS build

WORKDIR /app
# Copy only the build descriptor and the sources into the build stage.
COPY pom.xml ./
COPY src ./src
# Tests are skipped during the image build (-DskipTests).
RUN mvn clean install -U -DskipTests

# ---- Runtime stage: JRE-only image with the assembled jar and start script ----
FROM bellsoft/liberica-openjre-alpine:17
COPY --from=build /app/target/obds2fhir*with-dependencies.jar /obds2fhir/obds2fhir.jar
COPY src/docker/start.sh /obds2fhir/
RUN chmod +x /obds2fhir/start.sh
# Absolute path so the entrypoint resolves regardless of the container's
# working directory (e.g. when started with `docker run -w ...`).
ENTRYPOINT ["/obds2fhir/start.sh"]

# Default configuration, set in a single ENV instruction.
# Every value can be overridden at container start (`docker run -e NAME=value`).
# NOTE(review): SALT looks like a placeholder default; presumably it should be
# overridden per deployment - confirm.
ENV FILE_PATH="/obds2fhir/clinical_data" \
    STORE_PATH="http://blaze:8090/fhir" \
    STORE_AUTH="" \
    IDENTIFIER_SYSTEM="http://dktk.dkfz.de/fhir/onco/core/CodeSystem/PseudonymArtCS" \
    LOG_LEVEL="INFO" \
    MAINZELLISTE_URL="http://host.docker.internal:8080" \
    MAINZELLISTE_APIKEY="" \
    IDTYPE="" \
    SALT="createLocalCustomSalt" \
    SSL_CERTIFICATE_VALIDATION="true" \
    ADD_DEPARTMENTS="false" \
    WAIT_FOR_CONNECTION="false" \
    KEEP_INTERNAL_ID="false"
55 changes: 38 additions & 17 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>

<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<slf4j-simple.version>2.0.16</slf4j-simple.version>
<slf4j.version>2.0.16</slf4j.version>
<hapi.fhir.version>7.4.5</hapi.fhir.version>
</properties>

<build>
<plugins>
<plugin>
Expand All @@ -29,6 +30,7 @@
</archive>
</configuration>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
Expand All @@ -52,9 +54,11 @@
</execution>
</executions>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.4.0</version>
<version>3.5.1</version>
</plugin>
</plugins>
</build>
Expand All @@ -65,12 +69,13 @@
<artifactId>Saxon-HE</artifactId>
<version>12.5</version>
</dependency>

<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.3.1</version>
<scope>compile</scope>
<version>5.4</version>
</dependency>

<dependency>
<groupId>de.pseudonymisierung</groupId>
<artifactId>mainzelliste-client</artifactId>
Expand All @@ -82,55 +87,71 @@
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
<version>1.5.4</version>
</dependency>

<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.11.0</version>
</dependency>
<dependency>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.4.0</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>5.11.0</version>
<version>5.11.3</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-inline</artifactId>
<version>5.2.0</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>uk.org.webcompere</groupId>
<artifactId>system-stubs-jupiter</artifactId>
<version>2.1.6</version>
<version>2.1.7</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
<version>2.17.0</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>ca.uhn.hapi.fhir</groupId>
<artifactId>hapi-fhir-structures-r4</artifactId>
<version>${hapi.fhir.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>ca.uhn.hapi.fhir</groupId>
<artifactId>hapi-fhir-base</artifactId>
<version>${hapi.fhir.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j-simple.version}</version>
<version>${slf4j.version}</version>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>${slf4j-simple.version}</version>
<version>${slf4j.version}</version>
</dependency>
</dependencies>
</project>
</project>
2 changes: 1 addition & 1 deletion src/docker/start.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env sh
#!/bin/sh

echo "Checking required input and output directories..."
directories="InputData Processed tmp tmp/oBDS_Patients tmp/ADT_Patients tmp/FHIR_Patients tmp/erroneous"
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/MDS2FHIR.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -2356,6 +2356,6 @@

<!--
    mds2fhir:fix-free-text
    Normalises a free-text value: every run of one or more characters that is
    NOT an ASCII letter, a digit, one of the listed accented letters
    (ä ö ü Ä Ö Ü é É è È ê Ê à À ç Ç ñ Ñ) or one of - _ . , ( ) is replaced
    by a single underscore. Umlauts are kept as they are.
    Example: 'Atezolizumab/ Bevacizumab' becomes 'Atezolizumab_Bevacizumab'.
    Param text: the free-text string to normalise.
-->
<xsl:function name="mds2fhir:fix-free-text">
    <xsl:param name="text" />
    <xsl:value-of select="replace($text, '[^a-zA-Z0-9äöüÄÖÜéÉèÈêÊàÀçÇñÑ\-_.,()]+', '_')"/>
</xsl:function>
</xsl:stylesheet>
43 changes: 22 additions & 21 deletions src/test/java/de/samply/obds2fhir/Obds2fhirTests.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
package de.samply.obds2fhir;

import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.parser.IParser;
import org.hl7.fhir.r4.model.Bundle;
import org.apache.commons.codec.digest.DigestUtils;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import uk.org.webcompere.systemstubs.environment.EnvironmentVariables;
import uk.org.webcompere.systemstubs.jupiter.SystemStub;
import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
Expand All @@ -18,18 +20,17 @@
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public class Obds2fhirTests {
/** Shared HAPI FHIR R4 context; used to create the XML parser when comparing bundles. */
private final FhirContext fhirContext = FhirContext.forR4();

/** Path of the sample input file as resolved from the test classpath. */
private final String pathWithFile = this.getClass().getClassLoader()
        .getResource("clinical_data/InputData/File-1-ADT2_Testpatient.xml").getPath();

/**
 * Stubs the FILE_PATH environment variable for the tests: it is set to the part of
 * {@link #pathWithFile} that precedes "InputData", i.e. the directory containing InputData.
 */
@SystemStub
private EnvironmentVariables environmentVariables =
        new EnvironmentVariables("FILE_PATH", pathWithFile.substring(0, pathWithFile.indexOf("InputData")));
/**
 * Runs the transformation over the test data: first on the input directory,
 * then on the two intermediate tmp directories. Finally checks that the
 * directory named by FILE_PATH exists.
 */
@Test
@Order(1)
public void applyTransformation() {
    Obds2fhir.initializeTransformers(false);
    // NOTE(review): the second argument presumably selects the transformation step - confirm.
    Obds2fhir.processXmlFiles("/InputData/", 1);
    Obds2fhir.processXmlFiles("/tmp/oBDS_Patients/", 2);
    Obds2fhir.processXmlFiles("/tmp/ADT_Patients/", 2);
    assertTrue(new File(System.getenv("FILE_PATH")).exists());
}
@Test
Expand Down Expand Up @@ -89,23 +90,23 @@ public void compareSampleOBDS () throws IOException {
assertTrue(compare(result, expected));
}
/**
 * Compares a generated FHIR bundle with an expected bundle from the test resources.
 * Both files are read as text, volatile ids and pseudonyms are masked via
 * {@link #replaceAllIds(String)}, and the results are parsed as R4 bundles and
 * compared structurally with {@code equalsDeep}.
 *
 * @param resultvar   path of the generated file, relative to the FILE_PATH environment variable
 * @param expectedvar classpath name of the expected bundle
 * @return whether both bundles are deeply equal after masking
 * @throws IOException if a file cannot be read, or the expected resource is not on the classpath
 */
private Boolean compare(String resultvar, String expectedvar) throws IOException {
    java.net.URL expectedUrl = this.getClass().getClassLoader().getResource(expectedvar);
    if (expectedUrl == null) {
        // Fail with a clear message instead of a bare NullPointerException.
        throw new java.io.FileNotFoundException("Expected test resource not found on classpath: " + expectedvar);
    }
    String resultString = replaceAllIds(Files.readString(Paths.get(System.getenv("FILE_PATH") + resultvar)));
    String expectedString = replaceAllIds(Files.readString(Paths.get(expectedUrl.getPath())));

    IParser xmlParser = fhirContext.newXmlParser();
    Bundle resultBundle = xmlParser.parseResource(Bundle.class, resultString);
    Bundle expectedBundle = xmlParser.parseResource(Bundle.class, expectedString);
    return resultBundle.equalsDeep(expectedBundle);
}
private String replaceAllIds(String bundle){

private String replaceAllIds(String bundle) {
//repalce ids
String result = bundle.replaceAll("([/\"])([a-z0-9]{16,23}[ADToBDS-]{0,5}[-0-9]{0,2})(\")","$1replaced-id$3");
String result = bundle.replaceAll("([/\"])([a-z0-9]{16,23}[ADToBDS-]{0,5}[-0-9]{0,2})(\")", "$1replaced-id$3");
//replace pseudonym
result = result.replaceAll("(<value value=\")(.{1,32})\"","$1replaced-pseudonym\"");
result = result.replaceAll("(<value value=\")(.{1,32})\"", "1");
//replace separator
result = result.replaceAll("\r","");
result = result.replaceAll("\r", "");
//replace artefacts
result = result.replaceAll("<id value=\"tpatient.xml\"/>","<id value=\"replaced-id\"/>");
result = result.replaceAll("<id value=\"tpatient.xml\"/>", "<id value=\"replaced-id\"/>");
return result;
}
}
2 changes: 1 addition & 1 deletion src/test/resources/FHIR_ADT_Expected-File-1.xml
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@
<system value="urn:oid:2.16.840.1.113883.6.43.1"/>
<version value="31"/>
<code value="C61.0"/>
<display value="Primaertumor_Topographie_ICD_O_Freitext_Oe"/>
<display value="Primaertumor_Topographie_ICD_O_Freitext_Ö"/>
</coding>
<coding>
<system value="http://dktk.dkfz.de/fhir/onco/core/CodeSystem/SeitenlokalisationCS"/>
Expand Down
12 changes: 6 additions & 6 deletions src/test/resources/FHIR_ADT_Expected-File-3.xml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@
<version value="2024"/>
<code value="C43.4"/>
</coding>
<text value="Malignes_Melanom_oberer_Ruecken_mittig"/>
<text value="Malignes_Melanom_oberer_Rücken_mittig"/>
</code>
<bodySite>
<coding>
Expand Down Expand Up @@ -272,7 +272,7 @@
<effectiveDateTime value="2024-03-02"/>
<valueCodeableConcept>
<coding>
<system value="urn:WHO-Grading_fuer_Gehirntumoren"/>
<system value="urn:WHO-Grading_für_Gehirntumoren"/>
<code value="II"/>
</coding>
</valueCodeableConcept>
Expand Down Expand Up @@ -346,7 +346,7 @@
<version value="33"/>
<code value="8721/3"/>
</coding>
<text value="Nodulaeres_malignes_Melanom_(NM)"/>
<text value="Noduläres_malignes_Melanom_(NM)"/>
</valueCodeableConcept>
<hasMember>
<reference value="Observation/grd3b517bb65fd6cddf"/>
Expand Down Expand Up @@ -1533,7 +1533,7 @@
<version value="33"/>
<code value="8721/3"/>
</coding>
<text value="Nodulaeres_malignes_Melanom_(NM)"/>
<text value="Noduläres_malignes_Melanom_(NM)"/>
</valueCodeableConcept>
<hasMember>
<reference value="Observation/grd473b8c6f78fbdbb4"/>
Expand Down Expand Up @@ -2361,7 +2361,7 @@
</extension>
<extension url="http://dktk.dkfz.de/fhir/StructureDefinition/onco-core-Extension-SystemischeTherapieProtokoll">
<valueCodeableConcept>
<text value="Atezolizumab/_Bevacizumab"/>
<text value="Atezolizumab_Bevacizumab"/>
</valueCodeableConcept>
</extension>
<status value="not-taken"/>
Expand Down Expand Up @@ -3428,7 +3428,7 @@
<effectiveDateTime value="2023-07-18"/>
<valueCodeableConcept>
<coding>
<system value="urn:problematic_duplicate:_diagnosis_and_progress"/>
<system value="urn:problematic_duplicate_diagnosis_and_progress"/>
<code value="I"/>
</coding>
</valueCodeableConcept>
Expand Down
Loading

0 comments on commit c23c39c

Please sign in to comment.