Skip to content

Commit 533466e

Browse files
committed
#126 Setting up applied and identified spans.
1 parent b716900 commit 533466e

File tree

6 files changed

+26
-20
lines changed

6 files changed

+26
-20
lines changed

phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTests.java

+7-3
Original file line numberDiff line numberDiff line change
@@ -575,19 +575,23 @@ public void endToEndJustPhoneNumbers() throws Exception {
575575

576576
final PhileasConfiguration phileasConfiguration = new PhileasConfiguration(properties);
577577

578-
final String input = "his number is 123-456-7890. her number is 9999999999.";
578+
final String input = "his number is 123-456-7890. her number is 9999999999. her number is 102-304-5678.";
579579

580580
final PhileasFilterService service = new PhileasFilterService(phileasConfiguration);
581581
final FilterResponse response = service.filter(List.of("phonenumbers"), "context", "documentid", input, MimeType.TEXT_PLAIN);
582582

583583
LOGGER.info(response.filteredText());
584584

585+
LOGGER.info("Identified spans:");
586+
showSpans(response.explanation().identifiedSpans());
587+
588+
LOGGER.info("Applied spans:");
585589
showSpans(response.explanation().appliedSpans());
586590

587591
Assertions.assertEquals("documentid", response.documentId());
588-
Assertions.assertEquals(2, response.explanation().identifiedSpans().size());
589592
Assertions.assertEquals(1, response.explanation().appliedSpans().size());
590-
Assertions.assertEquals("his number is {{{REDACTED-phone-number}}}. her number is {{{REDACTED-phone-number}}}.", response.filteredText().trim());
593+
Assertions.assertEquals(3, response.explanation().identifiedSpans().size());
594+
Assertions.assertEquals("his number is {{{REDACTED-phone-number}}}. her number is 9999999999. her number is 102-304-5678.", response.filteredText().trim());
591595

592596
}
593597

phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTestsHelper.java

+1
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ public static Policy getPolicyJustPhoneNumber(String policyName) {
394394

395395
PhoneNumber phoneNumber = new PhoneNumber();
396396
phoneNumber.setPhoneNumberFilterStrategies(List.of(phoneNumberFilterStrategy));
397+
phoneNumber.setIgnored(Set.of("102-304-5678"));
397398

398399
Identifiers identifiers = new Identifiers();
399400
identifiers.setPhoneNumber(phoneNumber);

phileas-model/src/main/java/ai/philterd/phileas/model/objects/Span.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -312,11 +312,11 @@ public static List<Span> getIdenticalSpans(Span span, List<Span> spans) {
312312
}
313313

314314
/**
315-
* Drop overlapping spans that were for text that was ignored.
315+
* Drop unapplied spans and spans that were for text that was ignored.
316316
* @param spans A list of {@link Span spans} that may or may not contain ignored spans.
317317
* @return A list of {@link Span spans} without ignored or unapplied spans.
318318
*/
319-
public static List<Span> dropIgnoredSpans(List<Span> spans) {
319+
public static List<Span> dropIgnoredAndUnappliedSpans(List<Span> spans) {
320320

321321
final List<Span> nonIgnoredSpans = new LinkedList<>();
322322

phileas-model/src/main/java/ai/philterd/phileas/model/responses/FilterResponse.java

-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import com.google.gson.Gson;
2121

2222
import java.util.*;
23-
import java.util.stream.Collectors;
2423

2524
/**
2625
* Response to a filter operation.

phileas-model/src/test/java/ai/philterd/test/phileas/model/objects/SpanTest.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ public void ignored1() {
132132
spans.add(Span.make(1, 5, FilterType.PERSON, "context", "document", 1.0, "test", "***", "salt", false, true, new String[0]));
133133
spans.add(Span.make(2, 12, FilterType.PERSON, "context", "document", 1.0, "test", "***", "salt", true, true, new String[0]));
134134

135-
List<Span> nonIgnoredSpans = Span.dropIgnoredSpans(spans);
135+
List<Span> nonIgnoredSpans = Span.dropIgnoredAndUnappliedSpans(spans);
136136

137137
showSpans(nonIgnoredSpans);
138138

@@ -148,7 +148,7 @@ public void ignored2() {
148148
spans.add(Span.make(1, 5, FilterType.PERSON, "context", "document", 1.0, "test", "***", "salt", false, true, new String[0]));
149149
spans.add(Span.make(2, 12, FilterType.PERSON, "context", "document", 1.0, "test", "***", "salt", false, true, new String[0]));
150150

151-
List<Span> nonIgnoredSpans = Span.dropIgnoredSpans(spans);
151+
List<Span> nonIgnoredSpans = Span.dropIgnoredAndUnappliedSpans(spans);
152152

153153
showSpans(nonIgnoredSpans);
154154

@@ -165,7 +165,7 @@ public void ignored3() {
165165
spans.add(Span.make(1, 5, FilterType.PERSON, "context", "document", 1.0, "test", "***", "salt", true, true, new String[0]));
166166
spans.add(Span.make(2, 12, FilterType.PERSON, "context", "document", 1.0, "test", "***", "salt", true, true, new String[0]));
167167

168-
List<Span> nonIgnoredSpans = Span.dropIgnoredSpans(spans);
168+
List<Span> nonIgnoredSpans = Span.dropIgnoredAndUnappliedSpans(spans);
169169

170170
showSpans(nonIgnoredSpans);
171171

phileas-processors/phileas-processors-unstructured/src/main/java/ai/philterd/phileas/processors/unstructured/UnstructuredDocumentProcessor.java

+13-11
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import java.util.LinkedList;
3131
import java.util.List;
3232
import java.util.Map;
33+
import java.util.function.Predicate;
3334

3435
import static java.util.stream.Collectors.toList;
3536

@@ -55,7 +56,7 @@ public FilterResponse process(final Policy policy, final List<Filter> filters, f
5556
final Map<String, String> attributes) throws Exception {
5657

5758
// The list that will contain the spans containing PHI/PII.
58-
List<Span> spans = new LinkedList<>();
59+
List<Span> identifiedSpans = new LinkedList<>();
5960

6061
// Apply each filter.
6162
for(final Filter filter : filters) {
@@ -66,27 +67,27 @@ public FilterResponse process(final Policy policy, final List<Filter> filters, f
6667

6768
metricsService.logFilterTime(filter.getFilterType(), elapsedTimeMs);
6869

69-
spans.addAll(filterResult.getSpans());
70+
identifiedSpans.addAll(filterResult.getSpans());
7071

7172
}
7273

7374
// Ignored spans are no longer dropped here; they are excluded later when appliedSpans is built.
74-
spans = Span.dropIgnoredSpans(spans);
75+
//identifiedSpans = Span.dropIgnoredAndUnappliedSpans(identifiedSpans);
7576

7677
// Perform span disambiguation.
7778
if(spanDisambiguationService.isEnabled()) {
78-
spans = spanDisambiguationService.disambiguate(context, spans);
79+
identifiedSpans = spanDisambiguationService.disambiguate(context, identifiedSpans);
7980
}
8081

8182
// Drop overlapping spans.
82-
spans = Span.dropOverlappingSpans(spans);
83+
identifiedSpans = Span.dropOverlappingSpans(identifiedSpans);
8384

8485
// Sort the spans based on the confidence.
85-
spans.sort(Comparator.comparing(Span::getConfidence));
86+
identifiedSpans.sort(Comparator.comparing(Span::getConfidence));
8687

8788
// Perform post-filtering on the spans.
8889
for(final PostFilter postFilter : postFilters) {
89-
spans = postFilter.filter(input, spans);
90+
identifiedSpans = postFilter.filter(input, identifiedSpans);
9091
}
9192

9293
// PHL-185: Remove non-adjacent firstname/surname spans.
@@ -127,15 +128,16 @@ public FilterResponse process(final Policy policy, final List<Filter> filters, f
127128

128129
// The spans that will be persisted. Has to be a deep copy because the shift
129130
// below will change the indexes. Doing this to save the original locations of the spans.
130-
final List<Span> appliedSpans = spans.stream().map(Span::copy).collect(toList());
131+
final List<Span> appliedSpans = identifiedSpans.stream().filter(Span::isApplied)
132+
.filter(Predicate.not(Span::isIgnored)).map(Span::copy).collect(toList());
131133

132134
// TODO: Set a flag on each "span" not in appliedSpans indicating it was not used.
133135

134136
// Log a metric for each filter type.
135137
appliedSpans.forEach(k -> metricsService.incrementFilterType(k.getFilterType()));
136138

137139
// Define the explanation.
138-
final Explanation explanation = new Explanation(appliedSpans, spans);
140+
final Explanation explanation = new Explanation(appliedSpans, identifiedSpans);
139141

140142
// Used to manipulate the text.
141143
final StringBuilder sb = new StringBuilder(input);
@@ -148,7 +150,7 @@ public FilterResponse process(final Policy policy, final List<Filter> filters, f
148150
for(int i = 0; i < stringLength; i++) {
149151

150152
// Is index i the start of a span?
151-
final Span span = Span.doesIndexStartSpan(i, spans);
153+
final Span span = Span.doesIndexStartSpan(i, appliedSpans);
152154

153155
if(span != null) {
154156

@@ -166,7 +168,7 @@ public FilterResponse process(final Policy policy, final List<Filter> filters, f
166168
final int shift = (spanLength - replacementLength) * -1;
167169

168170
// Shift the remaining spans by the shift value.
169-
spans = Span.shiftSpans(shift, span, spans);
171+
identifiedSpans = Span.shiftSpans(shift, span, identifiedSpans);
170172

171173
// Update the length of the string.
172174
stringLength += shift;

0 commit comments

Comments
 (0)