Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Getting spans into applied and identified lists appropriately #127

Merged
merged 8 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ jobs:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
packages: write
steps:
- uses: actions/checkout@v3
with:
lfs: true
lfs: true
- name: Cache Maven packages
uses: actions/cache@v1
with:
Expand All @@ -26,8 +26,14 @@ jobs:
server-id: philterd-repository-snapshots
server-username: MAVEN_USERNAME
server-password: MAVEN_PASSWORD
- name: Build and Deploy
run: mvn --batch-mode --update-snapshots test deploy
- name: Build
run: mvn --batch-mode --update-snapshots test
env:
MAVEN_USERNAME: ${{ secrets.PHILTERD_ARTIFACTS_USER }}
MAVEN_PASSWORD: ${{ secrets.PHILTERD_ARTIFACTS_TOKEN }}
- name: Deploy
if: ${{ github.ref == 'refs/heads/main' }}
run: mvn --batch-mode --update-snapshots package deploy
env:
MAVEN_USERNAME: ${{ secrets.PHILTERD_ARTIFACTS_USER }}
MAVEN_PASSWORD: ${{ secrets.PHILTERD_ARTIFACTS_TOKEN }}
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ public FilterResult filter(final Policy policy, final String context, final Stri
replacement.getReplacement(),
replacement.getSalt(),
isIgnored,
replacement.isApplied(),
window
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ public FilterResult filter(final Policy policy, final String context, final Stri
replacement.getReplacement(),
replacement.getSalt(),
isIgnored,
replacement.isApplied(),
window
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ private Span createSpan(final Policy policy, final String context, final String
// Is this term ignored?
final boolean ignored = isIgnored(text);

return Span.make(start, end, FilterType.PERSON, context, documentId, confidence, text, replacement.getReplacement(), replacement.getSalt(), ignored, window);
return Span.make(start, end, FilterType.PERSON, context, documentId, confidence, text,
replacement.getReplacement(), replacement.getSalt(), ignored, replacement.isApplied(), window);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,12 @@ public FilterResult filter(final Policy policy, final String context, final Stri

final String[] window = getWindow(input, match.start(), match.end());
final String classification = "";
final Replacement replacement = getReplacement(policy, context, documentId, text, window, confidence, classification, attributes, null);
final Replacement replacement = getReplacement(policy, context, documentId, text, window, confidence,
classification, attributes, null);
final boolean isIgnored = ignored.contains(text);

spans.add(Span.make(match.start(), match.end(), getFilterType(), context, documentId, confidence, text, replacement.getReplacement(), replacement.getSalt(), isIgnored, window));
spans.add(Span.make(match.start(), match.end(), getFilterType(), context, documentId, confidence,
text, replacement.getReplacement(), replacement.getSalt(), isIgnored, replacement.isApplied(), window));

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,7 @@
import java.util.List;
import java.util.Properties;

import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPdfFilterWithPersonPolicy;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicy;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyJustCreditCard;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyJustIdentifier;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyJustStreetAddress;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyWithSentiment;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyZipCodeWithIgnored;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyZipCodeWithIgnoredFromFile;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.*;

@Disabled("Some of these tests require a running philter-ner service")
public class EndToEndTests {
Expand Down Expand Up @@ -566,6 +559,42 @@ public void endToEnd17() throws Exception {

}

/**
 * End-to-end check of a policy that only configures the phone number filter.
 *
 * <p>The policy produced by {@code getPolicyJustPhoneNumber} is serialized to a JSON
 * file in a fresh temporary directory, which is then used as the policies directory
 * of the filter service. The input contains three phone-number-like values; the test
 * expects all three to be reported as identified spans while only one is applied
 * (redacted) in the filtered text.
 *
 * @throws Exception if the temporary policy cannot be written or filtering fails
 */
@Test
public void endToEndJustPhoneNumbers() throws Exception {

    // Write the serialized policy into its own temporary policies directory.
    final Path policiesDirectory = Files.createTempDirectory("philter");
    final String policiesDirectoryPath = policiesDirectory.toFile().getAbsolutePath();
    final File policyFile = Paths.get(policiesDirectoryPath, "phonenumbers.json").toFile();
    LOGGER.info("Writing policy to {}", policyFile.getAbsolutePath());
    final String policyJson = gson.toJson(getPolicyJustPhoneNumber("phonenumbers"));
    LOGGER.info(policyJson);
    FileUtils.writeStringToFile(policyFile, policyJson, Charset.defaultCharset());

    // Point the service configuration at the indexes and at the directory created above.
    final Properties configurationProperties = new Properties();
    configurationProperties.setProperty("indexes.directory", INDEXES_DIRECTORY);
    configurationProperties.setProperty("filter.policies.directory", policiesDirectory.toFile().getAbsolutePath());

    final PhileasConfiguration configuration = new PhileasConfiguration(configurationProperties);

    final String text = "his number is 123-456-7890. her number is 9999999999. her number is 102-304-5678.";

    final PhileasFilterService filterService = new PhileasFilterService(configuration);
    final FilterResponse filterResponse = filterService.filter(
            List.of("phonenumbers"),
            "context",
            "documentid",
            text,
            MimeType.TEXT_PLAIN);

    LOGGER.info(filterResponse.filteredText());

    LOGGER.info("Identified spans:");
    showSpans(filterResponse.explanation().identifiedSpans());

    LOGGER.info("Applied spans:");
    showSpans(filterResponse.explanation().appliedSpans());

    Assertions.assertEquals("documentid", filterResponse.documentId());

    // All three values are identified, but only the first one ends up redacted.
    Assertions.assertEquals(1, filterResponse.explanation().appliedSpans().size());
    Assertions.assertEquals(3, filterResponse.explanation().identifiedSpans().size());
    Assertions.assertEquals(
            "his number is {{{REDACTED-phone-number}}}. her number is 9999999999. her number is 102-304-5678.",
            filterResponse.filteredText().trim());

}

@Test
public void endToEndWithPolicyAsObject() throws Exception {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,12 +199,12 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE
AgeFilterStrategy ageFilterStrategy = new AgeFilterStrategy();

Age age = new Age();
age.setAgeFilterStrategies(Arrays.asList(ageFilterStrategy));
age.setAgeFilterStrategies(List.of(ageFilterStrategy));

CreditCardFilterStrategy creditCardFilterStrategy = new CreditCardFilterStrategy();

CreditCard creditCard = new CreditCard();
creditCard.setCreditCardFilterStrategies(Arrays.asList(creditCardFilterStrategy));
creditCard.setCreditCardFilterStrategies(List.of(creditCardFilterStrategy));

DateFilterStrategy dateFilterStrategy = new DateFilterStrategy();
dateFilterStrategy.setStrategy(AbstractFilterStrategy.SHIFT);
Expand All @@ -213,15 +213,15 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE
dateFilterStrategy.setShiftDays(1);

Date date = new Date();
date.setDateFilterStrategies(Arrays.asList(dateFilterStrategy));
date.setDateFilterStrategies(List.of(dateFilterStrategy));

EmailAddressFilterStrategy emailAddressFilterStrategy = new EmailAddressFilterStrategy();

EmailAddress emailAddress = new EmailAddress();
emailAddress.setEmailAddressFilterStrategies(Arrays.asList(emailAddressFilterStrategy));
emailAddress.setEmailAddressFilterStrategies(List.of(emailAddressFilterStrategy));

Identifier identifier1 = new Identifier();
identifier1.setIdentifierFilterStrategies(Arrays.asList(new IdentifierFilterStrategy()));
identifier1.setIdentifierFilterStrategies(List.of(new IdentifierFilterStrategy()));
identifier1.setPattern("asdfasdfasdf");
identifier1.setCaseSensitive(true);

Expand All @@ -231,12 +231,12 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE
Identifier identifier2 = new Identifier();
identifier2.setPattern("JEFF");
identifier2.setCaseSensitive(true);
identifier2.setIdentifierFilterStrategies(Arrays.asList(identifier2FilterStrategy));
identifier2.setIdentifierFilterStrategies(List.of(identifier2FilterStrategy));

IpAddressFilterStrategy ipAddressFilterStrategy = new IpAddressFilterStrategy();

IpAddress ipAddress = new IpAddress();
ipAddress.setIpAddressFilterStrategies(Arrays.asList(ipAddressFilterStrategy));
ipAddress.setIpAddressFilterStrategies(List.of(ipAddressFilterStrategy));

PhoneNumberFilterStrategy phoneNumberFilterStrategy = new PhoneNumberFilterStrategy();

Expand Down Expand Up @@ -271,13 +271,13 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE

PersonsFilterStrategy personsFilterStrategy = new PersonsFilterStrategy();

final File model = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/model.onnx").toURI());
/*final File model = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/vocab.txt").toURI());

PersonV2 personV2 = new PersonV2();
personV2.setModel(model.getAbsolutePath());
personV2.setVocab(vocab.getAbsolutePath());
personV2.setPersonFilterStrategies(List.of(personsFilterStrategy));
personV2.setPersonFilterStrategies(List.of(personsFilterStrategy));*/

// ----------------------------------------------------------------------------------

Expand Down Expand Up @@ -326,7 +326,7 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE
identifiers.setEmailAddress(emailAddress);
identifiers.setIdentifiers(Arrays.asList(identifier1, identifier2));
identifiers.setIpAddress(ipAddress);
identifiers.setPersonV2(personV2);
//identifiers.setPersonV2(personV2);
identifiers.setPhoneNumber(phoneNumber);
identifiers.setSsn(ssn);
//identifiers.setStateAbbreviation(stateAbbreviation);
Expand Down Expand Up @@ -387,4 +387,24 @@ public static Policy getPolicyJustStreetAddress(String policyName) {

}

/**
 * Builds a policy whose identifiers configure only the phone number filter.
 *
 * <p>The single phone number strategy carries the condition
 * {@code confidence > 0.70}, and the value {@code 102-304-5678} is registered
 * as an ignored term for the phone number filter.
 *
 * @param policyName the name to assign to the returned policy
 * @return a new policy with the given name and only a phone number identifier set
 */
public static Policy getPolicyJustPhoneNumber(String policyName) {

    // Strategy that is conditional on the span's confidence.
    final PhoneNumberFilterStrategy strategy = new PhoneNumberFilterStrategy();
    strategy.setConditions("confidence > 0.70");

    // Phone number filter: one strategy plus one explicitly ignored number.
    final PhoneNumber phoneNumberFilter = new PhoneNumber();
    phoneNumberFilter.setPhoneNumberFilterStrategies(List.of(strategy));
    phoneNumberFilter.setIgnored(Set.of("102-304-5678"));

    // No other identifier is set on the policy.
    final Identifiers onlyPhoneNumbers = new Identifiers();
    onlyPhoneNumbers.setPhoneNumber(phoneNumberFilter);

    final Policy result = new Policy();
    result.setName(policyName);
    result.setIdentifiers(onlyPhoneNumbers);
    return result;

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public void policyWithPlaceholder() throws IOException, URISyntaxException {
ignored.setTerms(Arrays.asList("john", "jeff", "${USER}"));

final Policy policy = getPolicy("placeholder");
policy.setIgnored(Arrays.asList(ignored));
policy.setIgnored(List.of(ignored));
final String json = gson.toJson(policy);
LOGGER.info(json);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

Expand All @@ -36,6 +37,7 @@
import java.util.List;
import java.util.Map;

@Disabled
public class PersonsV3FilterTest extends AbstractFilterTest {

private static final Logger LOGGER = LogManager.getLogger(PersonsV3FilterTest.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public void ignored1() {
policy.setIgnoredPatterns(Arrays.asList(ignoredPattern));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(6, 10, FilterType.IDENTIFIER, "context", "docid", 0.80, "AB01", "*****", "", false, new String[0]));
spans.add(Span.make(6, 10, FilterType.IDENTIFIER, "context", "docid", 0.80, "AB01", "*****", "", false, true, new String[0]));

final IgnoredPatternsFilter ignoredPatternsFilter = new IgnoredPatternsFilter(Arrays.asList(ignoredPattern));
final List<Span> filteredSpans = ignoredPatternsFilter.filter("ID is AB01.", spans);
Expand All @@ -60,7 +60,7 @@ public void notIgnored1() {
policy.setIgnoredPatterns(Arrays.asList(ignoredPattern));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(6, 10, FilterType.IDENTIFIER, "context", "docid", 0.80, "Ab01", "*****", "", false, new String[0]));
spans.add(Span.make(6, 10, FilterType.IDENTIFIER, "context", "docid", 0.80, "Ab01", "*****", "", false, true, new String[0]));

final IgnoredPatternsFilter ignoredPatternsFilter = new IgnoredPatternsFilter(Arrays.asList(ignoredPattern));
final List<Span> filteredSpans = ignoredPatternsFilter.filter("ID is Ab01.", spans);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public void ignored() throws IOException {
policy.setIgnored(Arrays.asList(ignored));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in Washington.", spans);
Expand All @@ -58,7 +58,7 @@ public void ignoredFile1() throws IOException {
ignored.setFiles(Arrays.asList(new File("src/test/resources/ignored-terms.txt").getAbsolutePath()));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 18, FilterType.IDENTIFIER, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 18, FilterType.IDENTIFIER, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in samuel.", spans);
Expand All @@ -74,7 +74,7 @@ public void ignoredFile2() throws IOException {
ignored.setFiles(Arrays.asList(new File("src/test/resources/ignored-terms.txt").getAbsolutePath()));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 18, FilterType.IDENTIFIER, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 18, FilterType.IDENTIFIER, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in samuel.", spans);
Expand Down Expand Up @@ -102,7 +102,7 @@ public void notIgnored() throws IOException {
ignored.setTerms(Arrays.asList("Seattle", "California", "Virginia"));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in Washington.", spans);
Expand All @@ -118,7 +118,7 @@ public void caseSensitive1Test() throws IOException {
ignored.setTerms(Arrays.asList("washington", "California", "Virginia"));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in Washington.", spans);
Expand All @@ -134,7 +134,7 @@ public void caseSensitive2Test() throws IOException {
ignored.setTerms(Arrays.asList("Washington", "California", "Virginia"));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in Washington.", spans);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class TrailingNewLinePostFilterTest extends AbstractFilterTest {
public void test1() {

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with\n", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with\n", "*****", "", false, true, new String[0]));

final TrailingNewLinePostFilter postFilter = TrailingNewLinePostFilter.getInstance();
final List<Span> filteredSpans = postFilter.filter("doesn't matter", spans);
Expand All @@ -47,7 +47,7 @@ public void test1() {
public void test2() {

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with", "*****", "", false, true, new String[0]));

final TrailingNewLinePostFilter postFilter = TrailingNewLinePostFilter.getInstance();
final List<Span> filteredSpans = postFilter.filter("doesn't matter", spans);
Expand All @@ -63,7 +63,7 @@ public void test2() {
public void test3() {

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with\n\n", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with\n\n", "*****", "", false, true, new String[0]));

final TrailingNewLinePostFilter postFilter = TrailingNewLinePostFilter.getInstance();
final List<Span> filteredSpans = postFilter.filter("doesn't matter", spans);
Expand Down
Loading
Loading