Skip to content

Commit

Permalink
Merge pull request #1051 from oncokb/update-duplicate-user-weighting
Browse files Browse the repository at this point in the history
Update duplicate user weighting
  • Loading branch information
bprize15 authored Nov 14, 2023
2 parents 5861167 + 501dfd0 commit 15b6ef4
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 27 deletions.
9 changes: 6 additions & 3 deletions src/main/java/org/mskcc/cbio/oncokb/service/UserService.java
Original file line number Diff line number Diff line change
Expand Up @@ -673,12 +673,15 @@ public List<UserDTO> getPotentialDuplicateAccountsByUser(UserDTO user) {
}

public List<UserDTO> searchAccountsForPotentialDuplicateUser(UserDTO user, List<UserDTO> allUsers) {
String userFullName = StringUtil.getFullName(user.getFirstName(), user.getLastName());
JaroWinklerSimilarity jw = new JaroWinklerSimilarity();
List<UserDTO> potentialDuplicateUsers = new ArrayList<>();
for (UserDTO potentialDuplicate : allUsers) {
String potentialDuplicateFullName = StringUtil.getFullName(potentialDuplicate.getFirstName(), potentialDuplicate.getLastName());
if (!user.getId().equals(potentialDuplicate.getId()) && jw.apply(userFullName, potentialDuplicateFullName) > .85) {
if (user.getId().equals(potentialDuplicate.getId())) {
continue;
}

Double similarity = jw.apply(user.getFirstName(), potentialDuplicate.getFirstName()) * .3 + jw.apply(user.getLastName(), potentialDuplicate.getLastName()) * .7;
if (similarity > .87) {
potentialDuplicateUsers.add(potentialDuplicate);
}
}
Expand Down
91 changes: 67 additions & 24 deletions src/test/java/org/mskcc/cbio/oncokb/service/UserServiceIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -378,31 +378,14 @@ public void assertThatUserTokenStatusIsExpected() {
}

//UNIT TESTS
@Test
public void assertThatDuplicateUsersAreCaught() {
public void testDuplicates(Pair<String, String> originalName, List<Pair<String, String>> similarNames, List<Pair<String, String>> dissimilarNames) {
Long id = 6000L;

UserDTO user = new UserDTO();
user.setFirstName("Jon");
user.setLastName("Doe");
user.setFirstName(originalName.left);
user.setLastName(originalName.right);
user.setId(id);

List<Pair<String, String>> similarNames = Arrays.asList(
new Pair<>("Jon", "Doh"),
new Pair<>("Joon", "Doe"),
new Pair<>("Jonny", "Doe"),
new Pair<>("Jonathon", "Doe"),
new Pair<>("Jon", "Doel"),
new Pair<>("Jonny", "Dole")
);
List<Pair<String, String>> dissimilarNames = Arrays.asList(
new Pair<>("Mark", "Thompson"),
new Pair<>("Emily", "Davis"),
new Pair<>("William", "Brown"),
new Pair<>("Jessica", "White"),
new Pair<>("Christopher", "Lee")
);

List<UserDTO> allUsers = new ArrayList<>();

List<UserDTO> similarUsers = new ArrayList<>();
Expand All @@ -416,13 +399,11 @@ public void assertThatDuplicateUsersAreCaught() {
allUsers.add(newUser);
}

List<UserDTO> dissimilarUsers = new ArrayList<>();
for (Pair<String, String> dissimilarName : dissimilarNames) {
UserDTO newUser = new UserDTO();
newUser.setFirstName(dissimilarName.left);
newUser.setLastName(dissimilarName.right);
newUser.setId(++id);
dissimilarUsers.add(newUser);

allUsers.add(newUser);
}
Expand All @@ -432,8 +413,70 @@ public void assertThatDuplicateUsersAreCaught() {
testSameIdUser.setLastName(user.getLastName());
testSameIdUser.setId(new Long(user.getId()));
allUsers.add(testSameIdUser);

List<UserDTO> duplicateUsers = userService.searchAccountsForPotentialDuplicateUser(user, allUsers);
assertThat(duplicateUsers).containsExactlyInAnyOrder(similarUsers.toArray(new UserDTO[similarUsers.size()]));
}
}

/**
 * Exercises the duplicate-user search with three independent name scenarios.
 * Every scenario passes {@code testDuplicates} a reference name, the names that
 * should come back as potential duplicates, and the names that should not.
 */
@Test
public void assertThatDuplicateUsersAreCaught() {
    // Scenario 1: short names with minor spelling variations versus unrelated names.
    Pair<String, String> jonDoe = new Pair<>("Jon", "Doe");
    List<Pair<String, String>> jonDoeLookalikes = Arrays.asList(
        new Pair<>("Jon", "Doh"),
        new Pair<>("Joon", "Doe"),
        new Pair<>("Jonny", "Doe"),
        new Pair<>("Jonathon", "Doe"),
        new Pair<>("Jon", "Doel"),
        new Pair<>("Jonny", "Dole")
    );
    List<Pair<String, String>> jonDoeUnrelated = Arrays.asList(
        new Pair<>("Mark", "Thompson"),
        new Pair<>("Emily", "Davis"),
        new Pair<>("William", "Brown"),
        new Pair<>("Jessica", "White"),
        new Pair<>("Christopher", "Lee")
    );
    testDuplicates(jonDoe, jonDoeLookalikes, jonDoeUnrelated);

    // Scenario 2: the non-matching names mostly share (or nearly share) the first name
    // but carry a different last name; one shares only the last name.
    Pair<String, String> christopherWardell = new Pair<>("Christopher", "Wardell");
    List<Pair<String, String>> wardellLookalikes = Arrays.asList(
        new Pair<>("Christophe", "Wardell"),
        new Pair<>("Chris", "Wardell"),
        new Pair<>("Christopher", "Wardel")
    );
    List<Pair<String, String>> wardellUnrelated = Arrays.asList(
        new Pair<>("Christopher", "Coldren"),
        new Pair<>("Christoph", "Ritzel"),
        new Pair<>("Christophe", "Roos"),
        new Pair<>("Christoph", "Schatz"),
        new Pair<>("Christopher", "Szeto"),
        new Pair<>("Christopher", "Benz"),
        new Pair<>("Christopher", "Edlund"),
        new Pair<>("Christopher", "Hubbard"),
        new Pair<>("Christopher", "Roberts"),
        new Pair<>("Christopher", "Douville"),
        new Pair<>("Christopher", "Krolla"),
        new Pair<>("Christophe", "Lemetre"),
        new Pair<>("Dan", "Wardell")
    );
    testDuplicates(christopherWardell, wardellLookalikes, wardellUnrelated);

    // Scenario 3: same idea with a different reference name, including multi-word and
    // accented last names among the non-matching entries.
    Pair<String, String> antonioGalvano = new Pair<>("Antonio", "Galvano");
    List<Pair<String, String>> galvanoLookalikes = Arrays.asList(
        new Pair<>("Anton", "Galvano"),
        new Pair<>("Anthony", "Galvano")
    );
    List<Pair<String, String>> galvanoUnrelated = Arrays.asList(
        new Pair<>("Antonio", "De Falco"),
        new Pair<>("Antonio", "Lázaro"),
        new Pair<>("Antonio", "Martinez"),
        new Pair<>("Antonio", "Viana Alonso"),
        new Pair<>("Antonio", "Marra"),
        new Pair<>("Anthony", "Gal")
    );
    testDuplicates(antonioGalvano, galvanoLookalikes, galvanoUnrelated);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ public class SlackControllerIT {
private static final String DEFAULT_ADDR = "[email protected]";

// Mock fields for test user
private static final String DEFAULT_USER_FIRST_NAME = "Jon";
private static final String DEFAULT_USER_LAST_NAME = "Doe";
private static final String DEFAULT_USER_EMAIL = "[email protected]";
private static final String DEFAULT_LANG_KEY = "en";
private static final String DEFAULT_COMPANY_NAME = "company name";
Expand Down Expand Up @@ -190,6 +192,8 @@ void setUp() throws IOException, SlackApiException {

// Create mock user
User mockUser = new User();
mockUser.setFirstName(DEFAULT_USER_FIRST_NAME);
mockUser.setLastName(DEFAULT_USER_LAST_NAME);
mockUser.setLogin(DEFAULT_USER_EMAIL);
mockUser.setEmail(DEFAULT_USER_EMAIL);
mockUser.setPassword(passwordEncoder.encode(RandomUtil.generatePassword()));
Expand Down

0 comments on commit 15b6ef4

Please sign in to comment.