Skip to content

Commit

Permalink
ALS-6511: Fix avro formatting issue
Browse files Browse the repository at this point in the history
  • Loading branch information
ramari16 committed Jul 22, 2024
1 parent 1a3fd09 commit bc1cda5
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,12 @@ public void writeHeader(String[] data) {
writeMetadata();
}

private String formatFieldName(String s) {
return s.replaceAll("\\\\", "_");
/**
 * Sanitizes an arbitrary string so it can be used as a field name in the written schema.
 *
 * <p>Every character outside {@code [A-Za-z0-9_]} is replaced with an underscore. If the
 * result is empty or begins with a digit, an underscore is prepended so that the returned
 * name always starts with a letter or underscore (Avro rejects names that do not).
 *
 * @param s the raw field name; must not be {@code null}
 * @return the sanitized, never-empty field name
 * @throws NullPointerException if {@code s} is {@code null}
 */
protected String formatFieldName(String s) {
    String formattedFieldName = s.replaceAll("\\W", "_");
    // Check for emptiness first: an empty name has no first character to inspect, and
    // prefixing it yields "_" instead of a StringIndexOutOfBoundsException.
    if (formattedFieldName.isEmpty() || Character.isDigit(formattedFieldName.charAt(0))) {
        return "_" + formattedFieldName;
    }
    return formattedFieldName;
}

private void writeMetadata() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,25 @@ public void writeValidPFB() {
pfbWriter.close();
// todo: validate this programmatically
}

/**
 * Spaces and backslashes in a field name must each be replaced by a single underscore.
 */
@Test
public void formatFieldName_spacesAndBackslashes_replacedWithUnderscore() {
    final PfbWriter writer = new PfbWriter(new File("target/test-result.avro"));
    final String rawName = "\\Topmed Study Accession with Subject ID\\\\";

    final String actual = writer.formatFieldName(rawName);

    assertEquals("_Topmed_Study_Accession_with_Subject_ID__", actual);
}

/**
 * A field name whose first character is a digit must come back prefixed with an underscore,
 * in addition to the usual replacement of spaces and backslashes.
 */
@Test
public void formatFieldName_startsWithDigit_prependUnderscore() {
    final PfbWriter writer = new PfbWriter(new File("target/test-result.avro"));
    final String rawName = "123Topmed Study Accession with Subject ID\\\\";

    final String actual = writer.formatFieldName(rawName);

    assertEquals("_123Topmed_Study_Accession_with_Subject_ID__", actual);
}

/**
 * Punctuation and symbol characters anywhere in the name must each be replaced by an
 * underscore, leaving letters untouched.
 */
@Test
public void formatFieldName_randomGarbage_replaceWithUnderscore() {
    final PfbWriter writer = new PfbWriter(new File("target/test-result.avro"));
    final String rawName = "$$$my garbage @vro var!able nam#";

    final String actual = writer.formatFieldName(rawName);

    assertEquals("___my_garbage__vro_var_able_nam_", actual);
}
}

0 comments on commit bc1cda5

Please sign in to comment.