Skip to content

Commit

Permalink
Switched Lucene similarity back to default which seems to work better…
Browse files Browse the repository at this point in the history
…. Parent-child relationships are now extracted from concept_ancestor instead of concept_relationship to better cover non-condition concepts. Added index statistics info panel. Can now search the index even without having a code set loaded (for easier debugging). Concept information dialog now has maximize button (to maximize on second monitor), and all tables are sortable.
  • Loading branch information
schuemie committed Jun 12, 2017
1 parent ea51b89 commit 9691c8c
Show file tree
Hide file tree
Showing 12 changed files with 274 additions and 87 deletions.
58 changes: 36 additions & 22 deletions src/org/ohdsi/usagi/BerkeleyDbEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class BerkeleyDbEngine {
private EntityStore store;
private ConceptDataAccessor conceptDataAccessor;
private MapsToRelationshipDataAccessor mapsToRelationshipDataAccessor;
private SubsumesRelationshipDataAccessor subsumesRelationshipDataAccessor;
private ParentChildRelationshipDataAccessor parentChildRelationshipDataAccessor;
private String databaseFolder;
private boolean isOpenForReading = false;
private boolean isOpenForWriting = false;
Expand Down Expand Up @@ -60,12 +60,26 @@ private void open(boolean create) {
store = new EntityStore(dbEnvironment, "EntityStore", storeConfig);
conceptDataAccessor = new ConceptDataAccessor();
mapsToRelationshipDataAccessor = new MapsToRelationshipDataAccessor();
subsumesRelationshipDataAccessor = new SubsumesRelationshipDataAccessor();
parentChildRelationshipDataAccessor = new ParentChildRelationshipDataAccessor();
} catch (DatabaseException dbe) {
throw new RuntimeException(dbe);
}
}

/**
 * Plain holder for the entity counts filled in by {@code getStats()}.
 * Every counter is zero until a value is assigned to it.
 */
public class BerkeleyDbStats {
	/** Number of entities in the Concept primary index. */
	public long conceptCount = 0L;

	/** Number of entities in the MapsToRelationship primary index. */
	public long mapsToRelationshipCount = 0L;

	/** Number of entities in the ParentChildRelationShip primary index. */
	public long parentChildCount = 0L;
}

/**
 * Collects how many entities of each kind are held in the entity store.
 *
 * @return a new BerkeleyDbStats carrying the count() of the Concept, MapsToRelationship and
 *         ParentChildRelationShip primary indexes
 */
public BerkeleyDbStats getStats() {
	// Query the three primary indexes in the same order as before.
	long concepts = store.getPrimaryIndex(Integer.class, Concept.class).count();
	long mapsTo = store.getPrimaryIndex(Integer.class, MapsToRelationship.class).count();
	long parentChild = store.getPrimaryIndex(Integer.class, ParentChildRelationShip.class).count();

	BerkeleyDbStats stats = new BerkeleyDbStats();
	stats.conceptCount = concepts;
	stats.mapsToRelationshipCount = mapsTo;
	stats.parentChildCount = parentChild;
	return stats;
}

/**
 * Writes a concept to the database through the primary index of the concept accessor.
 *
 * @param concept the concept entity to store
 */
public void put(Concept concept) {
	PrimaryIndex<Integer, Concept> conceptIndex = conceptDataAccessor.primaryIndex;
	conceptIndex.put(concept);
}
Expand All @@ -74,8 +88,8 @@ public void put(MapsToRelationship mapsToRelationship) {
mapsToRelationshipDataAccessor.primaryIndex.put(mapsToRelationship);
}

public void put(SubsumesRelationship subsumesRelationship) {
subsumesRelationshipDataAccessor.primaryIndex.put(subsumesRelationship);
public void put(ParentChildRelationShip parentChildRelationship) {
parentChildRelationshipDataAccessor.primaryIndex.put(parentChildRelationship);
}

public EntityCursor<Concept> getConceptCursor() {
Expand All @@ -99,25 +113,25 @@ public List<MapsToRelationship> getMapsToRelationshipsByConceptId2(int conceptId
return relationships;
}

public List<SubsumesRelationship> getSubsumesRelationshipsByParentConceptId(int conceptId) {
EntityIndex<Integer, SubsumesRelationship> subIndex = subsumesRelationshipDataAccessor.secondaryIndexParent.subIndex(conceptId);
EntityCursor<SubsumesRelationship> cursor = subIndex.entities();
List<SubsumesRelationship> relationships = new ArrayList<SubsumesRelationship>();
public List<ParentChildRelationShip> getParentChildRelationshipsByParentConceptId(int conceptId) {
EntityIndex<Integer, ParentChildRelationShip> subIndex = parentChildRelationshipDataAccessor.secondaryIndexParent.subIndex(conceptId);
EntityCursor<ParentChildRelationShip> cursor = subIndex.entities();
List<ParentChildRelationShip> relationships = new ArrayList<ParentChildRelationShip>();
try {
for (SubsumesRelationship relationship : cursor)
for (ParentChildRelationShip relationship : cursor)
relationships.add(relationship);
} finally {
cursor.close();
}
return relationships;
}
public List<SubsumesRelationship> getSubsumesRelationshipsByChildConceptId(int conceptId) {
EntityIndex<Integer, SubsumesRelationship> subIndex = subsumesRelationshipDataAccessor.secondaryIndexChild.subIndex(conceptId);
EntityCursor<SubsumesRelationship> cursor = subIndex.entities();
List<SubsumesRelationship> relationships = new ArrayList<SubsumesRelationship>();

public List<ParentChildRelationShip> getParentChildRelationshipsByChildConceptId(int conceptId) {
EntityIndex<Integer, ParentChildRelationShip> subIndex = parentChildRelationshipDataAccessor.secondaryIndexChild.subIndex(conceptId);
EntityCursor<ParentChildRelationShip> cursor = subIndex.entities();
List<ParentChildRelationShip> relationships = new ArrayList<ParentChildRelationShip>();
try {
for (SubsumesRelationship relationship : cursor)
for (ParentChildRelationShip relationship : cursor)
relationships.add(relationship);
} finally {
cursor.close();
Expand All @@ -141,7 +155,7 @@ public void shutdown() throws DatabaseException {
}

private class ConceptDataAccessor {
public PrimaryIndex<Integer, Concept> primaryIndex;
public PrimaryIndex<Integer, Concept> primaryIndex;

public ConceptDataAccessor() throws DatabaseException {
primaryIndex = store.getPrimaryIndex(Integer.class, Concept.class);
Expand All @@ -159,13 +173,13 @@ public MapsToRelationshipDataAccessor() throws DatabaseException {
}
}

private class SubsumesRelationshipDataAccessor {
public PrimaryIndex<Integer, SubsumesRelationship> primaryIndex;
public SecondaryIndex<Integer, Integer, SubsumesRelationship> secondaryIndexParent;
public SecondaryIndex<Integer, Integer, SubsumesRelationship> secondaryIndexChild;
private class ParentChildRelationshipDataAccessor {
public PrimaryIndex<Integer, ParentChildRelationShip> primaryIndex;
public SecondaryIndex<Integer, Integer, ParentChildRelationShip> secondaryIndexParent;
public SecondaryIndex<Integer, Integer, ParentChildRelationShip> secondaryIndexChild;

public SubsumesRelationshipDataAccessor() throws DatabaseException {
primaryIndex = store.getPrimaryIndex(Integer.class, SubsumesRelationship.class);
public ParentChildRelationshipDataAccessor() throws DatabaseException {
primaryIndex = store.getPrimaryIndex(Integer.class, ParentChildRelationShip.class);
secondaryIndexParent = store.getSecondaryIndex(primaryIndex, Integer.class, "parentConceptId");
secondaryIndexChild = store.getSecondaryIndex(primaryIndex, Integer.class, "childConceptId");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import static com.sleepycat.persist.model.Relationship.*;

@Entity
public class SubsumesRelationship {
public class ParentChildRelationShip {
@PrimaryKey(sequence="Sequence_Namespace")
public int id;

Expand All @@ -18,10 +18,10 @@ public class SubsumesRelationship {
@SecondaryKey(relate=MANY_TO_ONE)
public int childConceptId;

public SubsumesRelationship() {}
public ParentChildRelationShip() {}

public SubsumesRelationship(Row row) {
parentConceptId = row.getInt("concept_id_1");
childConceptId = row.getInt("concept_id_2");
public ParentChildRelationShip(Row row) {
parentConceptId = row.getInt("ancestor_concept_id");
childConceptId = row.getInt("descendant_concept_id");
}
}
50 changes: 13 additions & 37 deletions src/org/ohdsi/usagi/UsagiSearchEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
Expand Down Expand Up @@ -239,18 +238,21 @@ public void run() {
System.gc();
if (dialog != null)
dialog.setVisible(false);
openIndexForSearching();
openIndexForSearching(true);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}

public void openIndexForSearching() {
public void openIndexForSearching(boolean useDerivedIndex) {
try {
reader = DirectoryReader.open(FSDirectory.open(new File(folder + "/" + DERIVED_INDEX_FOLDER)));
if (useDerivedIndex)
reader = DirectoryReader.open(FSDirectory.open(new File(folder + "/" + DERIVED_INDEX_FOLDER)));
else
reader = DirectoryReader.open(FSDirectory.open(new File(folder + "/" + MAIN_INDEX_FOLDER)));
searcher = new IndexSearcher(reader);
searcher.setSimilarity(new CustomSimilarity());
searcher.setSimilarity(new DefaultSimilarity());
BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
QueryParser typeQueryParser = new QueryParser(Version.LUCENE_4_9, "TYPE", new KeywordAnalyzer());
conceptQuery = typeQueryParser.parse(CONCEPT_TYPE_STRING);
Expand All @@ -267,37 +269,6 @@ public void openIndexForSearching() {
}
}

/**
 * Similarity that switches off most of the default TF-IDF weighting: length normalisation,
 * query normalisation and the coordination factor are fixed at 1, the raw term frequency is
 * used unchanged, and only the inverse document frequency is actually computed.
 */
public class CustomSimilarity extends DefaultSimilarity {

	/**
	 * @return always 1, so the length of a field has no influence on the score
	 */
	@Override
	public float lengthNorm(FieldInvertState state) {
		// Constant norm: nothing from the invert state (length, boost) is factored in.
		return 1;
	}

	/**
	 * @param docFreq number of documents containing the term
	 * @param numDocs total number of documents
	 * @return natural logarithm of numDocs / (docFreq + 1)
	 */
	@Override
	public float idf(long docFreq, long numDocs) {
		// Divide in floating point. Long division truncated the ratio, which mapped different
		// document frequencies onto the same weight and produced log(0) = -Infinity whenever
		// docFreq + 1 exceeded numDocs (for example a term that occurs in every document).
		return (float) Math.log((double) numDocs / (docFreq + 1));
	}

	/**
	 * @return always 1, i.e. no query normalisation
	 */
	@Override
	public float queryNorm(float sumOfSquaredWeights) {
		return 1;
	}

	/**
	 * @return the raw frequency, without any dampening
	 */
	@Override
	public float tf(float freq) {
		return freq;
	}

	/**
	 * @return always 1, i.e. no reward for matching more query terms
	 */
	@Override
	public float coord(int overlap, int maxOverlap) {
		return 1;
	}

}

public void close() {
try {
if (reader != null) {
Expand All @@ -306,7 +277,7 @@ public void close() {
System.gc();
}
if (writer != null) {
// writer.forceMerge(1);
// writer.forceMerge(1);
writer.close();
writer = null;
}
Expand All @@ -315,6 +286,11 @@ public void close() {
}
}

/**
 * Reports the size of the index that is currently open for searching.
 *
 * @return numDocs() of the open reader, or 0 when no reader is open
 */
public int getTermCount() {
	// close() also treats reader as possibly null; answer 0 here instead of failing with a
	// NullPointerException when no index has been opened.
	if (reader == null)
		return 0;
	return reader.numDocs();
}

public List<ScoredConcept> search(String searchTerm, boolean useMlt, Collection<Integer> filterConceptIds, String filterDomain, String filterConceptClass,
String filterVocabulary, boolean filterStandard, boolean includeSourceConcepts) {
List<ScoredConcept> results = new ArrayList<ScoredConcept>();
Expand Down
35 changes: 22 additions & 13 deletions src/org/ohdsi/usagi/indexBuilding/BerkeleyDbBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import org.ohdsi.usagi.BerkeleyDbEngine;
import org.ohdsi.usagi.MapsToRelationship;
import org.ohdsi.usagi.SubsumesRelationship;
import org.ohdsi.usagi.ParentChildRelationShip;
import org.ohdsi.usagi.Concept;
import org.ohdsi.usagi.indexBuilding.IndexBuildCoordinator.BuildThread;
import org.ohdsi.usagi.ui.Global;
Expand All @@ -36,6 +36,7 @@ public void buildIndex(String vocabFolder, String loincFileName, BuildThread bui
dbEngine = new BerkeleyDbEngine(Global.folder);
dbEngine.createDatabase();
loadRelationships(vocabFolder + "/CONCEPT_RELATIONSHIP.csv");
loadAncestors(vocabFolder + "/CONCEPT_ANCESTOR.csv");
loadConcepts(vocabFolder + "/CONCEPT.csv", loincFileName);
dbEngine.shutdown();
}
Expand All @@ -44,19 +45,27 @@ private void loadRelationships(String conceptRelationshipFileName) {
buildThread.report("Loading relationship information");
int count = 0;
for (Row row : new ReadAthenaFile(conceptRelationshipFileName)) {
if (row.get("invalid_reason") == null && !row.get("concept_id_1").equals(row.get("concept_id_2"))) {
if (row.get("relationship_id").equals("Maps to")) {
MapsToRelationship mapsToRelationship = new MapsToRelationship(row);
dbEngine.put(mapsToRelationship);
}
if (row.get("relationship_id").equals("Subsumes")) {
SubsumesRelationship subsumesRelationship = new SubsumesRelationship(row);
dbEngine.put(subsumesRelationship);
}
if (row.get("relationship_id").equals("Maps to") && row.get("invalid_reason") == null && !row.get("concept_id_1").equals(row.get("concept_id_2"))) {
MapsToRelationship mapsToRelationship = new MapsToRelationship(row);
dbEngine.put(mapsToRelationship);
}
count++;
if (count % 100000 == 0)
System.out.println("Loaded " + count + " relationships");
System.out.println("Processed " + count + " relationships");
}
}

/**
 * Reads the concept-ancestor file and stores every direct parent-child pair in the database.
 * A row is stored only when its min_levels_of_separation equals "1" and its ancestor and
 * descendant concept IDs differ. Rows with any of these fields missing are skipped.
 *
 * @param conceptAncestorFileName path of the concept-ancestor file to read
 */
private void loadAncestors(String conceptAncestorFileName) {
	buildThread.report("Loading parent-child information");
	int count = 0;
	for (Row row : new ReadAthenaFile(conceptAncestorFileName)) {
		String ancestorId = row.get("ancestor_concept_id");
		String descendantId = row.get("descendant_concept_id");
		// row.get() appears to return null for empty fields (loadRelationships compares
		// invalid_reason against null), so compare constant-first and check the IDs for null
		// rather than dereferencing a possibly missing value.
		boolean directLink = "1".equals(row.get("min_levels_of_separation"));
		boolean distinctConcepts = ancestorId != null && descendantId != null && !ancestorId.equals(descendantId);
		if (directLink && distinctConcepts) {
			ParentChildRelationShip parentChildRelationship = new ParentChildRelationShip(row);
			dbEngine.put(parentChildRelationship);
		}
		count++;
		// Progress goes to stdout once per 100000 rows read, whether or not they were stored.
		if (count % 100000 == 0)
			System.out.println("Processed " + count + " relationships");
	}
}

Expand All @@ -75,8 +84,8 @@ private void loadConcepts(String conceptFileName, String loincFileName) {
if (info != null)
concept.additionalInformation = info;
}
concept.parentCount = dbEngine.getSubsumesRelationshipsByChildConceptId(concept.conceptId).size();
concept.childCount = dbEngine.getSubsumesRelationshipsByParentConceptId(concept.conceptId).size();
concept.parentCount = dbEngine.getParentChildRelationshipsByChildConceptId(concept.conceptId).size();
concept.childCount = dbEngine.getParentChildRelationshipsByParentConceptId(concept.conceptId).size();
dbEngine.put(concept);
count++;
if (count % 100000 == 0)
Expand Down
15 changes: 10 additions & 5 deletions src/org/ohdsi/usagi/ui/ConceptInformationDialog.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import javax.swing.Box;
import javax.swing.BoxLayout;
import javax.swing.JButton;
import javax.swing.JDialog;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
Expand All @@ -40,12 +40,13 @@
import javax.swing.ListSelectionModel;
import javax.swing.event.ListSelectionEvent;
import javax.swing.event.ListSelectionListener;
import javax.swing.table.TableRowSorter;

import org.ohdsi.usagi.Concept;
import org.ohdsi.usagi.MapsToRelationship;
import org.ohdsi.usagi.SubsumesRelationship;
import org.ohdsi.usagi.ParentChildRelationShip;

public class ConceptInformationDialog extends JDialog {
public class ConceptInformationDialog extends JFrame {

private static final long serialVersionUID = -2112565437136224217L;
private JTextArea area;
Expand All @@ -70,6 +71,7 @@ public ConceptInformationDialog() {
add(createButtonPanel(), BorderLayout.SOUTH);
setSize(800, 600);
setLocationRelativeTo(Global.frame);
UsagiMain.loadIcons(this);
}

private Component createCenterPanel() {
Expand All @@ -96,6 +98,7 @@ private Component createSourceConceptPanel() {
sourceConceptTable.setPreferredScrollableViewportSize(new Dimension(500, 45));
sourceConceptTable.setAutoResizeMode(JTable.AUTO_RESIZE_ALL_COLUMNS);
sourceConceptTable.setRowSelectionAllowed(false);
sourceConceptTable.setRowSorter(new TableRowSorter<ConceptTableModel>(sourceConceptTableModel));
sourceConceptTable.hideColumn("Parents");
sourceConceptTable.hideColumn("Children");
sourceConceptTable.hideColumn("Valid start date");
Expand All @@ -119,6 +122,7 @@ private Component createHierarchyPanel() {

parentConceptTableModel = new ConceptTableModel(false);
parentsConceptTable = new UsagiTable(parentConceptTableModel);
parentsConceptTable.setRowSorter(new TableRowSorter<ConceptTableModel>(parentConceptTableModel));
parentsConceptTable.setPreferredScrollableViewportSize(new Dimension(500, 45));
parentsConceptTable.setAutoResizeMode(JTable.AUTO_RESIZE_ALL_COLUMNS);
parentsConceptTable.setRowSelectionAllowed(true);
Expand Down Expand Up @@ -151,6 +155,7 @@ public void valueChanged(ListSelectionEvent event) {

childrenConceptTableModel = new ConceptTableModel(false);
childrenConceptTable = new UsagiTable(childrenConceptTableModel);
childrenConceptTable.setRowSorter(new TableRowSorter<ConceptTableModel>(childrenConceptTableModel));
childrenConceptTable.setPreferredScrollableViewportSize(new Dimension(500, 45));
childrenConceptTable.setAutoResizeMode(JTable.AUTO_RESIZE_ALL_COLUMNS);
childrenConceptTable.setRowSelectionAllowed(true);
Expand Down Expand Up @@ -306,12 +311,12 @@ private void showConcept(Concept concept) {
area.setText(conceptInfo.toString());

List<Concept> parents = new ArrayList<Concept>();
for (SubsumesRelationship relationship : Global.dbEngine.getSubsumesRelationshipsByChildConceptId(concept.conceptId))
for (ParentChildRelationShip relationship : Global.dbEngine.getParentChildRelationshipsByChildConceptId(concept.conceptId))
parents.add(Global.dbEngine.getConcept(relationship.parentConceptId));
parentConceptTableModel.setConcepts(parents);

List<Concept> children = new ArrayList<Concept>();
for (SubsumesRelationship relationship : Global.dbEngine.getSubsumesRelationshipsByParentConceptId(concept.conceptId))
for (ParentChildRelationShip relationship : Global.dbEngine.getParentChildRelationshipsByParentConceptId(concept.conceptId))
children.add(Global.dbEngine.getConcept(relationship.childConceptId));
childrenConceptTableModel.setConcepts(children);

Expand Down
2 changes: 2 additions & 0 deletions src/org/ohdsi/usagi/ui/Global.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.ohdsi.usagi.ui.actions.RebuildIndexAction;
import org.ohdsi.usagi.ui.actions.SaveAction;
import org.ohdsi.usagi.ui.actions.SaveAsAction;
import org.ohdsi.usagi.ui.actions.ShowStatsAction;

public class Global {
public static JFrame frame;
Expand Down Expand Up @@ -60,4 +61,5 @@ public class Global {
public static RebuildIndexAction rebuildIndexAction;
public static ExitAction exitAction;
public static String vocabularyVersion;
public static ShowStatsAction showStatsAction;
}
2 changes: 1 addition & 1 deletion src/org/ohdsi/usagi/ui/ImportDialog.java
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ public ImportCodesThread(List<SourceCode> sourceCodes, JProgressBar progressBar,
public void run() {
try {
Global.usagiSearchEngine.createDerivedIndex(sourceCodes, null);
Global.dbEngine.openForReading();
// Global.dbEngine.openForReading();

boolean filterStandard = filterPanel.getFilterStandard();
String filterConceptClass = null;
Expand Down
Loading

0 comments on commit 9691c8c

Please sign in to comment.