Skip to content

Commit

Permalink
ZFIN-8950: Use SQL for capturing the before and after counts for summ…
Browse files Browse the repository at this point in the history
…aries
  • Loading branch information
rtaylorzfin committed Dec 18, 2023
1 parent 9f06212 commit acf8169
Show file tree
Hide file tree
Showing 10 changed files with 314 additions and 270 deletions.
4 changes: 2 additions & 2 deletions source/org/zfin/uniprot/UniProtLoadActionsContainer.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

import lombok.Builder;
import lombok.Getter;
import org.zfin.uniprot.dto.UniProtLoadSummaryDTO;
import org.zfin.uniprot.dto.UniProtLoadSummaryItemDTO;

import java.util.Set;

/**
 * Holder that pairs a load summary with the set of load actions.
 * Lombok generates the builder ({@code @Builder}) and the accessors ({@code @Getter});
 * all fields are {@code final}.
 */
@Builder
@Getter
public class UniProtLoadActionsContainer {

// NOTE(review): two declarations of "summary" appear here; this looks like the removed and
// added side of a diff rendered together -- confirm which one the real file keeps.
private final UniProtLoadSummaryDTO summary;
private final UniProtLoadSummaryItemDTO summary;
private final Set<UniProtLoadAction> actions;

}
251 changes: 251 additions & 0 deletions source/org/zfin/uniprot/UniProtLoadSummaryService.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
package org.zfin.uniprot;

import org.hibernate.Session;
import org.hibernate.query.NativeQuery;
import org.zfin.framework.HibernateUtil;
import org.zfin.uniprot.dto.UniProtLoadSummaryItemDTO;
import org.zfin.uniprot.dto.UniProtLoadSummaryListDTO;

import java.math.BigInteger;
import java.util.List;

/**
* Generate a table of statistics like the legacy load did.
* This should be calculable based on the actions that were performed and the context.
*
* Example:
* count of records associated with UniProt before load after load percentage change
* --------------------------------------------- ----------- ----------- -----------------
* db_link records 194687 194462 -0.12
* external_note with db_link 0 25525
* genes with duplicated db_link notes 0 0 not calculated
* --------------------------------------------- ----------- ----------- -----------------
* marker_go_term_evidence IEA records 112109 112381 0.24
* marker_go_term_evidence records from SP 46510 46861 0.75
* marker_go_term_evidence records from IP 61647 61827 0.29
* marker_go_term_evidence records from EC 3952 3693 -6.55
* --------------------------------------------- ----------- ----------- -----------------
* go terms with IEA annotation 3701 3684 -0.46
* component go terms with IEA 432 431 -0.23
* function go terms with IEA 1958 1943 -0.77
* process go terms with IEA 1311 1310 -0.08
* --------------------------------------------- ----------- ----------- -----------------
* markers with IEA annotation 18033 17993 -0.22
* markers with IEA annotation component 432 431 -0.23
* markers with IEA annotation function 13536 13557 0.16
* markers with IEA annotation process 10938 10921 -0.16
*
*/
public class UniProtLoadSummaryService {

/**
 * Captures the "before" snapshot of the UniProt load summary.
 *
 * <p>Runs the summary SQL on the current Hibernate session and records each row's
 * count as the before-load count of an item keyed by the row's description. The
 * after-load count of every item is left {@code null}.
 *
 * @return a new summary list holding one item per row returned by the summary query
 */
public static UniProtLoadSummaryListDTO getBeforeSummary() {
    Session session = HibernateUtil.currentSession();
    NativeQuery<?> query = session.createSQLQuery(getSummarySql());
    List<?> rows = query.list();

    UniProtLoadSummaryListDTO summaryList = new UniProtLoadSummaryListDTO();
    for (Object rowObject : rows) {
        // Column layout of the summary query: [0] row_number, [1] variable_name, [2] count, [3] description.
        Object[] row = (Object[]) rowObject;
        String description = (String) row[3];
        // Read the count through Number instead of casting to BigInteger: the concrete numeric
        // class returned for a COUNT column depends on the Hibernate version / JDBC driver
        // (BigInteger vs. Long), and a hard cast would fail with ClassCastException.
        long beforeCount = ((Number) row[2]).longValue();
        summaryList.putBeforeSummary(new UniProtLoadSummaryItemDTO(description, beforeCount, null));
    }
    return summaryList;
}

/**
 * Fills in the "after" half of a summary that was captured before the load.
 *
 * <p>Re-runs the summary SQL on the current Hibernate session and stores each row's
 * count as the after-load count of the item with the same description. Items already
 * present keep their before-load count. The list passed in is updated in place.
 *
 * @param beforeSummary the list produced before the load; it is modified by this call
 * @return the same {@code beforeSummary} instance, now carrying the after-load counts
 */
public static UniProtLoadSummaryListDTO getAfterSummary(UniProtLoadSummaryListDTO beforeSummary) {
    Session session = HibernateUtil.currentSession();
    NativeQuery<?> query = session.createSQLQuery(getSummarySql());
    List<?> rows = query.list();

    for (Object rowObject : rows) {
        // Column layout of the summary query: [0] row_number, [1] variable_name, [2] count, [3] description.
        Object[] row = (Object[]) rowObject;
        String description = (String) row[3];
        // Read the count through Number instead of casting to BigInteger: the concrete numeric
        // class returned for a COUNT column depends on the Hibernate version / JDBC driver
        // (BigInteger vs. Long), and a hard cast would fail with ClassCastException.
        long afterCount = ((Number) row[2]).longValue();
        beforeSummary.putAfterSummary(new UniProtLoadSummaryItemDTO(description, null, afterCount));
    }
    return beforeSummary;
}

/**
 * Builds the SQL for the summary report.
 *
 * <p>The statement defines two CTEs: {@code external_note_data} (external notes joined to
 * db_link rows whose {@code dblink_info} contains "Swiss-Prot") and
 * {@code marker_go_term_evidence_data} (IEA marker/GO-term evidence whose notes are one of the
 * three "ZFIN ... 2 GO" values). It then combines fifteen aggregate counts with UNION ALL.
 * Each result row has four columns, in this order: {@code row_number}, {@code variable_name},
 * {@code count}, {@code description}; rows are ordered by {@code row_number}.
 *
 * <p>Per the original author's note, this is the same SQL that was used in the legacy perl scripts.
 *
 * @return the SQL text of the summary query
 */
private static String getSummarySql() {
return """
WITH external_note_data AS (
SELECT
extnote_zdb_id,
dblink_zdb_id,
dblink_info
FROM external_note
JOIN db_link ON extnote_data_zdb_id = dblink_zdb_id
WHERE dblink_info LIKE '%Swiss-Prot%'
),
marker_go_term_evidence_data AS (
SELECT
mrkrgoev_zdb_id,
mrkrgoev_evidence_code,
mrkrgoev_notes,
term_zdb_id,
term_ontology,
mrkr_zdb_id
FROM marker_go_term_evidence
JOIN term ON mrkrgoev_term_zdb_id = term_zdb_id
JOIN marker ON mrkr_zdb_id = mrkrgoev_mrkr_zdb_id
WHERE term_ont_id LIKE 'GO%'
AND mrkrgoev_evidence_code = 'IEA'
AND mrkrgoev_notes IN ('ZFIN SP keyword 2 GO', 'ZFIN InterPro 2 GO', 'ZFIN EC acc 2 GO')
)
SELECT 1 as row_number,
'numDblink' AS variable_name,
COUNT(*) AS count,
'db_link records' AS description
FROM db_link
WHERE dblink_info LIKE '%Swiss-Prot%'
UNION ALL
SELECT 2,
'numExternalNote',
COUNT(DISTINCT extnote_zdb_id),
'external_note with db_link'
FROM external_note_data
UNION ALL
SELECT 3,
'numMarkersWithRedundantDblkNote',
COUNT(DISTINCT extnote_data_zdb_id),
'genes with duplicated db_link notes'
FROM external_note note1
JOIN external_note_data ON note1.extnote_data_zdb_id = dblink_zdb_id
WHERE EXISTS (
SELECT 1 FROM external_note note2
WHERE note2.extnote_data_zdb_id = note1.extnote_data_zdb_id
AND note2.extnote_zdb_id != note1.extnote_zdb_id
)
UNION ALL
SELECT 4,
'numIEA',
COUNT(DISTINCT mrkrgoev_zdb_id),
'marker_go_term_evidence IEA records'
FROM marker_go_term_evidence_data
UNION ALL
SELECT 5,
'numIEASP2GO',
COUNT(DISTINCT mrkrgoev_zdb_id),
'marker_go_term_evidence records from SP'
FROM marker_go_term_evidence_data
WHERE mrkrgoev_notes = 'ZFIN SP keyword 2 GO'
UNION ALL
SELECT 6,
'numIEAInterPro2GO',
COUNT(DISTINCT mrkrgoev_zdb_id),
'marker_go_term_evidence records from IP'
FROM marker_go_term_evidence_data
WHERE mrkrgoev_notes = 'ZFIN InterPro 2 GO'
UNION ALL
SELECT 7,
'numIEAEC2GO',
COUNT(DISTINCT mrkrgoev_zdb_id),
'marker_go_term_evidence records from EC'
FROM marker_go_term_evidence_data
WHERE mrkrgoev_notes = 'ZFIN EC acc 2 GO'
UNION ALL
SELECT 8,
'numIEAterms',
COUNT(DISTINCT term_zdb_id),
'go terms with IEA annotation'
FROM marker_go_term_evidence_data
UNION ALL
SELECT 9,
'numIEAtermComponent',
COUNT(DISTINCT term_zdb_id),
'component go terms with IEA'
FROM marker_go_term_evidence_data
WHERE term_ontology = 'cellular_component'
UNION ALL
SELECT 10,
'numIEAtermFunction',
COUNT(DISTINCT term_zdb_id),
'function go terms with IEA'
FROM marker_go_term_evidence_data
WHERE term_ontology = 'molecular_function'
UNION ALL
SELECT 11,
'numIEAtermProcess',
COUNT(DISTINCT term_zdb_id),
'process go terms with IEA'
FROM marker_go_term_evidence_data
WHERE term_ontology = 'biological_process'
UNION ALL
SELECT 12,
'numMrkr',
COUNT(DISTINCT mrkr_zdb_id),
'markers with IEA annotation'
FROM marker_go_term_evidence_data
UNION ALL
SELECT 13,
'numMrkrComponent',
COUNT(DISTINCT mrkr_zdb_id),
'markers with IEA annotation component'
FROM marker_go_term_evidence_data
WHERE term_ontology = 'cellular_component'
UNION ALL
SELECT 14,
'numMrkrFunction',
COUNT(DISTINCT mrkr_zdb_id),
'markers with IEA annotation function'
FROM marker_go_term_evidence_data
WHERE term_ontology = 'molecular_function'
UNION ALL
SELECT 15,
'numMrkrProcess',
COUNT(DISTINCT mrkr_zdb_id),
'markers with IEA annotation process'
FROM marker_go_term_evidence_data
WHERE term_ontology = 'biological_process'
ORDER BY row_number;
""";
}
}
8 changes: 0 additions & 8 deletions source/org/zfin/uniprot/dto/UniProtLoadSummaryDTO.java

This file was deleted.

4 changes: 4 additions & 0 deletions source/org/zfin/uniprot/dto/UniProtLoadSummaryItemDTO.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package org.zfin.uniprot.dto;

/**
 * One line of the UniProt load summary: a description together with its counts
 * before and after the load.
 *
 * @param description     label of the statistic; also used as the lookup key by
 *                        {@code UniProtLoadSummaryListDTO}
 * @param beforeLoadCount count captured before the load; {@code null} when only the
 *                        after-load count is known
 * @param afterLoadCount  count captured after the load; {@code null} when only the
 *                        before-load count is known
 */
public record UniProtLoadSummaryItemDTO(String description, Long beforeLoadCount, Long afterLoadCount) {
}
38 changes: 38 additions & 0 deletions source/org/zfin/uniprot/dto/UniProtLoadSummaryListDTO.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package org.zfin.uniprot.dto;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonValue;

import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Insertion-ordered collection of load-summary items, keyed by item description.
 *
 * <p>Before-load and after-load counts arrive separately; the two {@code put...} methods
 * combine them into a single item per description.
 */
public class UniProtLoadSummaryListDTO {

    /** Items by description; a {@link LinkedHashMap} so iteration follows first-insertion order. */
    @JsonIgnore
    Map<String, UniProtLoadSummaryItemDTO> summaryList = new LinkedHashMap<>();

    /**
     * Returns the summary items in insertion order. Marked {@code @JsonValue}.
     *
     * @return the values view of the backing map
     */
    @JsonValue
    public Collection<UniProtLoadSummaryItemDTO> values() {
        return summaryList.values();
    }

    /**
     * Records the before-load count for an item.
     *
     * <p>If an item with the same description already exists, its after-load count is kept
     * and only the before-load count is replaced; otherwise {@code summary} is stored as given.
     *
     * @param summary item carrying the description and the before-load count
     */
    public void putBeforeSummary(UniProtLoadSummaryItemDTO summary) {
        summaryList.merge(
                summary.description(),
                summary,
                (existing, incoming) -> new UniProtLoadSummaryItemDTO(
                        incoming.description(), incoming.beforeLoadCount(), existing.afterLoadCount()));
    }

    /**
     * Records the after-load count for an item.
     *
     * <p>If an item with the same description already exists, its before-load count is kept
     * and only the after-load count is replaced; otherwise {@code summary} is stored as given.
     *
     * @param summary item carrying the description and the after-load count
     */
    public void putAfterSummary(UniProtLoadSummaryItemDTO summary) {
        summaryList.merge(
                summary.description(),
                summary,
                (existing, incoming) -> new UniProtLoadSummaryItemDTO(
                        incoming.description(), existing.beforeLoadCount(), incoming.afterLoadCount()));
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.zfin.uniprot.dto.UniProtLoadSummaryDTO;
import org.zfin.uniprot.dto.UniProtLoadSummaryListDTO;

import java.util.ArrayList;
import java.util.Date;
Expand All @@ -16,7 +16,7 @@ public class SecondaryTermLoadActionsContainer {

private Long releaseID;
private Date creationDate;
private List<UniProtLoadSummaryDTO> summary;
private UniProtLoadSummaryListDTO summary;

private List<SecondaryTermLoadAction> actions = new ArrayList<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ public SecondaryTermLoadActionsContainer getActionsContainer() {
container.setActions(getActions());
container.setReleaseID(release == null ? null : release.getUpr_id());
container.setCreationDate(new Date());
container.setSummary(SecondaryTermLoadService.createStatistics(getActions(), getContext()));
return container;
}
}
Loading

0 comments on commit acf8169

Please sign in to comment.