Skip to content

Commit

Permalink
ZFIN-9460 create reports for transcripts
Browse files Browse the repository at this point in the history
  • Loading branch information
cmpich committed Dec 5, 2024
1 parent 76462e3 commit cdc165b
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -2468,7 +2468,7 @@ public List<LinkDisplay> getAllVegaGeneDBLinksTranscript() {
JOIN marker_relationship mr ON mr.mrel_mrkr_2_zdb_id=dbl.dblink_linked_recid
LEFT OUTER JOIN record_attribution ra ON ra.recattrib_data_zdb_id=dbl.dblink_zdb_id
WHERE mr.mrel_type='gene produces transcript'
AND fdb.fdb_db_name='VEGA'
AND upper(fdb.fdb_db_name) in ('VEGA','VEGA_TRANS')
""";

Query query = HibernateUtil.currentSession().createSQLQuery(sql).setResultTransformer(markerDBLinkTransformer);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.text.DateFormat;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -675,6 +676,11 @@ private void getMarkerDBLinksWithVegaEnsemblOnlyAccessions() {
List<String> vegaGeneList = vegaList.stream().map(LinkDisplay::getAssociatedGeneID).toList();
// genbank gene list
List<String> genbankGeneList = genbankList.stream().map(markerDBLink1 -> markerDBLink1.getMarker().getZdbID()).toList();
List<String> ensdargGeneList = ensdargList.stream().map(markerDBLink -> markerDBLink.getMarker().getZdbID()).toList();

//getEnsemblOnlyGenes(vegaGeneList, genbankGeneList, ensdargGeneList, ensdargList);
getNcbiOnlyGenes(vegaGeneList, genbankGeneList, ensdargGeneList, genbankList);
//getVegaOnlyGenes(vegaList, vegaGeneList, genbankGeneList, ensdargGeneList);
ensdargList.removeIf(markerDBLink -> !vegaGeneList.contains(markerDBLink.getMarker().getZdbID()));
System.out.println("Number of Ensembl Genes that also have a Vega Gene: " + ensdargList.size());
ensdargList.removeIf(markerDBLink -> genbankGeneList.contains(markerDBLink.getMarker().getZdbID()));
Expand All @@ -688,6 +694,86 @@ private void getMarkerDBLinksWithVegaEnsemblOnlyAccessions() {
}
}

/**
 * Prints a report of genes that appear in {@code vegaGeneList} but in neither
 * {@code genbankGeneList} nor {@code ensdargGeneList}.
 * <p>
 * For each such gene, the transcript links returned by
 * {@code getSequenceRepository().getAllRelevantEnsemblTranscripts()} for a marker with the same
 * ZDB ID are collected and the distinct strains of their transcripts are determined:
 * <ul>
 *   <li>more than one distinct strain: a warning line with the gene ID is printed;</li>
 *   <li>exactly one strain, non-null, whose handle is {@code "TU"}: a tab-separated line with the
 *       gene ID, gene abbreviation and the last accession in {@code vegaList} for that gene that
 *       starts with {@code "OTTDARG"} (or {@code null} if there is none) is printed.</li>
 * </ul>
 * All output goes to standard out.
 *
 * @param vegaList        links scanned for the {@code OTTDARG} accession of each reported gene
 * @param vegaGeneList    candidate gene IDs
 * @param genbankGeneList gene IDs to exclude
 * @param ensdargGeneList gene IDs to exclude
 */
private static void getVegaOnlyGenes(List<LinkDisplay> vegaList, List<String> vegaGeneList, List<String> genbankGeneList, List<String> ensdargGeneList) {
    // Hash-based membership tests; List.contains() inside the loop was a linear scan per gene.
    Set<String> genbankGenes = new HashSet<>(genbankGeneList);
    Set<String> ensdargGenes = new HashSet<>(ensdargGeneList);

    List<String> vegaOnly = new ArrayList<>();
    for (String vegaID : vegaGeneList) {
        if (!genbankGenes.contains(vegaID) && !ensdargGenes.contains(vegaID)) {
            vegaOnly.add(vegaID);
        }
    }

    Map<Marker, List<TranscriptDBLink>> geneEnsdartMap = getSequenceRepository().getAllRelevantEnsemblTranscripts();

    for (String geneID : vegaOnly) {
        Marker gene = getMarkerRepository().getMarkerByID(geneID);
        // Accumulates across all map entries whose key shares this gene's ZDB ID.
        List<TranscriptDBLink> transcriptList = new ArrayList<>();

        for (Map.Entry<Marker, List<TranscriptDBLink>> entry : geneEnsdartMap.entrySet()) {
            // Match on ZDB ID rather than Map.get(gene): does not rely on Marker equals/hashCode.
            if (!entry.getKey().getZdbID().equals(gene.getZdbID())) {
                continue;
            }
            transcriptList.addAll(entry.getValue());

            Set<Genotype> strains = new HashSet<>();
            for (TranscriptDBLink transcriptDBLink : transcriptList) {
                strains.add(transcriptDBLink.getTranscript().getStrain());
            }

            if (strains.size() > 1) {
                System.out.println("More than one type of strain in the set of transcripts per gene: " + gene.getZdbID());
                continue;
            }
            if (strains.size() != 1) {
                continue;
            }

            // The single element may be null when the transcripts carry no strain.
            Genotype strain = strains.iterator().next();
            // Constant-first equals: a null handle no longer throws.
            if (strain == null || !"TU".equals(strain.getHandle())) {
                continue;
            }

            // Only scan vegaList once we know a line will be printed; last match wins.
            String vegaAccession = null;
            for (LinkDisplay linkDisplay : vegaList) {
                if (linkDisplay.getAssociatedGeneID().equals(gene.getZdbID()) && linkDisplay.getAccession().startsWith("OTTDARG")) {
                    vegaAccession = linkDisplay.getAccession();
                }
            }
            System.out.println(geneID + "\t" + gene.getAbbreviation() + "\t" + vegaAccession);
        }
    }
}

/**
 * Collects and reports the gene IDs that appear in {@code ensdargGeneList} but in neither
 * {@code genbankGeneList} nor {@code vegaGeneList}.
 * <p>
 * For every such ID one tab-separated line is printed to standard out: the ID, the abbreviation of
 * the marker loaded via {@code getMarkerRepository().getMarkerByID(id)}, and the accession number
 * of the last entry in {@code ensdargList} whose marker has that ZDB ID ({@code null} if none).
 *
 * @param vegaGeneList    gene IDs to exclude
 * @param genbankGeneList gene IDs to exclude
 * @param ensdargGeneList candidate gene IDs
 * @param ensdargList     links used to look up the accession number printed for each gene
 * @return the remaining IDs in the order of {@code ensdargGeneList}; duplicates are retained
 */
private static List<String> getEnsemblOnlyGenes(List<String> vegaGeneList, List<String> genbankGeneList, List<String> ensdargGeneList, List<MarkerDBLink> ensdargList) {
    // Hash-based membership tests; List.contains() inside the loop was a linear scan per gene.
    Set<String> vegaGenes = new HashSet<>(vegaGeneList);
    Set<String> genbankGenes = new HashSet<>(genbankGeneList);

    List<String> ensdargOnly = new ArrayList<>();
    for (String geneID : ensdargGeneList) {
        if (!genbankGenes.contains(geneID) && !vegaGenes.contains(geneID)) {
            ensdargOnly.add(geneID);
        }
    }
    if (ensdargOnly.isEmpty()) {
        // Nothing to report; as before, the link list is not touched in this case.
        return ensdargOnly;
    }

    // Single pass over the links instead of one full scan per reported gene.
    // Later entries overwrite earlier ones, so the last link per gene wins, as before.
    Map<String, String> accessionByGeneID = new HashMap<>();
    for (MarkerDBLink link : ensdargList) {
        accessionByGeneID.put(link.getMarker().getZdbID(), link.getAccessionNumber());
    }

    for (String id : ensdargOnly) {
        Marker gene = getMarkerRepository().getMarkerByID(id);
        System.out.println(id + "\t" + gene.getAbbreviation() + "\t" + accessionByGeneID.get(id));
    }
    return ensdargOnly;
}

/**
 * Collects and reports the gene IDs that appear in {@code genbankGeneList} but in neither
 * {@code ensdargGeneList} nor {@code vegaGeneList}.
 * <p>
 * For every such ID one tab-separated line is printed to standard out: the ID, the abbreviation of
 * the marker loaded via {@code getMarkerRepository().getMarkerByID(id)}, and the accession number
 * of the last entry in {@code ncbiList} whose marker has that ZDB ID ({@code null} if none).
 *
 * @param vegaGeneList    gene IDs to exclude
 * @param genbankGeneList candidate gene IDs
 * @param ensdargGeneList gene IDs to exclude
 * @param ncbiList        links used to look up the accession number printed for each gene
 * @return the remaining IDs in the order of {@code genbankGeneList}; duplicates are retained
 */
private static List<String> getNcbiOnlyGenes(List<String> vegaGeneList, List<String> genbankGeneList, List<String> ensdargGeneList, List<MarkerDBLink> ncbiList) {
    // Hash-based membership tests; List.contains() inside the loop was a linear scan per gene.
    Set<String> vegaGenes = new HashSet<>(vegaGeneList);
    Set<String> ensdargGenes = new HashSet<>(ensdargGeneList);

    List<String> ncbiOnly = new ArrayList<>();
    for (String ncbiID : genbankGeneList) {
        if (!ensdargGenes.contains(ncbiID) && !vegaGenes.contains(ncbiID)) {
            ncbiOnly.add(ncbiID);
        }
    }
    if (ncbiOnly.isEmpty()) {
        // Nothing to report; as before, the link list is not touched in this case.
        return ncbiOnly;
    }

    // Single pass over the links instead of one full scan per reported gene.
    // Later entries overwrite earlier ones, so the last link per gene wins, as before.
    Map<String, String> ncbiAccessionByGeneID = new HashMap<>();
    for (MarkerDBLink link : ncbiList) {
        ncbiAccessionByGeneID.put(link.getMarker().getZdbID(), link.getAccessionNumber());
    }

    for (String id : ncbiOnly) {
        Marker gene = getMarkerRepository().getMarkerByID(id);
        System.out.println(id + "\t" + gene.getAbbreviation() + "\t" + ncbiAccessionByGeneID.get(id));
    }
    return ncbiOnly;
}

private Map<String, MarkerDBLink> getMarkerDBLinksWithGenbankEnsemblOnlyAccessions() {
List<MarkerDBLink> ensdargList = getSequenceRepository().getAllEnsemblGenes(ForeignDB.AvailableName.ENSEMBL_GRCZ11_);
List<LinkDisplay> vegaList = getMarkerRepository().getAllVegaGeneDBLinksTranscript();
Expand Down

0 comments on commit cdc165b

Please sign in to comment.