From 4c77a8bc2c783c6f74dcb13f89382ad1870fa4e2 Mon Sep 17 00:00:00 2001 From: fileoy Date: Thu, 8 Aug 2024 15:04:16 -0400 Subject: [PATCH 1/4] // Contents: - Now it accepts all hint types from PubTator rather than just organisms, e.g. genes, cell lines, diseases, and chemicals. - Added tests for the new hint types. - Updated the hint model to accept all hint types. --- src/model/hint.js | 15 +- .../routes/api/document/hint/pubtator.js | 18 +- src/util/registry.js | 45 +- .../10.1016_j.molcel.2016.11.034.json | 885 ++++++++++++++++++ .../10.1016_j.molcel.2019.04.005.json | 733 +++++++++++++++ .../10.1016_j.molcel.2024.01.007.json | 459 +++++++++ test/pubtator/pubtator.js | 136 ++- test/pubtator/pubtator_8.json | 4 +- 8 files changed, 2253 insertions(+), 42 deletions(-) create mode 100644 test/pubtator/10.1016_j.molcel.2016.11.034.json create mode 100644 test/pubtator/10.1016_j.molcel.2019.04.005.json create mode 100644 test/pubtator/10.1016_j.molcel.2024.01.007.json diff --git a/src/model/hint.js b/src/model/hint.js index bc65e6e4..d23e46fc 100644 --- a/src/model/hint.js +++ b/src/model/hint.js @@ -1,9 +1,18 @@ import _ from 'lodash'; +import { ENTITY_TYPE } from './element/entity-type.js'; // Define constants for Hint types, combining ORGANISM with ENTITY_TYPE -const HINT_TYPE = Object.freeze({ - ORGANISM: 'organism', -}); +const HINT_TYPE = Object.freeze( + _.assign( + { + ORGANISM: 'organism', + DISEASE: 'disease', + CELL_LINE: 'cell_line', + VARIANT: 'variant', + }, + ENTITY_TYPE, + ), +); // Flatten the HINT_TYPE object to create an array of all hint types const HINT_TYPES = _.flatMap(HINT_TYPE); diff --git a/src/server/routes/api/document/hint/pubtator.js b/src/server/routes/api/document/hint/pubtator.js index f12f4f18..f01f27d0 100644 --- a/src/server/routes/api/document/hint/pubtator.js +++ b/src/server/routes/api/document/hint/pubtator.js @@ -12,18 +12,32 @@ function map(bioCDocument) { // See Table 1 https://www.ncbi.nlm.nih.gov/research/pubtator3/tutorial const 
PUBTATOR_ANNOTATION_TYPE = Object.freeze({ + GENE: 'Gene', SPECIES: 'Species', - // could add more types here when scope expands + CHEMICAL: 'Chemical', + DISEASE: 'Disease', + CELL_LINE: 'CellLine', + // VARIANT: 'Variant', // NOTE(review): left disabled; no entityTypes/database2Xref mapping for variants (litvar) is defined below }); const PUBTATOR_DATABASE = Object.freeze({ + ncbi_gene: 'ncbi_gene', ncbi_taxonomy: 'ncbi_taxonomy', - // could add more databases here when scope expands + ncbi_mesh: 'ncbi_mesh', + cvcl: 'cvcl', + litvar: 'litvar', }); const entityTypes = new Map([ + [PUBTATOR_ANNOTATION_TYPE.GENE, HINT_TYPE.GGP], [PUBTATOR_ANNOTATION_TYPE.SPECIES, HINT_TYPE.ORGANISM], + [PUBTATOR_ANNOTATION_TYPE.CHEMICAL, HINT_TYPE.CHEMICAL], + [PUBTATOR_ANNOTATION_TYPE.DISEASE, HINT_TYPE.DISEASE], + [PUBTATOR_ANNOTATION_TYPE.CELL_LINE, HINT_TYPE.CELL_LINE], ]); const database2Xref = new Map([ + [PUBTATOR_DATABASE.ncbi_gene, COLLECTIONS.NCBI_GENE], [PUBTATOR_DATABASE.ncbi_taxonomy, COLLECTIONS.NCBI_TAXONOMY], + [PUBTATOR_DATABASE.ncbi_mesh, COLLECTIONS.MESH], + [PUBTATOR_DATABASE.cvcl, COLLECTIONS.CELLOSAURUS], ]); /** diff --git a/src/util/registry.js b/src/util/registry.js index 628714c9..5e014d84 100644 --- a/src/util/registry.js +++ b/src/util/registry.js @@ -1,17 +1,36 @@ const COLLECTIONS = Object.freeze({ - PUBMED: { - dbname: 'PubMed', - dbPrefix: 'pubmed' - }, - /** - * The Taxonomy Database is a curated classification and nomenclature for all of the organisms - * in the public sequence databases. This currently represents about 10% of the described species of life on the planet. - */ - NCBI_TAXONOMY: { - dbname: 'NCBI Taxonomy', - dbPrefix: 'NCBITaxon' - } + PUBMED: { + dbname: 'PubMed', + dbPrefix: 'pubmed', + }, + NCBI_GENE: { + dbname: 'NCBI Gene', + dbPrefix: 'NCBIGene', + }, + /** + * The Taxonomy Database is a curated classification and nomenclature for all of the organisms + * in the public sequence databases. This currently represents about 10% of the described species of life on the planet. 
+ */ + NCBI_TAXONOMY: { + dbname: 'NCBI Taxonomy', + dbPrefix: 'NCBITaxon', + }, + MESH: { + dbname: 'MeSH', + dbPrefix: 'mesh', + }, + CHEBI: { + dbname: 'ChEBI', + dbPrefix: 'CHEBI', + }, + CELLOSAURUS: { + dbname: 'CELLOSAURUS', + dbPrefix: 'cellosaurus', + }, + UNIPROT: { + dbname: 'UniProt Knowledgebase', + dbPrefix: 'uniprot', + }, }); export { COLLECTIONS }; - diff --git a/test/pubtator/10.1016_j.molcel.2016.11.034.json b/test/pubtator/10.1016_j.molcel.2016.11.034.json new file mode 100644 index 00000000..13acf597 --- /dev/null +++ b/test/pubtator/10.1016_j.molcel.2016.11.034.json @@ -0,0 +1,885 @@ +{ + "_id": "28041912|None", + "id": "10.1016/j.molcel.2016.11.034", + "infons": { + "doi": "10.1016/j.molcel.2016.11.034", + "comment": "Synonym: p65; NF-κB" + }, + "passages": [ + { + "infons": { + "journal": "Mol Cell. 2017 Jan 5;65(1):154-167. doi: 10.1016/j.molcel.2016.11.034. Epub 2016 ", + "year": "2017", + "article-id_pmc": "PMC5218896", + "type": "title", + "authors": "Jeong JH, Park SJ, Dickinson SI, Luo JL" + }, + "offset": 0, + "text": "A Constitutive Intrinsic Inflammatory Signaling Circuit Composed of miR-196b, Meis2, PPP3CC, and p65 Drives Prostate Cancer Castration Resistance.", + "sentences": [], + "annotations": [ + { + "id": "5", + "infons": { + "identifier": "442920", + "type": "Gene", + "valid": true, + "normalized": [442920], + "database": "ncbi_gene", + "normalized_id": 442920, + "biotype": "gene", + "name": "MIR196B", + "accession": "@GENE_MIR196B" + }, + "text": "miR-196b", + "locations": [ + { + "offset": 68, + "length": 8 + } + ] + }, + { + "id": "6", + "infons": { + "identifier": "4212", + "type": "Gene", + "ncbi_homologene": "7846", + "valid": true, + "normalized": [4212], + "database": "ncbi_gene", + "normalized_id": 4212, + "biotype": "gene", + "name": "MEIS2", + "accession": "@GENE_MEIS2" + }, + "text": "Meis2", + "locations": [ + { + "offset": 78, + "length": 5 + } + ] + }, + { + "id": "7", + "infons": { + "identifier": "5533", + 
"type": "Gene", + "ncbi_homologene": "68475", + "valid": true, + "normalized": [5533], + "database": "ncbi_gene", + "normalized_id": 5533, + "biotype": "gene", + "name": "PPP3CC", + "accession": "@GENE_PPP3CC" + }, + "text": "PPP3CC", + "locations": [ + { + "offset": 85, + "length": 6 + } + ] + }, + { + "id": "8", + "infons": { + "identifier": "5970", + "type": "Gene", + "ncbi_homologene": "32064", + "valid": true, + "normalized": [5970], + "database": "ncbi_gene", + "normalized_id": 5970, + "biotype": "gene", + "name": "RELA", + "accession": "@GENE_RELA" + }, + "text": "p65", + "locations": [ + { + "offset": 97, + "length": 3 + } + ] + }, + { + "id": "9", + "infons": { + "identifier": "MESH:D011471", + "type": "Disease", + "valid": true, + "normalized": ["D011471"], + "database": "ncbi_mesh", + "normalized_id": "D011471", + "biotype": "disease", + "name": "Prostatic Neoplasms", + "accession": "@DISEASE_Prostatic_Neoplasms" + }, + "text": "Prostate Cancer Castration", + "locations": [ + { + "offset": 108, + "length": 26 + } + ] + } + ], + "relations": [] + }, + { + "infons": { + "type": "abstract" + }, + "offset": 147, + "text": "Androgen deprivation therapy is the most effective treatment for advanced prostate cancer, but almost all cancer eventually becomes castration resistant, and the underlying mechanisms are largely unknown. Here, we show that an intrinsic constitutively activated feedforward signaling circuit composed of IkappaBalpha/NF-kappaB(p65), miR-196b-3p, Meis2, and PPP3CC is formed during the emergence of castration-resistant prostate cancer (CRPC). This circuit controls the expression of stem cell transcription factors that drives the high tumorigenicity of CRPC cells. Interrupting the circuit by targeting its individual components significantly impairs the tumorigenicity and CRPC development. 
Notably, constitutive activation of IkappaBalpha/NF-kappaB(p65) in this circuit is not dependent on the activation of traditional IKKbeta/NF-kappaB pathways that are important in normal immune responses. Therefore, our studies present deep insight into the bona fide mechanisms underlying castration resistance and provide the foundation for the development of CRPC therapeutic strategies that would be highly efficient while avoiding indiscriminate IKK/NF-kappaB inhibition in normal cells.", + "sentences": [], + "annotations": [ + { + "id": "28", + "infons": { + "identifier": "MESH:D011471", + "type": "Disease", + "valid": true, + "normalized": ["D011471"], + "database": "ncbi_mesh", + "normalized_id": "D011471", + "biotype": "disease", + "name": "Prostatic Neoplasms", + "accession": "@DISEASE_Prostatic_Neoplasms" + }, + "text": "prostate cancer", + "locations": [ + { + "offset": 221, + "length": 15 + } + ] + }, + { + "id": "29", + "infons": { + "identifier": "MESH:D009369", + "type": "Disease", + "valid": true, + "normalized": ["D009369"], + "database": "ncbi_mesh", + "normalized_id": "D009369", + "biotype": "disease", + "name": "Neoplasms", + "accession": "@DISEASE_Neoplasms" + }, + "text": "cancer", + "locations": [ + { + "offset": 253, + "length": 6 + } + ] + }, + { + "id": "30", + "infons": { + "identifier": "4792", + "type": "Gene", + "ncbi_homologene": "7863", + "valid": true, + "normalized": [4792], + "database": "ncbi_gene", + "normalized_id": 4792, + "biotype": "gene", + "name": "NFKBIA", + "accession": "@GENE_NFKBIA" + }, + "text": "IkappaBalpha", + "locations": [ + { + "offset": 451, + "length": 12 + } + ] + }, + { + "id": "31", + "infons": { + "identifier": "4790", + "type": "Gene", + "ncbi_homologene": "2971", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "text": "NF-kappaB", + "locations": [ + { + "offset": 464, + "length": 9 + } + 
] + }, + { + "id": "32", + "infons": { + "identifier": "5970", + "type": "Gene", + "ncbi_homologene": "32064", + "valid": true, + "normalized": [5970], + "database": "ncbi_gene", + "normalized_id": 5970, + "biotype": "gene", + "name": "RELA", + "accession": "@GENE_RELA" + }, + "text": "p65", + "locations": [ + { + "offset": 474, + "length": 3 + } + ] + }, + { + "id": "33", + "infons": { + "identifier": "4212", + "type": "Gene", + "ncbi_homologene": "7846", + "valid": true, + "normalized": [4212], + "database": "ncbi_gene", + "normalized_id": 4212, + "biotype": "gene", + "name": "MEIS2", + "accession": "@GENE_MEIS2" + }, + "text": "Meis2", + "locations": [ + { + "offset": 493, + "length": 5 + } + ] + }, + { + "id": "34", + "infons": { + "identifier": "5533", + "type": "Gene", + "ncbi_homologene": "68475", + "valid": true, + "normalized": [5533], + "database": "ncbi_gene", + "normalized_id": 5533, + "biotype": "gene", + "name": "PPP3CC", + "accession": "@GENE_PPP3CC" + }, + "text": "PPP3CC", + "locations": [ + { + "offset": 504, + "length": 6 + } + ] + }, + { + "id": "35", + "infons": { + "identifier": "MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "text": "castration-resistant prostate cancer", + "locations": [ + { + "offset": 545, + "length": 36 + } + ] + }, + { + "id": "36", + "infons": { + "identifier": "MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "text": "CRPC", + "locations": [ + { + "offset": 583, + "length": 4 + } + ] + }, + { + "id": "37", + "infons": { + "identifier": 
"MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "text": "CRPC", + "locations": [ + { + "offset": 701, + "length": 4 + } + ] + }, + { + "id": "38", + "infons": { + "identifier": "MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "text": "CRPC", + "locations": [ + { + "offset": 822, + "length": 4 + } + ] + }, + { + "id": "39", + "infons": { + "identifier": "4792", + "type": "Gene", + "ncbi_homologene": "7863", + "valid": true, + "normalized": [4792], + "database": "ncbi_gene", + "normalized_id": 4792, + "biotype": "gene", + "name": "NFKBIA", + "accession": "@GENE_NFKBIA" + }, + "text": "IkappaBalpha", + "locations": [ + { + "offset": 876, + "length": 12 + } + ] + }, + { + "id": "40", + "infons": { + "identifier": "4790", + "type": "Gene", + "ncbi_homologene": "2971", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "text": "NF-kappaB", + "locations": [ + { + "offset": 889, + "length": 9 + } + ] + }, + { + "id": "41", + "infons": { + "identifier": "5970", + "type": "Gene", + "ncbi_homologene": "32064", + "valid": true, + "normalized": [5970], + "database": "ncbi_gene", + "normalized_id": 5970, + "biotype": "gene", + "name": "RELA", + "accession": "@GENE_RELA" + }, + "text": "p65", + "locations": [ + { + "offset": 899, + "length": 3 + } + ] + }, + { + "id": "42", + "infons": { + "identifier": "3551", + "type": "Gene", + "ncbi_homologene": "7782", + "valid": true, + 
"normalized": [3551], + "database": "ncbi_gene", + "normalized_id": 3551, + "biotype": "gene", + "name": "IKBKB", + "accession": "@GENE_IKBKB" + }, + "text": "IKKbeta", + "locations": [ + { + "offset": 970, + "length": 7 + } + ] + }, + { + "id": "43", + "infons": { + "identifier": "4790", + "type": "Gene", + "ncbi_homologene": "2971", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "text": "NF-kappaB", + "locations": [ + { + "offset": 978, + "length": 9 + } + ] + }, + { + "id": "44", + "infons": { + "identifier": "MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "text": "CRPC", + "locations": [ + { + "offset": 1201, + "length": 4 + } + ] + }, + { + "id": "45", + "infons": { + "identifier": "4790", + "type": "Gene", + "ncbi_homologene": "2971", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "text": "NF-kappaB", + "locations": [ + { + "offset": 1294, + "length": 9 + } + ] + } + ], + "relations": [] + } + ], + "relations": [ + { + "id": "R1", + "infons": { + "score": "0.9996", + "role1": { + "identifier": "MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "role2": { + "identifier": "4792", + "type": "Gene", + "valid": true, + "normalized": [4792], + "database": "ncbi_gene", + "normalized_id": 4792, + "biotype": "gene", + "name": "NFKBIA", + "accession": 
"@GENE_NFKBIA" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "0", + "role": "12,7" + } + ] + }, + { + "id": "R2", + "infons": { + "score": "0.9163", + "role1": { + "identifier": "MESH:D011471", + "type": "Disease", + "valid": true, + "normalized": ["D011471"], + "database": "ncbi_mesh", + "normalized_id": "D011471", + "biotype": "disease", + "name": "Prostatic Neoplasms", + "accession": "@DISEASE_Prostatic_Neoplasms" + }, + "role2": { + "identifier": "442920", + "type": "Gene", + "valid": true, + "normalized": [442920], + "database": "ncbi_gene", + "normalized_id": 442920, + "biotype": "gene", + "name": "MIR196B", + "accession": "@GENE_MIR196B" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "1", + "role": "4,0" + } + ] + }, + { + "id": "R3", + "infons": { + "score": "0.9986", + "role1": { + "identifier": "MESH:D011471", + "type": "Disease", + "valid": true, + "normalized": ["D011471"], + "database": "ncbi_mesh", + "normalized_id": "D011471", + "biotype": "disease", + "name": "Prostatic Neoplasms", + "accession": "@DISEASE_Prostatic_Neoplasms" + }, + "role2": { + "identifier": "5970", + "type": "Gene", + "valid": true, + "normalized": [5970], + "database": "ncbi_gene", + "normalized_id": 5970, + "biotype": "gene", + "name": "RELA", + "accession": "@GENE_RELA" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "2", + "role": "4,3" + } + ] + }, + { + "id": "R4", + "infons": { + "score": "0.9995", + "role1": { + "identifier": "MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "role2": { + "identifier": "4790", + "type": "Gene", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, 
+ "type": "Association" + }, + "nodes": [ + { + "refid": "3", + "role": "12,8" + } + ] + }, + { + "id": "R5", + "infons": { + "score": "0.9995", + "role1": { + "identifier": "MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "role2": { + "identifier": "4212", + "type": "Gene", + "valid": true, + "normalized": [4212], + "database": "ncbi_gene", + "normalized_id": 4212, + "biotype": "gene", + "name": "MEIS2", + "accession": "@GENE_MEIS2" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "4", + "role": "12,10" + } + ] + }, + { + "id": "R6", + "infons": { + "score": "0.9689", + "role1": { + "identifier": "MESH:D011471", + "type": "Disease", + "valid": true, + "normalized": ["D011471"], + "database": "ncbi_mesh", + "normalized_id": "D011471", + "biotype": "disease", + "name": "Prostatic Neoplasms", + "accession": "@DISEASE_Prostatic_Neoplasms" + }, + "role2": { + "identifier": "5533", + "type": "Gene", + "valid": true, + "normalized": [5533], + "database": "ncbi_gene", + "normalized_id": 5533, + "biotype": "gene", + "name": "PPP3CC", + "accession": "@GENE_PPP3CC" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "5", + "role": "4,2" + } + ] + }, + { + "id": "R7", + "infons": { + "score": "0.9996", + "role1": { + "identifier": "MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "role2": { + "identifier": "5533", + "type": "Gene", + "valid": true, + "normalized": [5533], + "database": "ncbi_gene", + "normalized_id": 5533, + "biotype": "gene", + "name": "PPP3CC", + "accession": 
"@GENE_PPP3CC" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "6", + "role": "12,11" + } + ] + }, + { + "id": "R8", + "infons": { + "score": "0.9323", + "role1": { + "identifier": "MESH:D011471", + "type": "Disease", + "valid": true, + "normalized": ["D011471"], + "database": "ncbi_mesh", + "normalized_id": "D011471", + "biotype": "disease", + "name": "Prostatic Neoplasms", + "accession": "@DISEASE_Prostatic_Neoplasms" + }, + "role2": { + "identifier": "4212", + "type": "Gene", + "valid": true, + "normalized": [4212], + "database": "ncbi_gene", + "normalized_id": 4212, + "biotype": "gene", + "name": "MEIS2", + "accession": "@GENE_MEIS2" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "7", + "role": "4,1" + } + ] + }, + { + "id": "R9", + "infons": { + "score": "0.9995", + "role1": { + "identifier": "MESH:D064129", + "type": "Disease", + "valid": true, + "normalized": ["D064129"], + "database": "ncbi_mesh", + "normalized_id": "D064129", + "biotype": "disease", + "name": "Prostatic Neoplasms Castration-Resistant", + "accession": "@DISEASE_Prostatic_Neoplasms_Castration_Resistant" + }, + "role2": { + "identifier": "5970", + "type": "Gene", + "valid": true, + "normalized": [5970], + "database": "ncbi_gene", + "normalized_id": 5970, + "biotype": "gene", + "name": "RELA", + "accession": "@GENE_RELA" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "8", + "role": "12,9" + } + ] + } + ], + "pmid": 28041912, + "pmcid": null, + "meta": {}, + "date": "2017-01-05T00:00:00Z", + "journal": "Mol Cell", + "authors": ["Jeong JH", "Park SJ", "Dickinson SI", "Luo JL"], + "relations_display": [ + { + "name": "associate|@DISEASE_Prostatic_Neoplasms_Castration_Resistant|@GENE_NFKBIA" + }, + { + "name": "associate|@DISEASE_Prostatic_Neoplasms|@GENE_MIR196B" + }, + { + "name": "associate|@DISEASE_Prostatic_Neoplasms|@GENE_RELA" + }, + { + "name": "associate|@DISEASE_Prostatic_Neoplasms_Castration_Resistant|@GENE_NFKB1" + }, + { + "name": 
"associate|@DISEASE_Prostatic_Neoplasms_Castration_Resistant|@GENE_MEIS2" + }, + { + "name": "associate|@DISEASE_Prostatic_Neoplasms|@GENE_PPP3CC" + }, + { + "name": "associate|@DISEASE_Prostatic_Neoplasms_Castration_Resistant|@GENE_PPP3CC" + }, + { + "name": "associate|@DISEASE_Prostatic_Neoplasms|@GENE_MEIS2" + }, + { + "name": "associate|@DISEASE_Prostatic_Neoplasms_Castration_Resistant|@GENE_RELA" + } + ] +} diff --git a/test/pubtator/10.1016_j.molcel.2019.04.005.json b/test/pubtator/10.1016_j.molcel.2019.04.005.json new file mode 100644 index 00000000..556f7f96 --- /dev/null +++ b/test/pubtator/10.1016_j.molcel.2019.04.005.json @@ -0,0 +1,733 @@ +{ + "_id": "31053471|None", + "id": "10.1016/j.molcel.2019.04.005", + "infons": { + "doi": "10.1016/j.molcel.2019.04.005", + "comment": "Synonym: PD-1 (chemical)" + }, + "passages": [ + { + "infons": { + "journal": "Mol Cell. 2019 Jun 20;74(6):1215-1226.e4. doi: 10.1016/j.molcel.2019.04.005. Epub ", + "year": "2019", + "article-id_pmc": "PMC6737939", + "type": "title", + "authors": "Tu X, Qin B, Zhang Y, Zhang C, Kahila M, Nowsheen S, Yin P, Yuan J, Pei H, Li H, Yu J, Song Z, Zhou Q, Zhao F, Liu J, Zhang C, Dong H, Mutter RW, Lou Z" + }, + "offset": 0, + "text": "PD-L1 (B7-H1) Competes with the RNA Exosome to Regulate the DNA Damage Response and Can Be Targeted to Sensitize to Radiation or Chemotherapy.", + "sentences": [], + "annotations": [ + { + "id": "2", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 0, + "length": 5 + } + ] + }, + { + "id": "3", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": 
"CD274", + "accession": "@GENE_CD274" + }, + "text": "B7-H1", + "locations": [ + { + "offset": 7, + "length": 5 + } + ] + } + ], + "relations": [] + }, + { + "infons": { + "type": "abstract" + }, + "offset": 143, + "text": "Programmed death ligand 1 (PD-L1, also called B7-H1) is an immune checkpoint protein that inhibits immune function through its binding of the programmed cell death protein 1 (PD-1) receptor. Clinically approved antibodies block extracellular PD-1 and PD-L1 binding, yet the role of intracellular PD-L1 in cancer remains poorly understood. Here, we discovered that intracellular PD-L1 acts as an RNA binding protein that regulates the mRNA stability of NBS1, BRCA1, and other DNA damage-related genes. Through competition with the RNA exosome, intracellular PD-L1 protects targeted RNAs from degradation, thereby increasing cellular resistance to DNA damage. RNA immunoprecipitation and RNA-seq experiments demonstrated that PD-L1 regulates RNA stability genome-wide. Furthermore, we developed a PD-L1 antibody, H1A, which abrogates the interaction of PD-L1 with CMTM6, thereby promoting PD-L1 degradation. 
Intracellular PD-L1 may be a potential therapeutic target to enhance the efficacy of radiotherapy and chemotherapy in cancer through the inhibition of DNA damage response and repair.", + "sentences": [], + "annotations": [ + { + "id": "24", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 170, + "length": 5 + } + ] + }, + { + "id": "25", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "B7-H1", + "locations": [ + { + "offset": 189, + "length": 5 + } + ] + }, + { + "id": "26", + "infons": { + "identifier": "5133", + "type": "Gene", + "ncbi_homologene": "3681", + "valid": true, + "normalized": [5133], + "database": "ncbi_gene", + "normalized_id": 5133, + "biotype": "gene", + "name": "PDCD1", + "accession": "@GENE_PDCD1" + }, + "text": "programmed cell death protein 1", + "locations": [ + { + "offset": 285, + "length": 31 + } + ] + }, + { + "id": "27", + "infons": { + "identifier": "5133", + "type": "Gene", + "ncbi_homologene": "3681", + "valid": true, + "normalized": [5133], + "database": "ncbi_gene", + "normalized_id": 5133, + "biotype": "gene", + "name": "PDCD1", + "accession": "@GENE_PDCD1" + }, + "text": "PD-1", + "locations": [ + { + "offset": 318, + "length": 4 + } + ] + }, + { + "id": "28", + "infons": { + "identifier": "5133", + "type": "Gene", + "ncbi_homologene": "3681", + "valid": true, + "normalized": [5133], + "database": "ncbi_gene", + "normalized_id": 5133, + "biotype": "gene", + "name": "PDCD1", + "accession": "@GENE_PDCD1" + }, + "text": "PD-1", + "locations": [ + { + "offset": 385, + 
"length": 4 + } + ] + }, + { + "id": "29", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 394, + "length": 5 + } + ] + }, + { + "id": "30", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 439, + "length": 5 + } + ] + }, + { + "id": "31", + "infons": { + "identifier": "MESH:D009369", + "type": "Disease", + "valid": true, + "normalized": ["D009369"], + "database": "ncbi_mesh", + "normalized_id": "D009369", + "biotype": "disease", + "name": "Neoplasms", + "accession": "@DISEASE_Neoplasms" + }, + "text": "cancer", + "locations": [ + { + "offset": 448, + "length": 6 + } + ] + }, + { + "id": "32", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 521, + "length": 5 + } + ] + }, + { + "id": "33", + "infons": { + "identifier": "4683", + "type": "Gene", + "ncbi_homologene": "1858", + "valid": true, + "normalized": [4683], + "database": "ncbi_gene", + "normalized_id": 4683, + "biotype": "gene", + "name": "NBN", + "accession": "@GENE_NBN" + }, + "text": "NBS1", + "locations": [ + { + "offset": 595, + "length": 4 + } + ] + }, + { + "id": "34", + "infons": { + "identifier": "672", + "type": "Gene", + "ncbi_homologene": "5276", + "valid": true, + "normalized": [672], + "database": "ncbi_gene", + "normalized_id": 672, 
+ "biotype": "gene", + "name": "BRCA1", + "accession": "@GENE_BRCA1" + }, + "text": "BRCA1", + "locations": [ + { + "offset": 601, + "length": 5 + } + ] + }, + { + "id": "35", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 700, + "length": 5 + } + ] + }, + { + "id": "36", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 867, + "length": 5 + } + ] + }, + { + "id": "37", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 938, + "length": 5 + } + ] + }, + { + "id": "38", + "infons": { + "identifier": "-", + "type": "Chemical", + "valid": false, + "normalized_id": null, + "biotype": "chemical" + }, + "text": "H1A", + "locations": [ + { + "offset": 954, + "length": 3 + } + ] + }, + { + "id": "39", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 994, + "length": 5 + } + ] + }, + { + "id": "40", + "infons": { + "identifier": "54918", + "type": "Gene", + "ncbi_homologene": "9845", + "valid": true, + "normalized": [54918], + "database": "ncbi_gene", + 
"normalized_id": 54918, + "biotype": "gene", + "name": "CMTM6", + "accession": "@GENE_CMTM6" + }, + "text": "CMTM6", + "locations": [ + { + "offset": 1005, + "length": 5 + } + ] + }, + { + "id": "41", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 1030, + "length": 5 + } + ] + }, + { + "id": "42", + "infons": { + "identifier": "29126", + "type": "Gene", + "ncbi_homologene": "8560", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "text": "PD-L1", + "locations": [ + { + "offset": 1063, + "length": 5 + } + ] + }, + { + "id": "43", + "infons": { + "identifier": "MESH:D009369", + "type": "Disease", + "valid": true, + "normalized": ["D009369"], + "database": "ncbi_mesh", + "normalized_id": "D009369", + "biotype": "disease", + "name": "Neoplasms", + "accession": "@DISEASE_Neoplasms" + }, + "text": "cancer", + "locations": [ + { + "offset": 1167, + "length": 6 + } + ] + } + ], + "relations": [] + } + ], + "relations": [ + { + "id": "R1", + "infons": { + "score": "0.9528", + "role1": { + "identifier": "29126", + "type": "Gene", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "role2": { + "identifier": "54918", + "type": "Gene", + "valid": true, + "normalized": [54918], + "database": "ncbi_gene", + "normalized_id": 54918, + "biotype": "gene", + "name": "CMTM6", + "accession": "@GENE_CMTM6" + }, + "type": "Bind" + }, + "nodes": [ + { + "refid": "0", + "role": "15,18" + } + ] + }, + { + "id": "R2", + "infons": { + "score": "0.9655", + "role1": { + "identifier": "29126", + "type": 
"Gene", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "role2": { + "identifier": "5133", + "type": "Gene", + "valid": true, + "normalized": [5133], + "database": "ncbi_gene", + "normalized_id": 5133, + "biotype": "gene", + "name": "PDCD1", + "accession": "@GENE_PDCD1" + }, + "type": "Bind" + }, + "nodes": [ + { + "refid": "1", + "role": "2,4" + } + ] + }, + { + "id": "R3", + "infons": { + "score": "0.9912", + "role1": { + "identifier": "29126", + "type": "Gene", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "role2": { + "identifier": "4683", + "type": "Gene", + "valid": true, + "normalized": [4683], + "database": "ncbi_gene", + "normalized_id": 4683, + "biotype": "gene", + "name": "NBN", + "accession": "@GENE_NBN" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "2", + "role": "10,11" + } + ] + }, + { + "id": "R4", + "infons": { + "score": "0.999", + "role1": { + "identifier": "MESH:D009369", + "type": "Disease", + "valid": true, + "normalized": ["D009369"], + "database": "ncbi_mesh", + "normalized_id": "D009369", + "biotype": "disease", + "name": "Neoplasms", + "accession": "@DISEASE_Neoplasms" + }, + "role2": { + "identifier": "29126", + "type": "Gene", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "3", + "role": "9,7" + } + ] + }, + { + "id": "R5", + "infons": { + "score": "0.9981", + "role1": { + "identifier": "29126", + "type": "Gene", + "valid": true, + "normalized": [29126], + "database": "ncbi_gene", + "normalized_id": 29126, + "biotype": "gene", + "name": "CD274", + "accession": "@GENE_CD274" + }, + "role2": { + 
"identifier": "672", + "type": "Gene", + "valid": true, + "normalized": [672], + "database": "ncbi_gene", + "normalized_id": 672, + "biotype": "gene", + "name": "BRCA1", + "accession": "@GENE_BRCA1" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "4", + "role": "10,12" + } + ] + } + ], + "pmid": 31053471, + "pmcid": null, + "meta": {}, + "date": "2019-06-20T00:00:00Z", + "journal": "Mol Cell", + "authors": [ + "Tu X", + "Qin B", + "Zhang Y", + "Zhang C", + "Kahila M", + "Nowsheen S", + "Yin P", + "Yuan J", + "Pei H", + "Li H", + "Yu J", + "Song Z", + "Zhou Q", + "Zhao F", + "Liu J", + "Zhang C", + "Dong H", + "Mutter RW", + "Lou Z" + ], + "relations_display": [ + { + "name": "interact|@GENE_CD274|@GENE_CMTM6" + }, + { + "name": "interact|@GENE_CD274|@GENE_PDCD1" + }, + { + "name": "associate|@GENE_CD274|@GENE_NBN" + }, + { + "name": "associate|@DISEASE_Neoplasms|@GENE_CD274" + }, + { + "name": "associate|@GENE_CD274|@GENE_BRCA1" + } + ] +} diff --git a/test/pubtator/10.1016_j.molcel.2024.01.007.json b/test/pubtator/10.1016_j.molcel.2024.01.007.json new file mode 100644 index 00000000..92f1d738 --- /dev/null +++ b/test/pubtator/10.1016_j.molcel.2024.01.007.json @@ -0,0 +1,459 @@ +{ + "_id": "38309274|None", + "id": "10.1016/j.molcel.2024.01.007", + "infons": { + "doi": "10.1016/j.molcel.2024.01.007", + "comment": "Get: Missing IκBζ; mapping greek symbol" + }, + "passages": [ + { + "infons": { + "journal": "Mol Cell;2024Jan24. 
doi:10.1016/j.molcel.2024.01.007", + "year": "2024", + "type": "title", + "authors": "Alpsoy A, Wu XS, Pal S, Klingbeil O, Kumar P, El Demerdash O, Nalbant B, Vakoc CR, " + }, + "offset": 0, + "text": "IkappaBzeta is a dual-use coactivator of NF-kappaB and POU transcription factors.", + "sentences": [], + "annotations": [ + { + "id": "2", + "infons": { + "identifier": "64332", + "type": "Gene", + "ncbi_homologene": "12734", + "valid": true, + "normalized": [64332], + "database": "ncbi_gene", + "normalized_id": 64332, + "biotype": "gene", + "name": "NFKBIZ", + "accession": "@GENE_NFKBIZ" + }, + "text": "IkappaBzeta", + "locations": [ + { + "offset": 0, + "length": 11 + } + ] + }, + { + "id": "3", + "infons": { + "identifier": "4790", + "type": "Gene", + "ncbi_homologene": "2971", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "text": "NF-kappaB", + "locations": [ + { + "offset": 41, + "length": 9 + } + ] + } + ], + "relations": [] + }, + { + "infons": { + "type": "abstract" + }, + "offset": 82, + "text": "OCA-B, OCA-T1, and OCA-T2 belong to a family of coactivators that bind to POU transcription factors (TFs) to regulate gene expression in immune cells. Here, we identify IkappaBzeta (encoded by the NFKBIZ gene) as an additional coactivator of POU TFs. Although originally discovered as an inducible regulator of NF-kappaB, we show here that IkappaBzeta shares a microhomology with OCA proteins and uses this segment to bind to POU TFs and octamer-motif-containing DNA. Our functional experiments suggest that IkappaBzeta requires its interaction with POU TFs to coactivate immune-related genes. This finding is reinforced by epigenomic analysis of MYD88L265P-mutant lymphoma cells, which revealed colocalization of IkappaBzeta with the POU TF OCT2 and NF-kappaB:p50 at hundreds of DNA elements harboring octamer and kappaB motifs. 
These results suggest that IkappaBzeta is a transcriptional coactivator that can amplify and integrate the output of NF-kappaB and POU TFs at inducible genes in immune cells.", + "sentences": [], + "annotations": [ + { + "id": "17", + "infons": { + "identifier": "5450", + "type": "Gene", + "ncbi_homologene": "4543", + "valid": true, + "normalized": [5450], + "database": "ncbi_gene", + "normalized_id": 5450, + "biotype": "gene", + "name": "POU2AF1", + "accession": "@GENE_POU2AF1" + }, + "text": "OCA-B", + "locations": [ + { + "offset": 82, + "length": 5 + } + ] + }, + { + "id": "18", + "infons": { + "identifier": "64332", + "type": "Gene", + "ncbi_homologene": "12734", + "valid": true, + "normalized": [64332], + "database": "ncbi_gene", + "normalized_id": 64332, + "biotype": "gene", + "name": "NFKBIZ", + "accession": "@GENE_NFKBIZ" + }, + "text": "IkappaBzeta", + "locations": [ + { + "offset": 251, + "length": 11 + } + ] + }, + { + "id": "19", + "infons": { + "identifier": "64332", + "type": "Gene", + "ncbi_homologene": "12734", + "valid": true, + "normalized": [64332], + "database": "ncbi_gene", + "normalized_id": 64332, + "biotype": "gene", + "name": "NFKBIZ", + "accession": "@GENE_NFKBIZ" + }, + "text": "NFKBIZ", + "locations": [ + { + "offset": 279, + "length": 6 + } + ] + }, + { + "id": "20", + "infons": { + "identifier": "4790", + "type": "Gene", + "ncbi_homologene": "2971", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "text": "NF-kappaB", + "locations": [ + { + "offset": 393, + "length": 9 + } + ] + }, + { + "id": "21", + "infons": { + "identifier": "64332", + "type": "Gene", + "ncbi_homologene": "12734", + "valid": true, + "normalized": [64332], + "database": "ncbi_gene", + "normalized_id": 64332, + "biotype": "gene", + "name": "NFKBIZ", + "accession": "@GENE_NFKBIZ" + }, + "text": "IkappaBzeta", + "locations": [ + { + "offset": 422, + 
"length": 11 + } + ] + }, + { + "id": "22", + "infons": { + "identifier": "64332", + "type": "Gene", + "ncbi_homologene": "12734", + "valid": true, + "normalized": [64332], + "database": "ncbi_gene", + "normalized_id": 64332, + "biotype": "gene", + "name": "NFKBIZ", + "accession": "@GENE_NFKBIZ" + }, + "text": "IkappaBzeta", + "locations": [ + { + "offset": 590, + "length": 11 + } + ] + }, + { + "id": "23", + "infons": { + "identifier": "MESH:D008223", + "type": "Disease", + "valid": true, + "normalized": ["D008223"], + "database": "ncbi_mesh", + "normalized_id": "D008223", + "biotype": "disease", + "name": "Lymphoma", + "accession": "@DISEASE_Lymphoma" + }, + "text": "lymphoma", + "locations": [ + { + "offset": 747, + "length": 8 + } + ] + }, + { + "id": "24", + "infons": { + "identifier": "64332", + "type": "Gene", + "ncbi_homologene": "12734", + "valid": true, + "normalized": [64332], + "database": "ncbi_gene", + "normalized_id": 64332, + "biotype": "gene", + "name": "NFKBIZ", + "accession": "@GENE_NFKBIZ" + }, + "text": "IkappaBzeta", + "locations": [ + { + "offset": 796, + "length": 11 + } + ] + }, + { + "id": "25", + "infons": { + "identifier": "5452", + "type": "Gene", + "ncbi_homologene": "55674", + "valid": true, + "normalized": [5452], + "database": "ncbi_gene", + "normalized_id": 5452, + "biotype": "gene", + "name": "POU2F2", + "accession": "@GENE_POU2F2" + }, + "text": "OCT2", + "locations": [ + { + "offset": 824, + "length": 4 + } + ] + }, + { + "id": "26", + "infons": { + "identifier": "4790", + "type": "Gene", + "ncbi_homologene": "2971", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "text": "NF-kappaB", + "locations": [ + { + "offset": 833, + "length": 9 + } + ] + }, + { + "id": "27", + "infons": { + "identifier": "4790", + "type": "Gene", + "ncbi_homologene": "2971", + "valid": true, + "normalized": [4790], + "database": 
"ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "text": "p50", + "locations": [ + { + "offset": 843, + "length": 3 + } + ] + }, + { + "id": "28", + "infons": { + "identifier": "64332", + "type": "Gene", + "ncbi_homologene": "12734", + "valid": true, + "normalized": [64332], + "database": "ncbi_gene", + "normalized_id": 64332, + "biotype": "gene", + "name": "NFKBIZ", + "accession": "@GENE_NFKBIZ" + }, + "text": "IkappaBzeta", + "locations": [ + { + "offset": 939, + "length": 11 + } + ] + }, + { + "id": "29", + "infons": { + "identifier": "4790", + "type": "Gene", + "ncbi_homologene": "2971", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "text": "NF-kappaB", + "locations": [ + { + "offset": 1029, + "length": 9 + } + ] + } + ], + "relations": [] + } + ], + "relations": [ + { + "id": "R1", + "infons": { + "score": "0.9989", + "role1": { + "identifier": "5452", + "type": "Gene", + "valid": true, + "normalized": [5452], + "database": "ncbi_gene", + "normalized_id": 5452, + "biotype": "gene", + "name": "POU2F2", + "accession": "@GENE_POU2F2" + }, + "role2": { + "identifier": "64332", + "type": "Gene", + "valid": true, + "normalized": [64332], + "database": "ncbi_gene", + "normalized_id": 64332, + "biotype": "gene", + "name": "NFKBIZ", + "accession": "@GENE_NFKBIZ" + }, + "type": "Association" + }, + "nodes": [ + { + "refid": "0", + "role": "10,9" + } + ] + }, + { + "id": "R2", + "infons": { + "score": "0.9979", + "role1": { + "identifier": "4790", + "type": "Gene", + "valid": true, + "normalized": [4790], + "database": "ncbi_gene", + "normalized_id": 4790, + "biotype": "gene", + "name": "NFKB1", + "accession": "@GENE_NFKB1" + }, + "role2": { + "identifier": "64332", + "type": "Gene", + "valid": true, + "normalized": [64332], + "database": "ncbi_gene", + "normalized_id": 64332, + 
"biotype": "gene", + "name": "NFKBIZ", + "accession": "@GENE_NFKBIZ" + }, + "type": "Positive_Correlation" + }, + "nodes": [ + { + "refid": "1", + "role": "1,0" + } + ] + } + ], + "pmid": 38309274, + "pmcid": null, + "meta": {}, + "date": "2024-01-24T00:00:00Z", + "journal": "Mol Cell", + "authors": [ + "Alpsoy A", + "Wu XS", + "Pal S", + "Klingbeil O", + "Kumar P", + "El Demerdash O", + "Nalbant B", + "Vakoc CR" + ], + "relations_display": [ + { + "name": "associate|@GENE_POU2F2|@GENE_NFKBIZ" + }, + { + "name": "positive_correlate|@GENE_NFKB1|@GENE_NFKBIZ" + } + ] +} diff --git a/test/pubtator/pubtator.js b/test/pubtator/pubtator.js index 3206f8d1..25976d39 100644 --- a/test/pubtator/pubtator.js +++ b/test/pubtator/pubtator.js @@ -4,9 +4,12 @@ import _ from 'lodash'; import { Hint, HINT_TYPE, SECTION } from '../../src/model/hint.js'; import map from '../../src/server/routes/api/document/hint/pubtator.js'; +import pubtator_1 from './10.1016_j.molcel.2016.11.034.json'; import pubtator_2 from './10.1016_j.molcel.2019.03.023.json'; -import pubtator_5 from './10.1038_s41556-021-00642-9.json'; -import pubtator_6 from './10.1126_scisignal.abf3535.json'; +import pubtator_3 from './10.1016_j.molcel.2019.04.005.json'; +import pubtator_4 from './10.1038_s41556-021-00642-9.json'; +import pubtator_5 from './10.1126_scisignal.abf3535.json'; +import pubtator_6 from './10.1016_j.molcel.2024.01.007.json'; import pubtator_7 from './10.15252_embj.2023113616.json'; import pubtator_8 from './pubtator_8.json'; @@ -20,16 +23,14 @@ import pubtator_8 from './pubtator_8.json'; * In the comments below, There is a field at the end of each bioCDocument object called "text" , which is an array devided into 2 arrays, the first array is hints from title, and the second array is hints from abstract. 
*/ const bioCDocuments = [ - // 0..1 (one organism hint in abstract) - pubtator_2, // pmid: 31003867 | 0 organism hints in title { } | 1 organism hints in abstract: { 10090 aka (Mus musculus): 1 } | text: [ [] , ["mouse"] ] - //0..1 (one organism hint in abstract) - pubtator_5, // pmid: 33664495 | 0 organism hints in title { } | 1 organism hints in abstract: { 10090 aka (Mus musculus): 1 } | text: [ [] , ["mice"] ] - //1..* (one organism hint in title and multiple organism hints in abstract) - pubtator_6, // pmid: 34546791 | 1 organism hints in title { 10090 aka (Mus musculus): 1 } | 4 organism hints in abstract: { 10090 aka (Mus musculus): 4 } | text: [ ["mice"] , ["mice" , "mice" , "mice" , "mice"] ] - //1..* (one organism hint in title and multiple organism hints in abstract) - pubtator_7, // pmid: 37317646 | 1 organism hints in title { 7227 aka (Drosophila melanogaster): 1 } | 2 organism hints in abstract: { 9606 aka (Homo sapiens): 1 , 7227 aka (Drosophila melanogaster): 1 } | text: [ [ "Drosophila" ] , [ "human" , "Drosophila" ] ] - //*..* (multiple organisms in title, and multiple organism hints in abstract) - pubtator_8, // pmid: 24633240 | 2 organism hints in title { 9606 aka (Homo sapiens): 1 , 10090 aka (Mus musculus): 1} | 2 organism hints in title { 9606 aka (Homo sapiens): 12 , 10090 aka (Mus musculus): 7} | text [ [ "human" , "mouse"] , [ "Mice", "human" , "human" , "mouse" , "human" , "mouse" , "human" , "mouse" , "human" , "human" , "mice" , "Human" , "human" , "mice" , "human" , "patients" , "mouse" , "human" , "human" ] ] + pubtator_1, + pubtator_2, + pubtator_3, + pubtator_4, + pubtator_5, + pubtator_6, + pubtator_7, + pubtator_8, ]; describe('pubtator', function () { @@ -44,6 +45,7 @@ describe('pubtator', function () { before(() => { hints = map(bioCDocument); + console.log(JSON.stringify(hints, null, 2)); }); it('Should map to a non-zero list', function () { @@ -77,11 +79,6 @@ describe('pubtator', function () { 
expect(inAbstract.length).to.equal(uniqInAbstract.length); }); - it(`Should be all an ORGANISM hint`, function () { - const isOrganism = (h) => h.type === HINT_TYPE.ORGANISM; - expect(hints.every(isOrganism)).to.be.true; - }); - it('Should aggregate organism hints correctly', function () { const aggregateCounts = hints.reduce((acc, hint) => { const key = `${hint._xref.id}_${hint.section}`; @@ -89,27 +86,93 @@ describe('pubtator', function () { return acc; }, {}); - if (pubtator_2 === bioCDocument) { + if (pubtator_1 === bioCDocument) { + const expectedCounts = { + '3551_abstract': 1, + '4212_abstract': 1, + '4212_title': 1, + '442920_title': 1, + '4790_abstract': 4, + '4792_abstract': 2, + '5533_abstract': 1, + '5533_title': 1, + '5970_abstract': 2, + '5970_title': 1, + 'MESH:D009369_abstract': 1, + 'MESH:D011471_abstract': 1, + 'MESH:D011471_title': 1, + 'MESH:D064129_abstract': 5, + }; + expect(aggregateCounts).to.deep.equal(expectedCounts); + } else if (pubtator_2 === bioCDocument) { const expectedCounts = { '10090_abstract': 1, + '625662_abstract': 5, + '625662_title': 1, + '69597_abstract': 1, + '73673_abstract': 3, + '73673_title': 1, + 'MESH:D007246_abstract': 1, }; expect(aggregateCounts).to.deep.equal(expectedCounts); - } else if (pubtator_5 === bioCDocument) { + } else if (pubtator_3 === bioCDocument) { + const expectedCounts = { + '29126_abstract': 11, + '29126_title': 2, + '4683_abstract': 1, + '5133_abstract': 3, + '54918_abstract': 1, + '672_abstract': 1, + 'MESH:D009369_abstract': 2, + }; + expect(aggregateCounts).to.deep.equal(expectedCounts); + } else if (pubtator_4 === bioCDocument) { const expectedCounts = { '10090_abstract': 1, + '108909_abstract': 8, + '108909_title': 1, + '22227_abstract': 7, + '22227_title': 1, + 'MESH:D002395_abstract': 2, + 'MESH:D007035_abstract': 1, }; expect(aggregateCounts).to.deep.equal(expectedCounts); - } else if (pubtator_6 === bioCDocument) { + } else if (pubtator_5 === bioCDocument) { const expectedCounts = { - 
'10090_title': 1, '10090_abstract': 4, + '10090_title': 1, + '16150_abstract': 5, + '16150_title': 1, + '16176_abstract': 3, + '18033_abstract': 9, + '18033_title': 1, + '19697_abstract': 5, + 'MESH:D002357_abstract': 3, + 'MESH:D007249_abstract': 1, + 'MESH:D009402_abstract': 1, + 'MESH:D010003_abstract': 5, + 'MESH:D010003_title': 1, + }; + expect(aggregateCounts).to.deep.equal(expectedCounts); + } else if (pubtator_6 === bioCDocument) { + const expectedCounts = { + '4790_abstract': 4, + '4790_title': 1, + '5450_abstract': 1, + '5452_abstract': 1, + '64332_abstract': 6, + '64332_title': 1, + 'MESH:D008223_abstract': 1, }; expect(aggregateCounts).to.deep.equal(expectedCounts); } else if (pubtator_7 === bioCDocument) { const expectedCounts = { + '39647_abstract': 1, + '7227_abstract': 1, '7227_title': 1, '9606_abstract': 1, - '7227_abstract': 1, + '9662_abstract': 1, + 'MESH:D002925_abstract': 1, }; expect(aggregateCounts).to.deep.equal(expectedCounts); } else if (pubtator_8 === bioCDocument) { @@ -118,10 +181,39 @@ describe('pubtator', function () { '10090_title': 1, '9606_abstract': 12, '10090_abstract': 7, + '947_abstract': 1, + 'MESH:D009369_abstract': 2, }; expect(aggregateCounts).to.deep.equal(expectedCounts); } }); + + describe(`Hint type ORGANISM`, function () { + it('Should have xref to NCBI Taxonomy', function () { + const hs = _.filter(hints, (o) => o.type === HINT_TYPE.ORGANISM); + hs.forEach((h) => { + expect(h.xref.id).to.match(/^\d+$/); // identifier + }); + }); + }); + + describe(`Hint type GGP`, function () { + it('Should have xref to NCBI Gene', function () { + const hs = _.filter(hints, (o) => o.type === HINT_TYPE.GGP); + hs.forEach((h) => { + expect(h.xref.id).to.match(/^\d+$/); // identifier + }); + }); + }); + + describe(`Hint type CHEMICAL`, function () { + it('Should have xref to MeSH', function () { + const hs = _.filter(hints, (o) => o.type === HINT_TYPE.CHEMICAL); + hs.forEach((h) => { + expect(h.xref.id).to.match(/^mesh:(C|D)\d{6,9}$/i); 
+ }); + }); + }); }); // BioC Document }); // forEach }); // map diff --git a/test/pubtator/pubtator_8.json b/test/pubtator/pubtator_8.json index 3499bb2f..40e074fd 100644 --- a/test/pubtator/pubtator_8.json +++ b/test/pubtator/pubtator_8.json @@ -2,8 +2,8 @@ "_id": "24633240|None", "id": "24633240", "infons": { - "doi": "24633240 (needs finishing)...", - "comment": "Human, mouse (needs finishing)..." + "doi": "24633240 from pubtator", + "comment": "Human, mouse, CD34, MITRG, MISTRG, tumor, patients" }, "passages": [ { From 47880d9ab103b86af78b994afa34fd979a7cab8c Mon Sep 17 00:00:00 2001 From: fileoy Date: Thu, 8 Aug 2024 15:10:15 -0400 Subject: [PATCH 2/4] typo --- test/pubtator/pubtator.js | 1 - 1 file changed, 1 deletion(-) diff --git a/test/pubtator/pubtator.js b/test/pubtator/pubtator.js index 25976d39..1eb39320 100644 --- a/test/pubtator/pubtator.js +++ b/test/pubtator/pubtator.js @@ -45,7 +45,6 @@ describe('pubtator', function () { before(() => { hints = map(bioCDocument); - console.log(JSON.stringify(hints, null, 2)); }); it('Should map to a non-zero list', function () { From c33831e3541db8d47cae9e5fbe44c17850dfa872 Mon Sep 17 00:00:00 2001 From: fileoy Date: Thu, 8 Aug 2024 16:08:48 -0400 Subject: [PATCH 3/4] fixed logic error --- src/server/routes/api/document/hint/pubtator.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/routes/api/document/hint/pubtator.js b/src/server/routes/api/document/hint/pubtator.js index f01f27d0..1e9c0bf9 100644 --- a/src/server/routes/api/document/hint/pubtator.js +++ b/src/server/routes/api/document/hint/pubtator.js @@ -34,7 +34,7 @@ function map(bioCDocument) { [PUBTATOR_ANNOTATION_TYPE.CELL_LINE, HINT_TYPE.CELL_LINE], ]); const database2Xref = new Map([ - [PUBTATOR_ANNOTATION_TYPE.GENE, COLLECTIONS.NCBI_GENE], + [PUBTATOR_DATABASE.ncbi_gene, COLLECTIONS.NCBI_GENE], [PUBTATOR_DATABASE.ncbi_taxonomy, COLLECTIONS.NCBI_TAXONOMY], [PUBTATOR_DATABASE.ncbi_mesh, COLLECTIONS.MESH], 
[PUBTATOR_DATABASE.cvcl, COLLECTIONS.CELLOSAURUS], From 0db77393c2139f8e322917313454ccb724b3c723 Mon Sep 17 00:00:00 2001 From: fileoy Date: Fri, 9 Aug 2024 12:30:24 -0400 Subject: [PATCH 4/4] // Content: - hint model constructor modified to use setters to initialize properties and enforce validation - pubtator test updated to use new getter methods to access hint properties rather than direct access to properties --- src/model/hint.js | 18 +++++++++--------- test/pubtator/pubtator.js | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/model/hint.js b/src/model/hint.js index d23e46fc..1626fb7a 100644 --- a/src/model/hint.js +++ b/src/model/hint.js @@ -32,17 +32,17 @@ const SECTIONS = _.flatMap(SECTION); class Hint { /** * Creates an instance of Hint. - * @param {Array} param.texts - The texts associated with the hint. - * @param {string} param.type - The type of the hint. - * @param {Object} param.xref - The cross-reference (xref) object. - * @param {string} param.section - The section of the document where the hint was found. + * @param {Array} texts - The texts associated with the hint. + * @param {string} type - The type of the hint. + * @param {Object} xref - The cross-reference (xref) object. + * @param {string} section - The section of the document where the hint was found. 
*/ constructor(texts, type, xref, section) { - // Initialize the properties using the setters to enforce validation - this._texts = texts; - this._type = type; - this._xref = xref; - this._section = section; + // Use setters to initialize properties and enforce validation + this.texts = texts; + this.type = type; + this.xref = xref; + this.section = section; } // Getter and setter for texts diff --git a/test/pubtator/pubtator.js b/test/pubtator/pubtator.js index 1eb39320..8b6dce29 100644 --- a/test/pubtator/pubtator.js +++ b/test/pubtator/pubtator.js @@ -80,8 +80,8 @@ describe('pubtator', function () { it('Should aggregate organism hints correctly', function () { const aggregateCounts = hints.reduce((acc, hint) => { - const key = `${hint._xref.id}_${hint.section}`; - acc[key] = (acc[key] || 0) + hint._texts.length; + const key = `${hint.xref.id}_${hint.section}`; + acc[key] = (acc[key] || 0) + hint.texts.length; return acc; }, {});