From 51161633c18351c5fa67c14dbbf8ca93f5fb7da6 Mon Sep 17 00:00:00 2001 From: anngvu <32753274+anngvu@users.noreply.github.com> Date: Thu, 18 Aug 2022 07:58:44 -0700 Subject: [PATCH] New rules terms (#197) * Add rule to enforce num for age * Update test variant to have all missing ages * Build jsonld * Add new report type * Update values for resource types * Build jsonld Co-authored-by: nf-osi[bot] --- NF.csv | 5 +++-- NF.jsonld | 24 +++++++++++++++++++++++- modules/Biosample/annotationProperty.csv | 2 +- modules/Data/Metadata.csv | 1 + modules/Data/annotationProperty.csv | 2 +- tests/GenomicsAssayTemplate_1.csv | 4 ++-- 6 files changed, 31 insertions(+), 7 deletions(-) diff --git a/NF.csv b/NF.csv index 47bd0b81..6b5e2a23 100644 --- a/NF.csv +++ b/NF.csv @@ -525,7 +525,7 @@ modelSystemName,"A group of presumed common ancestry with clear-cut physiologica nf2Genotype,"Genotype of NF2 gene in the biospecimen from which the data were derived, if known","-/-, +/-, +/+, +/?, -/?, Unknown",,FALSE,,neurofibromatosis,,,,nf2Genotype,Property,Biosample,annotationProperty,,Genotype,, nf1Genotype,"Genotype of NF1 gene in the biospecimen from which the data were derived, if known","-/-, +/-, +/+, +/?, -/?, Unknown",,FALSE,,neurofibromatosis,,,,nf1Genotype,Property,Biosample,annotationProperty,,Genotype,, specimenPreparationMethod,"Term that represents preservation of the sample before usage in, e.g. sequencing","Fresh collected, Flash frozen, FFPE, Cryopreserved, OCT, RNAlater, formalin-fixed, ethanol, Viably frozen",,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=sample&anchor=preservation_method,ngs,,,,specimenPreparationMethod,Property,Biosample,annotationProperty,,Specimen_State,, -age,A numeric value representing age of the individual. Use with ageUnit.,,ageUnit,FALSE,Sage Bionetworks,,,,,age,DataProperty,Biosample,annotationProperty,biosampleAnnotation,,, +age,A numeric value representing age of the individual. 
Use with ageUnit.,,ageUnit,FALSE,Sage Bionetworks,,,,num,age,DataProperty,Biosample,annotationProperty,biosampleAnnotation,,, ageUnit,"A time unit that can be used with a given age value, e.g. years.","days, months, years",,FALSE,Sage Bionetworks,,,,,ageUnit,Property,Biosample,annotationProperty,biosampleAnnotation,,, bioSampleUsed,The biosample used.,,,,,,,,,,,,,,,, isXenograft,Whether or not sample source is a xenograft (Yes; No),"Yes, No",,TRUE,,experimentalData,,,,isXenograft,DataProperty,Biosample,annotationProperty,,,, @@ -671,6 +671,7 @@ HRC,Human haplotype reference panel,,,FALSE,http://www.haplotype-reference-conso experimentalData,"Any file derived from or pertaining to a scientific experiment. experimentalData annotations should be applied, possibly disease-related",,,FALSE,Sage Bionetworks,resourceType,,,,Experimental_Data,Class,Data,Metadata,Resource,,, result,"Any file that reports data results. Examples include figures, presentations, analysis, etc.",,,FALSE,Sage Bionetworks,resourceType,,,,Result,Class,Data,Metadata,Resource,,, tool,"Any file or link that represents a tool, model, or algorithm; the tool annotations could be applied",,,FALSE,Sage Bionetworks,resourceType,,,,Tool,Class,Data,Metadata,Resource,,, +workflow report,"Workflow-generated reports of analysis of primary data, usually created programmatically at completion of workflow step.",,,FALSE,,resourceType,,,,Workflow_Report,Class,Data,Metadata,Report,,, report,"a document assembled by an author for the purpose of providing information for the audience. A report is the output of a documenting process and has the objective to be consumed by a specific audience. Topic of the report is on something that has completed. A report is not a single figure. 
Examples of reports are journal article, patent application, grant progress report, case report (not patient record).",,,FALSE,http://purl.obolibrary.org/obo/IAO_0000088,resourceType,,,,Report,Class,Data,Metadata,Resource,,, metadata,Data about data; information that describes another set of data.,,,FALSE,http://purl.obolibrary.org/obo/NCIT_C52095,resourceType,,,,metadata,Class,Data,Metadata,Resource,,, protocol,"A plan specification which has sufficient level of detail and quantitative information to communicate it between investigation agents, so that different investigation agents will reliably be able to independently reproduce the process.",,,FALSE,http://purl.obolibrary.org/obo/OBI_0000272,resourceType,,,,Protocol,Class,Data,Metadata,Resource,,, @@ -686,7 +687,7 @@ Filename,The name of the file.,,"entityId,fileFormat",TRUE,Sage Bionetworks,NF,, programmingLanguage,A computer programming language,"Python, R, MATLAB, Java, C, C++, C#, Javascript, bash",,FALSE,Sage Bionetworks,,,,,programmingLanguage,Property,Data,annotationProperty,ngsParameter,,, runtimePlatform,"Runtime platform or script interpreter dependencies (e.g. 
Java v1, Python 2.3).",,,FALSE,Sage Bionetworks,,,,,runtimePlatform,Property,Data,annotationProperty,ngsParameter,,, documentation,URL to any documentation describing the resource and its use.,,,FALSE,Sage Bionetworks,,,,,documentation,Property,Data,annotationProperty,ngsParameter,,, -resourceType,The type of resource being stored and annotated,"experimentalData, curatedData, result, tool, report, metadata, protocol",,TRUE,,sageCommunity,,,,resourceType,Property,Data,annotationProperty,,Resource,, +resourceType,The type of resource being stored and annotated,"experimentalData, curatedData, result, tool, report, metadata, protocol, workflow report",,TRUE,,sageCommunity,,,,resourceType,Property,Data,annotationProperty,,Resource,, fileFormat,"Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","bash script,bedgraph,ai,idx,idat,bam,bai,excel,powerpoint,tif,png,doc,pdf,hdf,fasta,fastq,sam,vcf,bcf,maf,bed,chp,cel,sif,tsv,csv,txt,plink,bigwig,wiggle,gct,bgzip,zip,seg,html,mov,hyperlink,svs,md,flagstat,gtf,raw,msf,rmd,bed narrowPeak,bed broadPeak,bed gappedPeak,avi,pzfx,fig,xml,tar,R script,abf,bpm,dat,jpg,locs,Sentrix descriptor file,Python script,sav,gzip,sdf,RData,hic,ab1,7z,gff3,json,sqlite,svg,sra,recal,tranches,mtx,tagAlign,dup,DICOM,czi,mzML,SPAR,SDAT,nii,PAR,REC,hdr,img,sf,MATLAB script, MATLAB data,tom",,TRUE,,sageCommunity,,,,fileFormat,Property,Data,annotationProperty,,File_Format,one, progressReportNumber,"Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable",,FALSE,,template,,,,progressReportNumber,DataProperty,Data,annotationProperty,,,one, comments,Brief free-text comments that may also be important to understanding the resource.,,,FALSE,Sage Bionetworks,experimentalData,,,,comments,DataProperty,Data,annotationProperty,,,many, diff --git a/NF.jsonld b/NF.jsonld index 2707244d..9723e25c 100644 --- a/NF.jsonld +++ b/NF.jsonld @@ -12613,7 +12613,9 @@ "@id": "bts:AgeUnit" } ], - "sms:validationRules": [] + "sms:validationRules": [ + "num" + ] }, { "@id": "bts:AgeUnit", @@ -15199,6 +15201,23 @@ "sms:required": "sms:false", "sms:validationRules": [] }, + { + "@id": "bts:Workflowreport", + "@type": "rdfs:Class", + "rdfs:comment": "Workflow-generated reports of analysis of primary data, usually created programmatically at completion of workflow step.", + "rdfs:label": "Workflowreport", + "rdfs:subClassOf": [ + { + "@id": "bts:ResourceType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "workflow report", + "sms:required": "sms:false", + "sms:validationRules": [] + }, { "@id": "bts:Report", "@type": "rdfs:Class", @@ -15709,6 +15728,9 @@ }, { "@id": "bts:Protocol" + }, + { + "@id": "bts:Workflowreport" } ], "sms:displayName": "resourceType", diff --git a/modules/Biosample/annotationProperty.csv b/modules/Biosample/annotationProperty.csv index 89337f05..5697cfd9 100644 --- a/modules/Biosample/annotationProperty.csv +++ b/modules/Biosample/annotationProperty.csv @@ -22,7 +22,7 @@ modelSystemName,"A group of presumed common ancestry with clear-cut physiologica nf2Genotype,"Genotype of NF2 gene in the biospecimen from which the data were derived, if known","-/-, +/-, +/+, +/?, -/?, Unknown",,FALSE,,neurofibromatosis,,,,nf2Genotype,Property,Biosample,annotationProperty,,Genotype,, nf1Genotype,"Genotype of NF1 gene in the biospecimen from which the data 
were derived, if known","-/-, +/-, +/+, +/?, -/?, Unknown",,FALSE,,neurofibromatosis,,,,nf1Genotype,Property,Biosample,annotationProperty,,Genotype,, specimenPreparationMethod,"Term that represents preservation of the sample before usage in, e.g. sequencing","Fresh collected, Flash frozen, FFPE, Cryopreserved, OCT, RNAlater, formalin-fixed, ethanol, Viably frozen",,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=sample&anchor=preservation_method,ngs,,,,specimenPreparationMethod,Property,Biosample,annotationProperty,,Specimen_State,, -age,A numeric value representing age of the individual. Use with ageUnit.,,ageUnit,FALSE,Sage Bionetworks,,,,,age,DataProperty,Biosample,annotationProperty,biosampleAnnotation,,, +age,A numeric value representing age of the individual. Use with ageUnit.,,ageUnit,FALSE,Sage Bionetworks,,,,num,age,DataProperty,Biosample,annotationProperty,biosampleAnnotation,,, ageUnit,"A time unit that can be used with a given age value, e.g. years.","days, months, years",,FALSE,Sage Bionetworks,,,,,ageUnit,Property,Biosample,annotationProperty,biosampleAnnotation,,, bioSampleUsed,The biosample used.,,,,,,,,,,,,,,,, isXenograft,Whether or not sample source is a xenograft (Yes; No),"Yes, No",,TRUE,,experimentalData,,,,isXenograft,DataProperty,Biosample,annotationProperty,,,, diff --git a/modules/Data/Metadata.csv b/modules/Data/Metadata.csv index e17760db..ea5ef92e 100644 --- a/modules/Data/Metadata.csv +++ b/modules/Data/Metadata.csv @@ -16,6 +16,7 @@ HRC,Human haplotype reference panel,,,FALSE,http://www.haplotype-reference-conso experimentalData,"Any file derived from or pertaining to a scientific experiment. experimentalData annotations should be applied, possibly disease-related",,,FALSE,Sage Bionetworks,resourceType,,,,Experimental_Data,Class,Data,Metadata,Resource,,, result,"Any file that reports data results. 
Examples include figures, presentations, analysis, etc.",,,FALSE,Sage Bionetworks,resourceType,,,,Result,Class,Data,Metadata,Resource,,, tool,"Any file or link that represents a tool, model, or algorithm; the tool annotations could be applied",,,FALSE,Sage Bionetworks,resourceType,,,,Tool,Class,Data,Metadata,Resource,,, +workflow report,"Workflow-generated reports of analysis of primary data, usually created programmatically at completion of workflow step.",,,FALSE,,resourceType,,,,Workflow_Report,Class,Data,Metadata,Report,,, report,"a document assembled by an author for the purpose of providing information for the audience. A report is the output of a documenting process and has the objective to be consumed by a specific audience. Topic of the report is on something that has completed. A report is not a single figure. Examples of reports are journal article, patent application, grant progress report, case report (not patient record).",,,FALSE,http://purl.obolibrary.org/obo/IAO_0000088,resourceType,,,,Report,Class,Data,Metadata,Resource,,, metadata,Data about data; information that describes another set of data.,,,FALSE,http://purl.obolibrary.org/obo/NCIT_C52095,resourceType,,,,metadata,Class,Data,Metadata,Resource,,, protocol,"A plan specification which has sufficient level of detail and quantitative information to communicate it between investigation agents, so that different investigation agents will reliably be able to independently reproduce the process.",,,FALSE,http://purl.obolibrary.org/obo/OBI_0000272,resourceType,,,,Protocol,Class,Data,Metadata,Resource,,, diff --git a/modules/Data/annotationProperty.csv b/modules/Data/annotationProperty.csv index b1221613..d0be8245 100644 --- a/modules/Data/annotationProperty.csv +++ b/modules/Data/annotationProperty.csv @@ -11,7 +11,7 @@ Filename,The name of the file.,,"entityId,fileFormat",TRUE,Sage Bionetworks,NF,, programmingLanguage,A computer programming language,"Python, R, MATLAB, Java, C, C++, C#, Javascript, 
bash",,FALSE,Sage Bionetworks,,,,,programmingLanguage,Property,Data,annotationProperty,ngsParameter,,, runtimePlatform,"Runtime platform or script interpreter dependencies (e.g. Java v1, Python 2.3).",,,FALSE,Sage Bionetworks,,,,,runtimePlatform,Property,Data,annotationProperty,ngsParameter,,, documentation,URL to any documentation describing the resource and its use.,,,FALSE,Sage Bionetworks,,,,,documentation,Property,Data,annotationProperty,ngsParameter,,, -resourceType,The type of resource being stored and annotated,"experimentalData, curatedData, result, tool, report, metadata, protocol",,TRUE,,sageCommunity,,,,resourceType,Property,Data,annotationProperty,,Resource,, +resourceType,The type of resource being stored and annotated,"experimentalData, curatedData, result, tool, report, metadata, protocol, workflow report",,TRUE,,sageCommunity,,,,resourceType,Property,Data,annotationProperty,,Resource,, fileFormat,"Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","bash script,bedgraph,ai,idx,idat,bam,bai,excel,powerpoint,tif,png,doc,pdf,hdf,fasta,fastq,sam,vcf,bcf,maf,bed,chp,cel,sif,tsv,csv,txt,plink,bigwig,wiggle,gct,bgzip,zip,seg,html,mov,hyperlink,svs,md,flagstat,gtf,raw,msf,rmd,bed narrowPeak,bed broadPeak,bed gappedPeak,avi,pzfx,fig,xml,tar,R script,abf,bpm,dat,jpg,locs,Sentrix descriptor file,Python script,sav,gzip,sdf,RData,hic,ab1,7z,gff3,json,sqlite,svg,sra,recal,tranches,mtx,tagAlign,dup,DICOM,czi,mzML,SPAR,SDAT,nii,PAR,REC,hdr,img,sf,MATLAB script, MATLAB data,tom",,TRUE,,sageCommunity,,,,fileFormat,Property,Data,annotationProperty,,File_Format,one, progressReportNumber,"Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable",,FALSE,,template,,,,progressReportNumber,DataProperty,Data,annotationProperty,,,one, comments,Brief free-text comments that may also be important to understanding the resource.,,,FALSE,Sage Bionetworks,experimentalData,,,,comments,DataProperty,Data,annotationProperty,,,many, diff --git a/tests/GenomicsAssayTemplate_1.csv b/tests/GenomicsAssayTemplate_1.csv index d7554001..22194c76 100644 --- a/tests/GenomicsAssayTemplate_1.csv +++ b/tests/GenomicsAssayTemplate_1.csv @@ -1,6 +1,6 @@ Component,Filename,resourceType,progressReportNumber,dataType,assay,platform,individualID,parentSpecimenID,runType,libraryPrep,comments,age,ageUnit,aliquotID,cellType,dataSubtype,diagnosis,dissociationMethod,fileFormat,isCellLine,isPrimaryCell,isStranded,libraryPreparationMethod,modelSystemName,nf1Genotype,nf2Genotype,nucleicAcidSource,organ,readDepth,readLength,readPair,readPairOrientation,readStrandOrigin,sex,species,specimenID,specimenPreparationMethod,tissue,tumorType,entityId -GenomicsAssayTemplate,a,experimentalData,,genomicVariants,rnaSeq,,,,,,,10,,,schwann,,,,,,,,,,,,,,,,1,,,,,,,,,syn32531841 -GenomicsAssayTemplate,b,experimentalData,,genomicVariants,rnaSeq,,,,,,,10.5,,,schwann,,,,,,,,,,,,,,,,1,,,,,,,,,syn32531837 +GenomicsAssayTemplate,a,experimentalData,,genomicVariants,rnaSeq,,,,,,,,,,schwann,,,,,,,,,,,,,,,,1,,,,,,,,,syn32531841 +GenomicsAssayTemplate,b,experimentalData,,genomicVariants,rnaSeq,,,,,,,,,,schwann,,,,,,,,,,,,,,,,1,,,,,,,,,syn32531837 GenomicsAssayTemplate,c,experimentalData,,genomicVariants,rnaSeq,,,,,,,,,,schwann,,,,,,,,,,,,,,,,,,,,,,,,,syn32531834 GenomicsAssayTemplate,d,experimentalData,,genomicVariants,rnaSeq,,,,,,,,,,schwann,,,,,,,,,,,,,,,,,,,,,,,,,syn32531832 GenomicsAssayTemplate,e,experimentalData,,genomicVariants,rnaSeq,,,,,,,,,,schwann,,,,,,,,,,,,,,,,,,,,,,,,,syn32531831