From f0fad92c08adde17d610772f5720ba3e8ef9416a Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu <32753274+anngvu@users.noreply.github.com> Date: Wed, 12 Apr 2023 09:23:09 -0600 Subject: [PATCH] Patch/docs (#278) * Update template docs * Refactor docs code and update templates * Rename a module file * Add Dockerfile for doc-builder image * Clean up some dependencies and imports * Add workflow for docs * Add workflow comment * Build jsonld --------- Co-authored-by: nf-osi[bot] --- .github/workflows/publish-docs.yml | 45 + NF.csv | 64 +- NF.jsonld | 39 +- docs/Dockerfile | 9 + docs/docTemplate.R | 74 +- docs/graph.R | 7 +- docs/index.Rmd | 468 +-- docs/index.html | 3721 +++++++++++++++-- docs/templates/Clinical_Assay_Template.csv | 68 +- .../Dynamic_Light_Scattering_Template.csv | 15 + docs/templates/Epigenetics_Assay_Template.csv | 76 +- docs/templates/Genomics_Assay_Template.csv | 88 +- docs/templates/Imaging_Assay_Template.csv | 64 +- .../Light_Scattering_Assay_Template.csv | 17 + docs/templates/MRI_Assay_Template.csv | 70 +- .../templates/Patient_Timepoints_Template.csv | 24 +- .../Pharmacokinetics_Assay_Template.csv | 76 +- .../Plate_Based_Reporter_Assay_Template.csv | 74 +- .../Processed_Aligned_Reads_Template.csv | 90 +- .../Processed_Expression_Template.csv | 66 +- .../Processed_Variant_Calls_Template.csv | 45 +- docs/templates/Proteomics_Assay_Template.csv | 70 +- docs/templates/RNASeq_Template.csv | 90 +- docs/templates/ScRNASeq_Template.csv | 88 +- docs/templates/Source_Code_Template.csv | 20 +- docs/templates/WES_Template.csv | 86 +- docs/templates/WGS_Template.csv | 84 +- modules/Data/{Metadata.csv => Resource.csv} | 0 28 files changed, 4278 insertions(+), 1360 deletions(-) create mode 100644 .github/workflows/publish-docs.yml create mode 100644 docs/Dockerfile create mode 100644 docs/templates/Dynamic_Light_Scattering_Template.csv create mode 100644 docs/templates/Light_Scattering_Assay_Template.csv rename modules/Data/{Metadata.csv => Resource.csv} (100%) 
diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml new file mode 100644 index 00000000..ea82a8cb --- /dev/null +++ b/.github/workflows/publish-docs.yml @@ -0,0 +1,45 @@ +name: Build and publish docs to GH Pages + +on: + push: + branches: + - main + + # TODO setup conditional to build but not push + #pull_request: + # branches: + # - main + +jobs: + + build-and-publish: + runs-on: ubuntu-latest + + permissions: + contents: read + pages: write + id-token: write + + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup GH Pages + uses: actions/configure-pages@v3 + + - name: Build using docker + run: | + docker run -v $(pwd):/app ghcr.io/nf-osi/data-model-docs + + - name: Upload artifact + uses: actions/upload-pages-artifact@v1 + with: + path: docs + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/NF.csv b/NF.csv index 7b3d0d38..1eb63c15 100644 --- a/NF.csv +++ b/NF.csv @@ -259,37 +259,37 @@ readDepth,"If available, the coverage statistic as output from bedtools coverage isPairedEnd,(Legacy/deprecated annotation) Whether or not is paired-end sequencing (Yes; No). 
Note that this can be inferred as 'Yes' when runType=pairedEnd; current templates actually use runType to capture this info.,"Yes, No",,FALSE,,ngs,,,,isPairedEnd,DataProperty,Assay,annotationProperty,ngsParameter,,, totalReads,"If available, the total number of reads collected from samtools.",,,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=aligned_reads&anchor=total_reads,ngs,,,,totalReads,DataProperty,Assay,annotationProperty,ngsParameter,,, targetDepth,"The targeted read depth prior to sequencing.",,,FALSE,https://data.humantumoratlas.org/standards/bulkrnaseq#TargetDepth,ngs,,,,targetDepth,DataProperty,Assay,annotationProperty,ngsParameter,,, -raw counts,The number or amount of something.,"",assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C25463,dataType,,"","",Raw_Counts,Class,Data,Data_Class,"","","","" -Volume,The amount of three dimensional space occupied by an object or the capacity of a space or container.,"",assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C25335,dataType,,"","",Volume,Class,Data,Data_Class,"","","","" -Weight,The vertical force exerted by a mass as a result of gravity.,"",assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C25208,dataType,,"","",Weight,Class,Data,Data_Class,"","","","" -Pharmacokinetic Study,"A study of the process by which a drug is absorbed, distributed, metabolized, and eliminated by the body.","",assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C49663,dataType,,"","",Pharmacokinetic_Study,Class,Data,Data_Class,"","","","" -genomicVariants,"Genomic alterations, including single nucleotide polymorphisms, short indels and structural variants. Use more specific term if possible, esp. 
if data is only of one specific subset.","",assay,FALSE,https://www.ebi.ac.uk/ols/ontologies/edam/terms?iri=http%3A%2F%2Fedamontology.org%2Foperation_3227,dataType,,"","",Genomic_Variants,Class,Data,Data_Class,"","","","" -AlignedReads,"Aligned reads output from alignment workflows","",assay,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=aligned_reads,dataType,,"","",Aligned_Reads,Class,Data,Data_Class,"","","","" -SomaticVariants,"Called somatic variants","",assay,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=simple_somatic_mutation,dataType,,"","",Somatic_Variants,Class,Data,Data_Class,"",,"","" -AnnotatedSomaticVariants,"Somatic variants annotated with some annotation workflow","",assay,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=annotated_somatic_mutation,dataType,,"","",Annotated_Somatic_Variants,Class,Data,Data_Class,"","","","" -GermlineVariants,"Called germline variants","",assay,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=simple_germline_variation,dataType,,"","",Germline_Variants,Class,Data,Data_Class,"","","","" -AnnotatedGermlineVariants,"Germline variants annotated with some annotation workflow","",assay,FALSE,,dataType,,"","",Annotated_Germline_Variants,Class,Data,Data_Class,"","","","" -StructuralVariants,"Specifically genomic variants data classified as structural variants, which may be derived from specialized variant calling workflows","",assay,FALSE,,dataType,,"","",Structural_Variants,Class,Data,Data_Class,"","","","" -behavior process,"The action, reaction, or performance of an organism in response to external or internal stimuli.","",assay,FALSE,hhttps://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=structural_variation,dataType,,"","",Behavior_Process,Class,Data,Data_Class,"","","","" -metabolomics,"The systematic study of metabolites, the 
chemical processes they are involved, and the chemical fingerprints of specific cellular processes in a whole cell, tissue, organ or organism.","",assay,FALSE,http://edamontology.org/topic_3172,dataType,,"","",Metabolomics,Class,Data,Data_Class,"","","","" -image,Biological or biomedical data that has been rendered into an image.,"",assay,FALSE,https://www.ebi.ac.uk/ols/ontologies/edam/terms?iri=http%3A%2F%2Fedamontology.org%2Fdata_2968,dataType,,"","",Image,Class,Data,Data_Class,"","","","" -geneExpression,The analysis of levels and patterns of synthesis of gene products (proteins and functional RNA) including interpretation in functional terms of gene expression data.,"",assay,FALSE,https://www.ebi.ac.uk/ols/ontologies/edam/terms?iri=http%3A%2F%2Fedamontology.org%2Ftopic_0203,dataType,,"","",Gene_Expression,Class,Data,Data_Class,"","","","" -isoformExpression,Expression of protien isoforms formed from alternative splicings or other post-translational modifications of a single gene through RNA splicing mechanisms.,"",assay,FALSE,https://en.wikipedia.org/wiki/Protein_isoform,dataType,,"","",Isoform_Expression,Class,Data,Data_Class,"","","","" -proteomics,"Protein and peptide identification, especially in the study of whole proteomes of organisms.","",assay,FALSE,https://www.ebi.ac.uk/ols/ontologies/edam/terms?iri=http%3A%2F%2Fedamontology.org%2Ftopic_0121,dataType,,"","",Proteomics,Class,Data,Data_Class,"","","","" -particle characterization,"A series of analytical methods that provide information about entities such as composition, structure and defects. 
",,assay,FALSE,https://ontobee.org/ontology/NCIT?iri=http://purl.obolibrary.org/obo/NCIT_C62317,dataType,,,,Particle_Characterization,Class,Data,Data_Class,"","","","" -kinomics,Kinomics is the study of protein kinases and protein kinase signaling.,"",assay,FALSE,http://www.kinomecore.com/what-is-kinomics/,dataType,,"","",Kinomics,Class,Data,Data_Class,"","","","" -drugScreen,Information on drug sensitivity and molecular markers of drug response,"",assay,FALSE,https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3531057/,dataType,,"","",Drug_Screen,Class,Data,Data_Class,"","","","" -drugCombinationScreen,Information on drug sensitivity of more than one compound,"","",FALSE,https://www.ncbi.nlm.nih.gov/pubmed/29344898,dataType,,"","",Drug_Combination_Screen,Class,Data,Data_Class,"","","","" -cellularPhysiology,"","",assay,FALSE,"",dataType,,"","",Cellular_Physiology,Class,Data,Data_Class,"","","","" -chromatinActivity,Chromatin activity that allow access of condensed genomic DNA and potentially control gene expression.,"","",FALSE,https://en.wikipedia.org/wiki/Chromatin_remodeling,dataType,,"","",Chromatin_Activity,Class,Data,Data_Class,"","","","" -surveyData,A data set that contains the outcome of a survey.,"",assay,FALSE,http://purl.obolibrary.org/obo/OMIABIS_0000060,dataType,,"","",Survey_Data,Class,Data,Data_Class,"","","","" -network,An interconnected system of things or people.,"","",FALSE,http://purl.obolibrary.org/obo/NCIT_C61377,dataType,,"","",Network,Class,Data,Data_Class,"","","","" -clinical,Data obtained through patient examination or treatment.,"","",FALSE,http://purl.obolibrary.org/obo/NCIT_C15783,dataType,,"","",Clinical_Data,Class,Data,Data_Class,"","","","" -immunoassay,Laboratory test involving interaction of antigens with specific antibodies.,"","",FALSE,http://purl.obolibrary.org/obo/NCIT_C16723,dataType,,"","","","",Data,Data_Class,"","","","" -electrophysiology,Data generated from an electrophysiology 
assay.,"","",FALSE,http://purl.obolibrary.org/obo/ERO_0000564,dataType,,"","",Electrophysiology,Class,Data,Data_Class,"","","","" -mask image,"Image used as the mask for an image processing operation, such as subtraction.","","",FALSE,http://dicom.nema.org/resources/ontology/DCM/121321,dataType,,"","",Mask_Image,Class,Data,Data_Class,"","","","" -curatedData,"Any file derived from or pertaining a manually or programatically curated data resource. Examples include: reference sequences, drug information databases, identifier maps""""","","",FALSE,"","",,"","",Curated_Data,Class,Data,Data_Class,"","","","" -characteristic,"Broad data type for measures that can encompass volume, weight, brightness, color, capacity, etc.","",assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C25447,dataType,,"","",characteristic,Class,Data,Data_Class,"","","","" +raw counts,The number or amount of something.,,assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C25463,dataType,,,,Raw_Counts,Class,Data,Data_Class,,,, +Volume,The amount of three dimensional space occupied by an object or the capacity of a space or container.,,assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C25335,dataType,,,,Volume,Class,Data,Data_Class,,,, +Weight,The vertical force exerted by a mass as a result of gravity.,,assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C25208,dataType,,,,Weight,Class,Data,Data_Class,,,, +Pharmacokinetic Study,"A study of the process by which a drug is absorbed, distributed, metabolized, and eliminated by the body.",,assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C49663,dataType,,,,Pharmacokinetic_Study,Class,Data,Data_Class,,,, +genomicVariants,"Genomic alterations, including single nucleotide polymorphisms, short indels and structural variants. Use more specific term if possible, esp. 
if data is only of one specific subset.",,assay,FALSE,https://www.ebi.ac.uk/ols/ontologies/edam/terms?iri=http%3A%2F%2Fedamontology.org%2Foperation_3227,dataType,,,,Genomic_Variants,Class,Data,Data_Class,,,, +AlignedReads,Aligned reads output from alignment workflows,,assay,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=aligned_reads,dataType,,,,Aligned_Reads,Class,Data,Data_Class,,,, +SomaticVariants,Called somatic variants,,assay,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=simple_somatic_mutation,dataType,,,,Somatic_Variants,Class,Data,Data_Class,,,, +AnnotatedSomaticVariants,Somatic variants annotated with some annotation workflow,,assay,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=annotated_somatic_mutation,dataType,,,,Annotated_Somatic_Variants,Class,Data,Data_Class,,,, +GermlineVariants,Called germline variants,,assay,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=simple_germline_variation,dataType,,,,Germline_Variants,Class,Data,Data_Class,,,, +AnnotatedGermlineVariants,Germline variants annotated with some annotation workflow,,assay,FALSE,,dataType,,,,Annotated_Germline_Variants,Class,Data,Data_Class,,,, +StructuralVariants,"Specifically genomic variants data classified as structural variants, which may be derived from specialized variant calling workflows",,assay,FALSE,,dataType,,,,Structural_Variants,Class,Data,Data_Class,,,, +behavior process,"The action, reaction, or performance of an organism in response to external or internal stimuli.",,assay,FALSE,https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=structural_variation,dataType,,,,Behavior_Process,Class,Data,Data_Class,,,, +metabolomics,"The systematic study of metabolites, the chemical processes they are involved in, and the chemical fingerprints of specific cellular processes in a whole cell, 
tissue, organ or organism.",,assay,FALSE,http://edamontology.org/topic_3172,dataType,,,,Metabolomics,Class,Data,Data_Class,,,, +image,Biological or biomedical data that has been rendered into an image.,,assay,FALSE,https://www.ebi.ac.uk/ols/ontologies/edam/terms?iri=http%3A%2F%2Fedamontology.org%2Fdata_2968,dataType,,,,Image,Class,Data,Data_Class,,,, +geneExpression,The analysis of levels and patterns of synthesis of gene products (proteins and functional RNA) including interpretation in functional terms of gene expression data.,,assay,FALSE,https://www.ebi.ac.uk/ols/ontologies/edam/terms?iri=http%3A%2F%2Fedamontology.org%2Ftopic_0203,dataType,,,,Gene_Expression,Class,Data,Data_Class,,,, +isoformExpression,Expression of protein isoforms formed from alternative splicings or other post-translational modifications of a single gene through RNA splicing mechanisms.,,assay,FALSE,https://en.wikipedia.org/wiki/Protein_isoform,dataType,,,,Isoform_Expression,Class,Data,Data_Class,,,, +proteomics,"Protein and peptide identification, especially in the study of whole proteomes of organisms.",,assay,FALSE,https://www.ebi.ac.uk/ols/ontologies/edam/terms?iri=http%3A%2F%2Fedamontology.org%2Ftopic_0121,dataType,,,,Proteomics,Class,Data,Data_Class,,,, +particle characterization,"A series of analytical methods that provide information about entities such as composition, structure and defects. 
",,assay,FALSE,https://ontobee.org/ontology/NCIT?iri=http://purl.obolibrary.org/obo/NCIT_C62317,dataType,,,,Particle_Characterization,Class,Data,Data_Class,,,, +kinomics,Kinomics is the study of protein kinases and protein kinase signaling.,,assay,FALSE,http://www.kinomecore.com/what-is-kinomics/,dataType,,,,Kinomics,Class,Data,Data_Class,,,, +drugScreen,Information on drug sensitivity and molecular markers of drug response,,assay,FALSE,https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3531057/,dataType,,,,Drug_Screen,Class,Data,Data_Class,,,, +drugCombinationScreen,Information on drug sensitivity of more than one compound,,,FALSE,https://www.ncbi.nlm.nih.gov/pubmed/29344898,dataType,,,,Drug_Combination_Screen,Class,Data,Data_Class,,,, +cellularPhysiology,,,assay,FALSE,,dataType,,,,Cellular_Physiology,Class,Data,Data_Class,,,, +chromatinActivity,Chromatin activity that allow access of condensed genomic DNA and potentially control gene expression.,,,FALSE,https://en.wikipedia.org/wiki/Chromatin_remodeling,dataType,,,,Chromatin_Activity,Class,Data,Data_Class,,,, +surveyData,A data set that contains the outcome of a survey.,,assay,FALSE,http://purl.obolibrary.org/obo/OMIABIS_0000060,dataType,,,,Survey_Data,Class,Data,Data_Class,,,, +network,An interconnected system of things or people.,,,FALSE,http://purl.obolibrary.org/obo/NCIT_C61377,dataType,,,,Network,Class,Data,Data_Class,,,, +clinical,Data obtained through patient examination or treatment.,,,FALSE,http://purl.obolibrary.org/obo/NCIT_C15783,dataType,,,,Clinical_Data,Class,Data,Data_Class,,,, +immunoassay,Laboratory test involving interaction of antigens with specific antibodies.,,,FALSE,http://purl.obolibrary.org/obo/NCIT_C16723,dataType,,,,,,Data,Data_Class,,,, +electrophysiology,Data generated from an electrophysiology assay.,,,FALSE,http://purl.obolibrary.org/obo/ERO_0000564,dataType,,,,Electrophysiology,Class,Data,Data_Class,,,, +mask image,"Image used as the mask for an image processing operation, such as 
subtraction.",,,FALSE,http://dicom.nema.org/resources/ontology/DCM/121321,dataType,,,,Mask_Image,Class,Data,Data_Class,,,, +curatedData,"Any file derived from or pertaining to a manually or programmatically curated data resource. Examples include: reference sequences, drug information databases, identifier maps",,,FALSE,,,,,,Curated_Data,Class,Data,Data_Class,,,, +characteristic,"Broad data type for measures that can encompass volume, weight, brightness, color, capacity, etc.",,assay,FALSE,http://purl.obolibrary.org/obo/NCIT_C25447,dataType,,,,characteristic,Class,Data,Data_Class,,,, normalized,A data set that is produced as the output of a normalization data transformation.,"","",FALSE,http://purl.obolibrary.org/obo/OBI_0000451,dataSubtype,,"","",Normalized,Class,Data,Data_Level,"","","","" dataMatrix,A file of data containing multiple values for multiple samples.,"","",FALSE,"",dataSubtype,,"","",Data_Matrix,Class,Data,Data_Level,"","","","" raw,"A data file produced by an instrument, or one with very little subsequent processing.","","",FALSE,"",dataSubtype,,"","",Raw,Class,Data,Data_Level,"","","","" @@ -415,7 +415,7 @@ reference sequence,Syntactic sequences that has a role as reference of an annota dataSubtype,"Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","normalized, dataMatrix, raw, processed, metadata, representative",,TRUE,,experimentalData,,,,dataSubtype,Property,Data,annotationProperty,,Data_Level,one, metadataType,"For files of dataSubtype: metadata, a description of the type of metadata in the file.","individual, biospecimen, assay, data dictionary, manifest, protocol",,FALSE,,experimentalData,,,,metadataType,Property,Data,annotationProperty,,,, dataType,"A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","Volume, Weight, Pharmacokinetic Study, genomicVariants,behavior process, metabolomics, image, geneExpression, isoformExpression, proteomics, particle characterization, kinomics, drugScreen, drugCombinationScreen, cellularPhysiology, chromatinActivity, surveyData, network, clinical, immunoassay, electrophysiology, raw counts, mask image,AlignedReads,SomaticVariants,AnnotatedSomaticVariants,StructuralVariants,GermlineVariants,AnnotatedGermlineVariants,characteristic",dataSubtype,TRUE,,experimentalData,,,,dataType,Property,Data,annotationProperty,,Data,one, -expressionUnit,Measure used for transcript expression quantification,"TPM,RPKM,FPKM,Counts,Other",,TRUE,,experimentalData,,,,expressionUnit,DataProperty,Data,annotationProperty,,, +expressionUnit,Measure used for transcript expression quantification,"TPM,RPKM,FPKM,Counts,Other",,TRUE,,experimentalData,,,,expressionUnit,DataProperty,Data,annotationProperty,,,, failedQC,"Whether the sample or data failed QC checks (Yes, No)","Yes, No",,TRUE,,experimentalData,,,,failedQC,DataProperty,Data,annotationProperty,,, Filename,The name of the file.,,"entityId,fileFormat",TRUE,Sage Bionetworks,NF,,,,fileName,Class,Data,annotationProperty,fileName,,one,Automatically filled when using the curator app. programmingLanguage,A computer programming language,"Python, R, MATLAB, Java, C, C++, C#, Javascript, bash",,FALSE,Sage Bionetworks,,,,,programmingLanguage,Property,Data,annotationProperty,ngsParameter,,, diff --git a/NF.jsonld b/NF.jsonld index 01074358..b41e0391 100644 --- a/NF.jsonld +++ b/NF.jsonld @@ -7558,6 +7558,28 @@ ], "sms:validationRules": [] }, + { + "@id": "bts:Particlecharacterization", + "@type": "rdfs:Class", + "rdfs:comment": "A series of analytical methods that provide information about entities such as composition, structure and defects. 
", + "rdfs:label": "Particlecharacterization", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "particle characterization", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Assay" + } + ], + "sms:validationRules": [] + }, { "@id": "bts:Kinomics", "@type": "rdfs:Class", @@ -21519,23 +21541,6 @@ "sms:required": "sms:false", "sms:validationRules": [] }, - { - "@id": "bts:Particlecharacterization", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Particlecharacterization", - "rdfs:subClassOf": [ - { - "@id": "bts:DataType" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "particle characterization", - "sms:required": "sms:false", - "sms:validationRules": [] - }, { "@id": "bts:TPM", "@type": "rdfs:Class", diff --git a/docs/Dockerfile b/docs/Dockerfile new file mode 100644 index 00000000..6a5b7bf6 --- /dev/null +++ b/docs/Dockerfile @@ -0,0 +1,9 @@ +FROM ghcr.io/nf-osi/nfportalutils:develop + +WORKDIR /app + +RUN apt-get -yq install pandoc + +RUN R -e "install.packages(c('rmarkdown', 'reactable', 'visNetwork'), repos='http://cran.rstudio.com/')" + +ENTRYPOINT ["/bin/bash", "R", "-e", "rmarkdown::render('docs/index.Rmd')"] diff --git a/docs/docTemplate.R b/docs/docTemplate.R index a85bddef..f1533dcd 100644 --- a/docs/docTemplate.R +++ b/docs/docTemplate.R @@ -15,48 +15,72 @@ #' #' Currently, schematic templates allow modeling more on the simplistic side and #' don't formally express all these, so only a few are checked. -#' Moreover, the jsonld version encodes much less information than the csv version -#' (jsonld conversion loses custom metadata in the csv), which is why this currently depends on both formats. 
+#' Currently, the jsonld version loses some information when translated from the csv source +#' (mainly the summary Range definition corresponding to https://www.w3.org/TR/rdf-schema/#ch_range and EditorNote). #' #' @param templates Named vector of templates to process, #' where names corresponds to id without prefix (currently whatever follows "bts:"), #' and value is the real internal ID (in .ID). -#' @param schema_csv Schema representation read from `.csv`. -#' @param schema_jsonld Schema path to jsonld file. +#' @param schema Schema list object parsed from a schematic jsonld. +#' @param prefix Namespace prefix. #' @param savedir Directory where template representations will be outputted. +#' @param verbose Whether to be verbose about what's going on. docTemplate <- function(templates, - schema_csv, - schema_jsonld = "../NF.jsonld", - savedir = "templates/") { + schema, + prefix = "bts:", + savedir = "templates/", + verbose = TRUE) { for(x in names(templates)) { # e.g. x <- "GenomicsAssayTemplate" # For template, parse DependsOn to get all props present in manifest - props <- nfportalutils::get_dependency_from_json_schema(paste0("bts:", x), - schema = schema_jsonld) + prop_ids <- nfportalutils::get_dependency_from_json_schema(paste0(prefix, x), + schema = schema, + return_labels = FALSE) - # Create the ControlledVocab aka Range col for each prop - # ControlledVocab col is handled specially and uses a custom Range col defined in csv - # For CV col we create a link to a class if the term editor has referenced a class in Range, - # else we simply fall back to enumerating the valid values - index <- match(props, schema_csv$Attribute) - range <- dplyr::if_else(schema_csv[index, "Range"] != "", - paste0("#", schema_csv[index, "Range"]), - schema_csv[index, "Valid.Values"]) + # The range of prop `assay` is anything of class `Assay` -- + # However, the json-ld does not make this so conceptually concise for props, instead listing all possible values + # In the docs, we 
don't want to enumerate all values and instead want to create a _link_ to a class that defines the range + # To do this, we can infer the class by looking up the parent class of the first listed enum for that prop + # The range could also be inferred to be a boolean or string/integer rather than a class + summarize_range <- function(prop_id, schema, return_labels = FALSE) { + # `prefix` is resolved from the enclosing docTemplate() scope; `return_labels` is accepted but not used here + enums <- nfportalutils::get_by_prop_from_json_schema(id = prop_id, + prop = "schema:rangeIncludes", + schema = schema, + return_labels = FALSE) + if(is.null(enums)) return("") + if(length(enums) < 5) return(paste(gsub(prefix, "", enums, fixed = TRUE), collapse = ",")) + if(paste0(prefix, "Yes") %in% enums) return("Y/N") # NOTE(review): only reachable with 5+ enums because of the length check above -- confirm intended order + enum1 <- enums[1] + # look up the parent class(es) of the first enum; keep the whole result so the multiple-parent warning below can fire + class <- unlist(nfportalutils::get_by_prop_from_json_schema(enum1, + prop = "rdfs:subClassOf", + schema = schema, + return_labels = FALSE)) + if(length(class) > 1) warning(enum1, " has multiple parent classes") + class <- sub(prefix, "", class[1], fixed = TRUE) # use first but warn + class <- paste0("#", class) + class + } + + # because of the way schematic imports biothings without us having much control over it some ids can be duplicated (!) 
+ schema <- schema[!duplicated(sapply(schema, function(x) x$`@id`))] + sms <- Filter(function(x) x$`@id` %in% prop_ids, schema) + sms <- lapply(sms, function(x) { + list(Field = x$`sms:displayName`, + Description = if(!is.null(x$`rdfs:comment`)) x$`rdfs:comment` else " ", + Required = if(!is.null(x$`sms:required`)) sub("sms:", "", x$`sms:required`) else "?", + ValidRange = summarize_range(x$`@id`, schema)) + }) + tt <- rbindlist(sms) # Sort to show by required, then alphabetically - template_tab <- template_tab[order(-Required, Field), ] + tt <- tt[order(-Required, Field), ] template_id <- templates[x] filepath <- paste0(savedir, template_id, ".csv") - write.csv(template_tab, file = filepath, row.names = F) + write.csv(tt, file = filepath, row.names = F) } } diff --git a/docs/graph.R b/docs/graph.R index cbe54c9f..6a4d6785 100644 --- a/docs/graph.R +++ b/docs/graph.R @@ -1,5 +1,4 @@ library(visNetwork) -library(tidyverse) #-------------------------------------------------------------------------------# @@ -10,7 +9,7 @@ library(tidyverse) # schema <- readExtSchema("NF.csv") readExtSchema <- function(schema_csv, ext_classes_csv = "ext_classes.csv") { schema <- read.csv(schema_csv) %>% - select(label = Attribute, id = .ID, Root = .Root, SubOf = .SubOf) + dplyr::select(label = Attribute, id = .ID, Root = .Root, SubOf = .SubOf) # Extended class definitions ext_classes <- read.csv(ext_classes_csv) %>% @@ -32,7 +31,7 @@ getNodesEdges <- function(schema, cluster_root, font.color = list(A = "white", C = "white")) ) { cluster <- schema %>% - filter(Root == cluster_root) + dplyr::filter(Root == cluster_root) # Namespaces for cluster ancestor vs Children A <- paste(prefix, "A", sep = "_") @@ -66,7 +65,7 @@ c2Cluster <- function(cluster_1, cluster_2, connect_by, # Configure between-cluster relations relations <- read.csv(ext_relations_csv, header = T) edges <- relations %>% - filter(property == connect_by) + dplyr::filter(property == connect_by) relations$color <- viz$color 
relations$width <- viz$width diff --git a/docs/index.Rmd b/docs/index.Rmd index 2200237c..78c24c3d 100644 --- a/docs/index.Rmd +++ b/docs/index.Rmd @@ -11,16 +11,16 @@ output: collapsed: false toc_depth: 4 params: - schema_csv: "../NF.csv" + schema_json: "../NF.jsonld" ext_classes_csv: "../ext_classes.csv" ext_relations_csv: "../ext_relations.csv" + graph_view: false use_cache: false --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library(data.table) -library(tidyverse) library(reactable) library(htmltools) # library(visNetwork) @@ -28,6 +28,7 @@ library(htmltools) source("graph.R") source("docTemplate.R") + basicTable <- function(dt, columns = c("Attribute", "Description")) { reactable(dt[, columns], filterable = if(nrow(dt) > 20) TRUE else FALSE, @@ -50,6 +51,8 @@ expandedTable <- function(dt) { ) detail } + + reactable(dt[, c("Attribute", "Description")], filterable = TRUE, @@ -81,57 +84,71 @@ templateTable <- function(dt) { Field = colDef(minWidth = 180), Description = colDef(minWidth = 300), Required = colDef(maxWidth = 100), - ControlledVocab = colDef(name = "Controlled\nVocab", - cell = refRange) + ValidRange = colDef(name = "Valid Range", cell = refRange) ), pagination = FALSE, wrap = TRUE, class = "term-table") } + + +module_section <- function(module, + submodule, + submodule_id = submodule) { + + for(i in seq_along(submodule)) { + file <- glue::glue("../modules/{module}/{submodule[i]}.csv") + data <- fread(file) + + cat(paste0("\n", "#### ", submodule[i], " {#", submodule_id[i], " .tabset .tabset-fade .tabset-pills}", "\n")) + + table <- data[, .(Attribute, Description, DependsOn)] + + print( + tagList( + expandedTable(table), tags$br())) + } +} + ``` ```{r process_schema_table, echo=FALSE} -# To avoid overwhelming info and keep page neat, process only selected columns -# Aside from default schematic columns, -# we require internal columns prefixed with `.` to have a useful table -schema <- read.csv(params$schema_csv) %>% - 
select(Attribute, Description, Required, Valid.Values, DependsOn, Parent, - ID = .ID, Type = .Type, Root = .Root, SubOf = .SubOf, - Range = .Range, EditorNote = .EditorNote) - +schema <- jsonlite::read_json(params$schema_json) +schema <- schema$`@graph` # docTemplate needs the unmodified schema table +# this can be factored out as an external config if(!params$use_cache) { - docTemplate(templates = c(# genomics/transcriptomics - GenomicsAssayTemplate = "Genomics_Assay_Template", - WGSTemplate = "WGS_Template", - WESTemplate = "WES_Template", - EpigeneticsAssayTemplate = "Epigenetics_Assay_Template", - RNASeqTemplate = "RNASeq_Template", - ScRNASeqTemplate = "ScRNASeq_Template", - - # genomics/transcriptomics -- processed - ProcessedAlignedReadsTemplate = "Processed_Aligned_Reads_Template", - ProcessedVariantCallsTemplate = "Processed_Variant_Calls_Template", - ProcessedExpressionTemplate = "Processed_Expression_Template", - - # proteomics - ProteomicsAssayTemplate = "Proteomics_Assay_Template", - - # imaging - ImagingAssayTemplate = "Imaging_Assay_Template", - MRIAssayTemplate = "MRI_Assay_Template", - - # other - PharmacokineticsAssayTemplate = "Pharmacokinetics_Assay_Template", # drug assays type - PlateBasedReporterAssayTemplate = "Plate_Based_Reporter_Assay_Template", - ClinicalAssayTemplate = "Clinical_Assay_Template", - PatientTimepointsTemplate = "Patient_Timepoints_Template", - SourceCodeTemplate = "Source_Code_Template" - ), - schema_csv = schema - ) + templates <- c(# genomics/transcriptomics + GenomicsAssayTemplate = "Genomics_Assay_Template", + WGSTemplate = "WGS_Template", + WESTemplate = "WES_Template", + EpigeneticsAssayTemplate = "Epigenetics_Assay_Template", + RNASeqTemplate = "RNASeq_Template", + ScRNASeqTemplate = "ScRNASeq_Template", + + # genomics/transcriptomics -- processed + ProcessedAlignedReadsTemplate = "Processed_Aligned_Reads_Template", + ProcessedVariantCallsTemplate = "Processed_Variant_Calls_Template", + ProcessedExpressionTemplate = 
"Processed_Expression_Template", + + # proteomics + ProteomicsAssayTemplate = "Proteomics_Assay_Template", + + # imaging + ImagingAssayTemplate = "Imaging_Assay_Template", + MRIAssayTemplate = "MRI_Assay_Template", + + # other + PharmacokineticsAssayTemplate = "Pharmacokinetics_Assay_Template", # drug assays type + PlateBasedReporterAssayTemplate = "Plate_Based_Reporter_Assay_Template", + ClinicalAssayTemplate = "Clinical_Assay_Template", + PatientTimepointsTemplate = "Patient_Timepoints_Template", + SourceCodeTemplate = "Source_Code_Template", + LightScatteringAssayTemplate = "Light_Scattering_Assay_Template") + + docTemplate(templates, schema = schema) } # Splits terms into Property or Class using `Type` @@ -148,24 +165,17 @@ if(!params$use_cache) { :::info -These are standard terms available to NF resource contributors for annotation of their resource. -When contributors use these terms, they are helping to label and classify their resource for improved interoperability and findability. +These are standard terms available to NF resource contributors for annotation of their resource. +Terms are grouped into modules. +When using these terms, contributors are helping to label and classify resources for improved interoperability and findability. ::: -### Assay Module - -#### Assay {#Assay .tabset .tabset-fade .tabset-pills} +### Assay -##### Terms - -```{r assays_table, echo=FALSE} - -assays_table <- schema %>% - filter(Root == "Assay") %>% - select(Attribute, Description, DependsOn) +```{r tables, echo=FALSE,results='asis'} -expandedTable(assays_table) +module_section("Assay", c("Assay", "Platform")) ``` @@ -180,7 +190,7 @@ More specialized templates may be made available as needed for specific assays. 
::: -```{r schema_ext_assay_template, out.width="100%", echo=FALSE, eval=TRUE} +```{r schema_ext_assay_template, out.width="100%", echo=FALSE, eval=params$graph_view} schema_ext <- readExtSchema(params$schema_csv, params$ext_classes_csv) assay <- getNodesEdges(schema_ext, "Assay", "A", @@ -194,344 +204,57 @@ defaultGraph(g_assay_template) ``` -#### Platform {#Platform .tabset .tabset-fade .tabset-pills} - -##### Terms - -```{r platforms_table, echo=FALSE, eval=TRUE} - -platforms_table <- schema %>% - filter(Parent == "platform") %>% - select(Attribute, Description, DependsOn) - -basicTable(platforms_table) -``` - - -##### Relations Graph - -:::info - -This partial graph view logically relates **assays** to common **platforms**. - -::: - -*Documentation currently in development.* - -#### Parameters - -:::info - -The parameters used should be specified specific to the assay and platform. - -::: - -##### NGS Parameter {#NGS_Parameter} - -###### Library Prep {#Library_Prep} - -```{r echo=FALSE} - -prep_params <- schema %>% - filter(Parent == "libraryPrep") %>% - select(Attribute, Description, DependsOn) - -basicTable(prep_params) -``` - -###### Library Prep Method {#Library_Preparation_Method} - -```{r echo=FALSE} - -prep_method_params <- schema %>% - filter(Parent == "libraryPrepMethod") %>% - select(Attribute, Description, DependsOn) - -basicTable(prep_method_params) -``` - - -###### Read Pair {#Read_Pair} - -```{r echo=FALSE} - -read_params <- schema %>% - filter(Parent == "readPair") %>% - select(Attribute, Description, DependsOn) - -basicTable(read_params) -``` - -###### Read Pair Orientation {#Read_Pair_Orientation} - -```{r echo=FALSE} - -orientation_params <- schema %>% - filter(Parent == "readPairOrientation") %>% - select(Attribute, Description, DependsOn) - -basicTable(orientation_params) -``` - -###### Run Type {#Run_Type} -```{r echo=FALSE} - -run_params <- schema %>% - filter(Parent == "runType") %>% - select(Attribute, Description, DependsOn) - 
-basicTable(run_params) -``` - - - -### Data Module - -#### Data Type {#Data .tabset .tabset-fade .tabset-pills} - -##### Terms - -```{r data_types_table, echo=FALSE} - -data_types_table <- schema %>% - filter(Parent == "dataType") %>% - select(Attribute, Description, DependsOn) - -basicTable(data_types_table) -``` - -##### Relations Graph - -:::info - -This partial graph view logically relates **data types** to **assays**. - -::: - -*Documentation currently in development.* - -#### Data Level {#Data .tabset .tabset-fade .tabset-pills} - -##### Terms - -```{r data_subtypes_table, echo=FALSE} - -data_level_table <- schema %>% - filter(Parent == "dataSubtype") %>% - select(Attribute, Description, DependsOn) - -basicTable(data_level_table) -``` - -#### File Format {#File_Format .tabset .tabset-fade .tabset-pills} - -##### Terms - -:::info -Certain formats are emphasized as
proprietary
, which make data less interoperable/reusable, as opposed to our
preferred
(open) formats. +### Information Entity -::: +```{r data_module, echo=FALSE, results='asis'} -```{r data_formats_table, echo=FALSE} - -data_formats_table <- schema %>% - filter(Parent == "fileFormat") %>% - select(Attribute, Description, Note = SubOf) - -# A little more complicated than the other tables -reactable(data_formats_table, - filterable = TRUE, - pagination = FALSE, - columns = list( - Attribute = colDef(name = "Label", maxWidth = 150), - Note = colDef( - maxWidth = 100, - cell = function(value) { - label <- switch(value, Preferred_Open_Format = "preferred", Proprietary_Format = "proprietary", "") - class <- switch(value, Preferred_Open_Format = "good", Proprietary_Format = "warning", "") - div(class = paste(class, "badge"), label) - } - ) - ), - wrap = FALSE, - class = "term-table" - ) +module_section("Data", + c("Resource", "Data_Class", "File_Format"), + c("ResourceType", "DataType", "FileFormat")) ``` -##### Relations Graph - -:::info - -This partial graph view logically organizes **data formats** and notes which formats are interconvertible. - -::: -*Documentation currently in development.* - -### Biosample Module +### Sample :::info -Biosample combines metadata at the individual-level and specimen-level. +Sample typically refers to a **biosample**, but on some rare occasions can refer to an inorganic sample from which data are generated. +For biosamples, there is a distinction between individual-level and specimen-level data. Data can be linked to individual-level sample information such as sex, species, diagnosis, and genotype. +Data can be linked to specimen-level information such as sample site (the organ or body part), specimen tissue or cell type, tumor class (if specimen is a tumor), and specimen state. 
::: - -#### Sex {#Sex .tabset .tabset-fade .tabset-pills} +```{r sample_module, echo=FALSE, results='asis'} -##### Terms -```{r sex_table, echo=FALSE} +module_section("Sample", + c("Body_Part", "Sex", "Species", "Disease", "Genotype", "Tissue", "Tumor", "Cell", "Cell_Line_Model", "Mouse_Model", "Specimen_Processing"), + c("Organ", "Sex", "Species", "Diagnosis", "Genotype", "Tissue", "Tumor", "CellType", "ModelSystemName", "ModelSystemName", "SpecimenProcessingMethod")) -sex_table <- schema %>% - filter(Parent == "sex") %>% - select(Attribute, Description, DependsOn) - -basicTable(sex_table) ``` -#### Species {#Species .tabset .tabset-fade .tabset-pills} - -##### Terms -```{r species_table, echo=FALSE} -species_table <- schema %>% - filter(Parent == "species") %>% - select(Attribute, Description, DependsOn) - -basicTable(species_table) -``` - -#### Diagnosis {#Diagnosis .tabset .tabset-fade .tabset-pills} - -##### Terms -```{r diagnosis_table, echo=FALSE} - -diagnosis_table <- schema %>% - filter(Parent == "diagnosis") %>% - select(Attribute, Description, DependsOn) - -basicTable(diagnosis_table) -``` - - -#### Genotype {#Genotype .tabset .tabset-fade .tabset-pills} - -##### Terms -```{r genotypes_table, echo=FALSE} - -genotypes_table <- schema %>% - filter(Root == "Genotype") %>% - select(Attribute, Description, DependsOn) - -basicTable(genotypes_table) -``` +### Experiment :::info -Data can be linked to specimen-level information such as sample site (the organ or body part), specimen tissue or cell type, tumor class (if specimen is a tumor), and specimen state. +Terminology to help characterize experiment. 
::: - - -#### Sample Site {.tabset .tabset-fade .tabset-pills} - -##### Terms - -```{r sample_site_table, echo=FALSE} +```{r biosample_module, echo=FALSE, results='asis'} -sample_site_table <- schema %>% - filter(Parent %in% c("organ", "bodyPart")) %>% - select(Attribute, Description, DependsOn) +module_section("Experiment", + c("Unit"), + c("TimePointUnit")) -basicTable(sample_site_table) ``` -#### Sample Specimen - -##### Tissue {#Tissue} -```{r tissue_table, echo=FALSE} - -tissue <- schema %>% - filter(Parent %in% c("tissue", "cellType")) %>% - select(Attribute, Description, DependsOn) - -basicTable(tissue) -``` - -##### Cell / Cell Line {#Cell} -```{r echo=FALSE} - -cell_table <- schema %>% - filter(SubOf %in% c("Primary_Cell", "Cell_Line_Model")) %>% - select(Attribute, Description, DependsOn) - -basicTable(cell_table) -``` - -#### Specimen Dissociation {#Dissociation_Method} - -*Documentation in development.* - -#### Specimen State {#Specimen_State} - -*Documentation in development.* - -#### Tumor Class {#Tumor .tabset .tabset-fade .tabset-pills} - -##### Terms -```{r tumor_table, echo=FALSE} - -tumor_table <- schema %>% - filter(Parent == "tumorType" | Parent == "Tumor") %>% - select(Attribute, Description, DependsOn) - -basicTable(tumor_table) -``` - -### Experiment Module - -#### Gene Perturbation - -##### Gene Perturbation Type {#Gene_Perturbation} -```{r echo=FALSE} - -perturb_type <- schema %>% - filter(Parent == "genePerturbationType") %>% - select(Attribute, Description, DependsOn) - -basicTable(perturb_type) -``` - -##### Gene Perturbation Technology {#Gene_Perturbation_Technology} - -```{r echo=FALSE} - -perturb_tech <- schema %>% - filter(Parent == "genePerturbationTechnology") %>% - select(Attribute, Description, DependsOn) - -basicTable(perturb_tech) -``` - -### Cross-Module - -:::info - -This includes modules that are shared. 
- -::: - -#### Model System {#Model_System} - -*Documentation in development.* - ## Annotation Templates :::info @@ -548,13 +271,14 @@ For example, the "assay" property allows the contributor to use [terms under Ass #### Genomics Assay Templates {#Genomics_Assay_Templates .tabset .tabset-fade .tabset-pills} ```{r standard_properties_table, echo=FALSE, eval=FALSE} + # CURRENTLY NOT RUN (eval=FALSE) # Custom code to get unique set of properties used across all templates -standard_properties_table <- schema %>% - filter(Parent %in% c("template")) %>% - select(Attribute, Description, DependsOn) - -basicTable(standard_properties_table) +# standard_properties_table <- schema %>% +# filter(Parent %in% c("template")) %>% +# select(Attribute, Description, DependsOn) +# +# basicTable(standard_properties_table) ``` ##### Genomics Assay (Generic) @@ -682,6 +406,16 @@ templateTable(read.csv("templates/Clinical_Assay_Template.csv")) ``` +#### Other Assays {.tabset .tabset-fade .tabset-pills} + + +##### Light Scattering +```{r echo=FALSE } + +templateTable(read.csv("templates/Light_Scattering_Assay_Template.csv")) + +``` + #### Non-Assay Templates {#Non_Assay_Template .tabset .tabset-fade .tabset-pills} ##### Patient Timepoints @@ -711,7 +445,7 @@ They are computed/automated annotations set by the data platform and DCC activit ::: ##### Terms -```{r reserved_properties_table, echo=FALSE} +```{r reserved_properties_table, echo=FALSE,eval=F} reserved_properties_table <- schema %>% filter(SubOf %in% c("dccProperty", "synapseProperty")) %>% diff --git a/docs/index.html b/docs/index.html index 634371c7..64e9f813 100644 --- a/docs/index.html +++ b/docs/index.html @@ -14,46 +14,3066 @@ NF Vocabulary / Schema - - + + - - - - + + + + - - - - - +h1.title {font-size: 38px;} +h2 {font-size: 30px;} +h3 {font-size: 24px;} +h4 {font-size: 18px;} +h5 {font-size: 16px;} +h6 {font-size: 12px;} +code {color: inherit; background-color: rgba(0, 0, 0, 0.04);} +pre:not([class]) { background-color: 
white } + + + + + - - - - - - - - - + + + + + + + +code{white-space: pre-wrap;} +span.smallcaps{font-variant: small-caps;} +span.underline{text-decoration: underline;} +div.column{display: inline-block; vertical-align: top; width: 50%;} +div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} +ul.task-list{list-style: none;} + - -
-
Relations Graph
-
-

This partial graph view logically relates assays to metadata templates available at the NF Data Curator App. For example, assays under the classification of Imaging_Assay currently uses a generic Imaging_Assay_Template for annotation. More specialized templates may be made available as needed for specific assays.

-
-
- +
+
+ +


Platform

-
-
Terms
-
- +
+
-
+ +


+
Relations Graph
-

This partial graph view logically relates assays to common platforms.

-
-

Documentation currently in development.

-
-
-
-

Parameters

-
-

The parameters used should be specified specific to the assay and platform.

+

This partial graph view logically relates assays to +metadata templates available at the NF Data +Curator App. For example, assays under the classification of +Imaging_Assay currently uses a generic +Imaging_Assay_Template for annotation. More specialized +templates may be made available as needed for specific assays.

-
-
NGS Parameter
-
-
Library Prep
-
-
-
-
Library Prep Method
-
-
-
-
Read Pair
-
- -
-
-
Read Pair Orientation
-
- -
-
-
Run Type
-
- -
-
-
-
-
-

Data Module

-
-

Data Type

-
-
Terms
-
- -
-
-
Relations Graph
-
-

This partial graph view logically relates data types to assays.

-
-

Documentation currently in development.

+
+

Information Entity

+
+

Resource

+
+
-
-

Data Level

-
-
Terms
-
- + +


+
+

Data_Class

+
+
-
-

File Format

-
-
Terms
-
-Certain formats are emphasized as -
-proprietary + +


-, which make data less interoperable/reusable, as opposed to our -
-preferred +
+

File_Format

+
+
-

(open) formats.

+ +


-
-
-
-
Relations Graph
+
+

Sample

-

This partial graph view logically organizes data formats and notes which formats are interconvertible.

-
-

Documentation currently in development.

-
+

Sample typically refers to a biosample, but on some +rare occasions can refer to an inorganic sample from which data are +generated. For biosamples, there is a distinction between +individual-level and specimen-level data. Data can be linked to +individual-level sample information such as sex, species, diagnosis, and +genotype. Data can be linked to specimen-level information such as +sample site (the organ or body part), specimen tissue or cell type, +tumor class (if specimen is a tumor), and specimen state.

+
+

Body_Part

+
+
-
-

Biosample Module

-
-

Biosample combines metadata at the individual-level and specimen-level. Data can be linked to individual-level sample information such as sex, species, diagnosis, and genotype.

+ +


-

Sex

-
-
Terms
-
- +
+
+ +


Species

-
-
Terms
-
- +
+
+ +


-

Diagnosis

-
-
Terms
-
- +

Disease

+
+
+ +


Genotype

-
-
Terms
-
- -
-

Data can be linked to specimen-level information such as sample site (the organ or body part), specimen tissue or cell type, tumor class (if specimen is a tumor), and specimen state.

-
- -
+
+
-
-

Sample Site

-
-
Terms
-
- + +


+
+

Tissue

+
+
-
-

Sample Specimen

-
-
Tissue
-
- + +


-
-
Cell / Cell Line
-
- +
+

Tumor

+
+
+ +


-
-

Specimen Dissociation

-

Documentation in development.

+
+

Cell

+
+
-
-

Specimen State

-

Documentation in development.

+ +


-
-

Tumor Class

-
-
Terms
-
- +
+

Cell_Line_Model

+
+
+ +


+
+

Mouse_Model

+
+
-
-

Experiment Module

-
-

Gene Perturbation

-
-
Gene Perturbation Type
-
- + +


-
-
Gene Perturbation Technology
-
- +
+

Specimen_Processing

+
+
+ +


-
-

Cross-Module

+
+

Experiment

-

This includes modules that are shared.

+

Terminology to help characterize experiment.

+
+
+

Unit

+
+
-
-

Model System

-

Documentation in development.

+ +


Annotation Templates

-

Annotation templates are spreadsheet templates that allow contributors to annotate resources uploaded to the repository. See interactive use of these templates at our NF Data Curator App. Templates implement “minimum metadata” standards specific to the type of data/resource (hence variants exist for assay types and “raw” vs “processed” data). Templates also contain common components, e.g. many will collect core sample info associated with the data.
-Unless the template field is free-text, it is meant to be filled by the contributor using the ontology terms/controlled vocabulary defined here. For example, the “assay” property allows the contributor to use terms under Assay.

+

Annotation templates are spreadsheet templates that allow +contributors to annotate resources uploaded to the repository. See +interactive use of these templates at our NF Data +Curator App. Templates implement “minimum metadata” standards +specific to the type of data/resource (hence variants exist for assay +types and “raw” vs “processed” data). Templates also contain common +components, e.g. many will collect core sample info associated with the +data.
+Unless the template field is free-text, it is meant to be filled by the +contributor using the ontology terms/controlled vocabulary defined here. +For example, the “assay” property allows the contributor to use terms under Assay.

-

Genomics Assay Templates

+

Genomics Assay +Templates

Genomics Assay (Generic)
-
- +
+
Epigenetics Assay
-
- +
+
WES Assay
-
- +
+
WGS Assay
-
- +
+
Bulk RNA-Seq Assay
-
- +
+
Single-cell RNA-Seq Assay
-
- +
+
Processed Aligned Reads
-
- +
+
Processed Variant Calls
-
- +
+
Processed Expression
-
- +
+
-

Proteomics Assay Templates

+

Proteomics Assay +Templates

Proteomics Assay (Generic)
-
- +
+

Imaging Assay Templates

Imaging Assay (Generic)
-
- +
+
MRI Assay
-
- +
+

Other Assay Templates

Pharmacokinetics Assay
-
- +
+
Plate Based Reporter Assay
-
- +
+
-

Clinical Assay Templates

+

Clinical Assay +Templates

Clinical Assay (Generic)
-
- +
+ +
+
+
+

Other Assays

+
+
Light Scattering
+
+

Non-Assay Templates

Patient Timepoints
-
- +
+
Source Code
-
- +
+
@@ -614,12 +3670,13 @@

Other

Reserved Properties

-

Reserved annotation properties also describe the resource in some way, but they are not expected to be edited directly by contributors. They are computed/automated annotations set by the data platform and DCC activities.

+

Reserved annotation properties also describe the resource in some +way, but they are not expected to be edited directly by contributors. +They are computed/automated annotations set by the data platform and DCC +activities.

-
+
Terms
-
-
diff --git a/docs/templates/Clinical_Assay_Template.csv b/docs/templates/Clinical_Assay_Template.csv index 007723f7..d2158def 100644 --- a/docs/templates/Clinical_Assay_Template.csv +++ b/docs/templates/Clinical_Assay_Template.csv @@ -1,34 +1,34 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. 
","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","optional","","" -"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","optional","","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. 
An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","optional","#Specimen_State","" -"timePointUnit","For timed experiments this represents the unit of time measured","optional","#Time_Unit","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. 
This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"entityId","TBD","false","" +"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","false","" +"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. 
An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","false","#SpecimenPreparationMethod" +"timePointUnit","For timed experiments this represents the unit of time measured","false","#TimePointUnit" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/Dynamic_Light_Scattering_Template.csv b/docs/templates/Dynamic_Light_Scattering_Template.csv new file mode 100644 index 00000000..b12e1e77 --- /dev/null +++ b/docs/templates/Dynamic_Light_Scattering_Template.csv @@ -0,0 +1,15 @@ +"Field","Description","Required","ControlledVocab","Note" +"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" +"Filename","The name of the file.","required","","Automatically filled when using the curator app." +"assay","The technology used to generate the data in this file","required","#Assay","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" +"resourceType","The type of resource being stored and annotated","required","#Resource","" +"concentrationMaterial","Numeric value for concentration of the material","optional","","" +"concentrationMaterialUnit","Unit used for the material concentration, e.g. 
mg/mL","optional","mg/mL,mM","" +"concentrationNaCl","Numeric value for NaCl concentration","optional","","" +"concentrationNaClUnit","Unit used for the NaCl concentration, e.g. mM","optional","mg/mL,mM","" +"materialType","Type of material in the characterization","optional","nanoparticles,polymeric nanoparticles,small molecule,DNA","" +"pH","Numeric value for pH sample parameter","optional","","" +"entityId",NA,NA,NA,NA diff --git a/docs/templates/Epigenetics_Assay_Template.csv b/docs/templates/Epigenetics_Assay_Template.csv index 865d6207..2f6ffdc3 100644 --- a/docs/templates/Epigenetics_Assay_Template.csv +++ b/docs/templates/Epigenetics_Assay_Template.csv @@ -1,38 +1,38 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. 
Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","optional","#Dissociation_Method","" -"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","optional","","" -"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","optional","","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." 
-"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"libraryPrep","The general strategy by which the library was prepared","optional","#Library_Prep","" -"libraryPreparationMethod","Method by which library was prepared","optional","#Library_Preparation_Method","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","optional","#Nucleic_Acid_Source","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","optional","#Specimen_State","" -"timePointUnit","For timed experiments this represents the unit of time measured","optional","#Time_Unit","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","false","#DissociationMethod" +"entityId","TBD","false","" +"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","false","" +"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. 
This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"libraryPrep","The general strategy by which the library was prepared","false","RRNAdepletion,PolyAselection,LncRNAenrichment,MiRNAenrichment" +"libraryPreparationMethod","Method by which library was prepared","false","#LibraryPreparationMethod" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","false","#NucleicAcidSource" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. 
Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","false","#SpecimenPreparationMethod" +"timePointUnit","For timed experiments this represents the unit of time measured","false","#TimePointUnit" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/Genomics_Assay_Template.csv b/docs/templates/Genomics_Assay_Template.csv index ece9b0d3..98d182ff 100644 --- a/docs/templates/Genomics_Assay_Template.csv +++ b/docs/templates/Genomics_Assay_Template.csv @@ -1,42 +1,46 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. 
This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"readLength","Number of base pairs (bp) sequenced for a read","required","","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","optional","#Dissociation_Method","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." 
-"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"isStranded","Whether or not the library is stranded (Yes; No)","optional","Yes, No","" -"libraryPrep","The general strategy by which the library was prepared","optional","#Library_Prep","" -"libraryPreparationMethod","Method by which library was prepared","optional","#Library_Preparation_Method","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","optional","#Nucleic_Acid_Source","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. 
Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","optional","","" -"readPair","The read of origin","optional","#Read_Pair","" -"readPairOrientation","The relative orientation of the reads in a paired-end protocol","optional","#Read_Pair_Orientation","" -"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","optional","#Read_Strand_Origin","" -"runType","Is the sequencing run single or paired end?","optional","#Run_Type","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","optional","#Specimen_State","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"readLength","Number of base pairs (bp) sequenced for a read","true","" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. 
This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","false","#DissociationMethod" +"entityId","TBD","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"isStranded","Whether or not the library is stranded (Yes; No)","false","Yes,No" +"isXenograft","Whether or not sample source is a xenograft (Yes; No)","false","Yes,No" +"libraryPrep","The general strategy by which the library was prepared","false","RRNAdepletion,PolyAselection,LncRNAenrichment,MiRNAenrichment" +"libraryPreparationMethod","Method by which library was prepared","false","#LibraryPreparationMethod" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","false","#NucleicAcidSource" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","false","" +"readPair","The read of origin","false","" +"readPairOrientation","The relative orientation of the reads in a paired-end protocol","false","Inward,Outward,Matching,Fr-firststrand" +"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","false","Forward,Reverse" +"runType","Is the sequencing run single or paired end?","false","SingleEnd,PairedEnd" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","false","#SpecimenPreparationMethod" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"transplantationRecipientSpecies","Species into which donor tissue was grown","false","Human,Mouse" +"transplantationRecipientTissue","Tissue into which a xenograft sample is transplanted","false","" +"transplantationType","Type of transplantation involved in the experiment, derived from MESH","false","Allograft,Xenograft,Autograft,Isograft" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/Imaging_Assay_Template.csv b/docs/templates/Imaging_Assay_Template.csv index 34035811..94b6d621 100644 --- a/docs/templates/Imaging_Assay_Template.csv +++ b/docs/templates/Imaging_Assay_Template.csv @@ -1,32 +1,32 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"assayTarget","The HUGO gene symbol that represents the target analyte assayed.","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." 
-"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","optional","#Specimen_State","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"assayTarget","The HUGO gene symbol that represents the target analyte assayed.","false","" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"entityId","TBD","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","false","#SpecimenPreparationMethod" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/Light_Scattering_Assay_Template.csv b/docs/templates/Light_Scattering_Assay_Template.csv new file mode 100644 index 00000000..1db5ac3b --- /dev/null +++ b/docs/templates/Light_Scattering_Assay_Template.csv @@ -0,0 +1,17 @@ +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"assay","The technology used to generate the data in this file","false","#Assay" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"concentrationMaterial","Numeric value for concentration of the material","false","" +"concentrationMaterialUnit","Unit used for the material concentration, e.g. 
mg/mL","false","Mg/mL,MM,Particles/mL" +"concentrationNaCl","Numeric value for NaCl concentration","false","" +"concentrationNaClUnit","Unit used for the NaCl concentration, e.g. mM","false","Mg/mL,MM" +"entityId","TBD","false","" +"materialType","Type of material in the characterization","false","Nanoparticles,Polymericnanoparticles,Smallmolecule,DNA" +"pH","Numeric value for pH sample parameter","false","" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" diff --git a/docs/templates/MRI_Assay_Template.csv b/docs/templates/MRI_Assay_Template.csv index c6cb2fe4..1bb0ba39 100644 --- a/docs/templates/MRI_Assay_Template.csv +++ b/docs/templates/MRI_Assay_Template.csv @@ -1,35 +1,35 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"MRISequence","The scanning sequence/modality that is used for a conventional MRI scan.","optional","T1-weighted,T2-weighted,PD-weighted,Short Tau Inversion Recovery","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","optional","","" -"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","optional","","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. 
This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","optional","#Specimen_State","" -"timePointUnit","For timed experiments this represents the unit of time measured","optional","#Time_Unit","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"MRISequence","The scanning sequence/modality that is used for a conventional MRI scan.","false","T1-weighted,T2-weighted,PD-weighted,ShortTauInversionRecovery" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"entityId","TBD","false","" +"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","false","" +"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. 
This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","false","#SpecimenPreparationMethod" +"timePointUnit","For timed experiments this represents the unit of time measured","false","#TimePointUnit" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/Patient_Timepoints_Template.csv b/docs/templates/Patient_Timepoints_Template.csv index 93b1ffa3..49fc37f6 100644 --- a/docs/templates/Patient_Timepoints_Template.csv +++ b/docs/templates/Patient_Timepoints_Template.csv @@ -1,12 +1,12 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. 
years.","optional","days, months, years","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","optional","","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"timePointUnit","For timed experiments this represents the unit of time measured","optional","#Time_Unit","" +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. 
years.","false","Days,Months,Years" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"timePointUnit","For timed experiments this represents the unit of time measured","false","#TimePointUnit" diff --git a/docs/templates/Pharmacokinetics_Assay_Template.csv b/docs/templates/Pharmacokinetics_Assay_Template.csv index 6f7379e4..c5d38aa6 100644 --- a/docs/templates/Pharmacokinetics_Assay_Template.csv +++ b/docs/templates/Pharmacokinetics_Assay_Template.csv @@ -1,38 +1,38 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"assayTarget","The HUGO gene symbol that represents the target analyte assayed.","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","optional","","" -"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. 
Use in tandem with timePointUnit","optional","","" -"genePerturbationTechnology","Technology used to perform gene perturbation","optional","#Gene_Perturbation_Technology","" -"genePerturbationType","Specific way in which a single gene was perturbed in a sample","optional","#Gene_Perturbation","" -"genePerturbed","The HUGO gene symbol for the gene that is perturbed.","optional","","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. 
The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","optional","#Specimen_State","" -"timePointUnit","For timed experiments this represents the unit of time measured","optional","#Time_Unit","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. 
This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"assayTarget","The HUGO gene symbol that represents the target analyte assayed.","false","" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"entityId","TBD","false","" +"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","false","" +"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","false","" +"genePerturbationTechnology","Technology used to perform gene perturbation","false","RNAi,CRISPR,CRERecombinase" +"genePerturbationType","Specific way in which a single gene was perturbed in a sample","false","#GenePerturbationType" +"genePerturbed","The HUGO gene symbol for the gene that is perturbed.","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","false","#SpecimenPreparationMethod" +"timePointUnit","For timed experiments this represents the unit of time measured","false","#TimePointUnit" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/Plate_Based_Reporter_Assay_Template.csv b/docs/templates/Plate_Based_Reporter_Assay_Template.csv index 09eaeadd..2d99eb84 100644 --- a/docs/templates/Plate_Based_Reporter_Assay_Template.csv +++ b/docs/templates/Plate_Based_Reporter_Assay_Template.csv @@ -1,37 +1,37 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. 
Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"assayTarget","The HUGO gene symbol that represents the target analyte assayed.","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","optional","","" -"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","optional","","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"reporterGene","A biological material (clone, oligo, etc.) on an array which will report on some biosequence or biosequences.","optional","","" -"reporterSubstance","A gene which produces an easily assayed phenotype. Often used for expression studies of heterologous promoters.","optional","","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. 
an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","optional","#Specimen_State","" -"timePointUnit","For timed experiments this represents the unit of time measured","optional","#Time_Unit","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. 
years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"assayTarget","The HUGO gene symbol that represents the target analyte assayed.","false","" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"entityId","TBD","false","" +"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","false","" +"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber=1","false","#ReadPair" +"reporterGene","A biological material (clone, oligo, etc.) on an array which will report on some biosequence or biosequences.","false","" +"reporterSubstance","A gene which produces an easily assayed phenotype. Often used for expression studies of heterologous promoters.","false","" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. 
an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","false","#SpecimenPreparationMethod" +"timePointUnit","For timed experiments this represents the unit of time measured","false","#TimePointUnit" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/Processed_Aligned_Reads_Template.csv b/docs/templates/Processed_Aligned_Reads_Template.csv index cd41bada..3d1ec3a9 100644 --- a/docs/templates/Processed_Aligned_Reads_Template.csv +++ b/docs/templates/Processed_Aligned_Reads_Template.csv @@ -1,45 +1,45 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"averageBaseQuality","Average base quality collected from samtools","optional","","" -"averageInsertSize","Average insert size as reported by samtools","optional","","" -"averageReadLength","Average read length collected from samtools","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"genomicReference","Version of genome reference used for alignment in processing workflow","optional","","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. 
a clinical dataset that contains endpoint data for an entire cohort." -"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"meanCoverage","Mean coverage for whole genome sequencing, or mean target coverage for whole exome and targeted sequencing, collected from Picard Tools","optional","","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"pairsOnDifferentChr","Pairs on different chromosomes collected from samtools","optional","","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"proportionCoverage10x","Proportion of all reference bases for whole genome sequencing, or targeted bases for whole exome and targeted sequencing, that achieves 10X or greater coverage from Picard Tools","optional","","" -"proportionCoverage30x","Proportion of all reference bases for whole genome sequencing, or targeted bases for whole exome and targeted sequencing, that achieves 30X or greater coverage from Picard Tools","optional","","" -"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","optional","","" -"readsDuplicatedPercent","Percent of duplicated reads collected from samtools","optional","","" -"readsMappedPercent","Percent of mapped reads collected from samtools","optional","","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","optional","#Specimen_State","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"totalReads","If available, the total number of reads collected from samtools.","optional","","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"workflow","Name and version of the workflow used to generate/analyze the data","optional","","" -"workflowLink","Workflow URL reference","optional","","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. 
This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"averageBaseQuality","Average base quality collected from samtools","false","" +"averageInsertSize","Average insert size as reported by samtools","false","" +"averageReadLength","Average read length collected from samtools","false","" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"entityId","TBD","false","" +"genomicReference","Version of genome reference used for alignment in processing workflow","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"meanCoverage","Mean coverage for whole genome sequencing, or mean target coverage for whole exome and targeted sequencing, collected from Picard Tools","false","" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. 
It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"pairsOnDifferentChr","Pairs on different chromosomes collected from samtools","false","" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber=1","false","#ReadPair" +"proportionCoverage10x","Proportion of all reference bases for whole genome sequencing, or targeted bases for whole exome and targeted sequencing, that achieves 10X or greater coverage from Picard Tools","false","" +"proportionCoverage30x","Proportion of all reference bases for whole genome sequencing, or targeted bases for whole exome and targeted sequencing, that achieves 30X or greater coverage from Picard Tools","false","" +"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","false","" +"readsDuplicatedPercent","Percent of duplicated reads collected from samtools","false","" +"readsMappedPercent","Percent of mapped reads collected from samtools","false","" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. 
an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","false","#SpecimenPreparationMethod" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"totalReads","If available, the total number of reads collected from samtools.","false","" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" +"workflow","Name and version of the workflow used to generate/analyze the data","false","" +"workflowLink","Workflow URL reference","false","" diff --git a/docs/templates/Processed_Expression_Template.csv b/docs/templates/Processed_Expression_Template.csv index 102d57e1..84cc6b22 100644 --- a/docs/templates/Processed_Expression_Template.csv +++ b/docs/templates/Processed_Expression_Template.csv @@ -1,33 +1,33 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"expressionUnit","Measure used for transcript expression quantification","required","TPM,RPKM,FPKM,Counts,Other","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." 
-"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. 
This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","optional","#Specimen_State","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"workflow","Name and version of the workflow used to generate/analyze the data","optional","","" -"workflowLink","Workflow URL reference","optional","","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"expressionUnit","Measure used for transcript expression quantification","true","#ExpressionUnit" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. 
Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"entityId","TBD","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. 
The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber=1","false","#ReadPair" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","false","#SpecimenPreparationMethod" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" +"workflow","Name and version of the workflow used to generate/analyze the data","false","" +"workflowLink","Workflow URL reference","false","" diff --git a/docs/templates/Processed_Variant_Calls_Template.csv b/docs/templates/Processed_Variant_Calls_Template.csv index e4edf3c3..5045adb8 100644 --- a/docs/templates/Processed_Variant_Calls_Template.csv +++ b/docs/templates/Processed_Variant_Calls_Template.csv @@ -1,22 +1,23 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"workflow","Name and version of the workflow used to generate/analyze the data","optional","","" -"workflowLink","Workflow URL reference","optional","","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. 
Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"assay","The technology used to generate the data in this file","false","#Assay" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"entityId","TBD","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isFilteredReads","Whether the reads in the processed result have been filtered by adding a 'PASS' filter or other filters as determined by the data generator","false","" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber=1","false","#ReadPair" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. 
This can be the A A unique identifier (non-PII) that represents the","false","" +"workflow","Name and version of the workflow used to generate/analyze the data","false","" +"workflowLink","Workflow URL reference","false","" diff --git a/docs/templates/Proteomics_Assay_Template.csv b/docs/templates/Proteomics_Assay_Template.csv index 15cb2b0a..cb651c47 100644 --- a/docs/templates/Proteomics_Assay_Template.csv +++ b/docs/templates/Proteomics_Assay_Template.csv @@ -1,35 +1,35 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"proteinExtractSource","Source of the extracted protein used in the experiment","required","cell lysate, nuclei, mitochondria, cytoplasm","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. 
Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","optional","","" -"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","optional","","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." 
-"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","optional","#Specimen_State","" -"timePointUnit","For timed experiments this represents the unit of time measured","optional","#Time_Unit","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"proteinExtractSource","Source of the extracted protein used in the experiment","true","Celllysate,Nuclei,Mitochondria,Cytoplasm" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. 
replicate runs. This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"entityId","TBD","false","" +"experimentalCondition","A free-text description of the experimental condition (e.g. 5 mM doxorubicin).","false","" +"experimentalTimepoint","The numeric value indicating the time elapsed from the beginning of the experiment at which the specimen was collected. Use in tandem with timePointUnit","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. 
An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came, e.g. the single parent tumor. The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber=1","false","#ReadPair" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","false","#SpecimenPreparationMethod" +"timePointUnit","For timed experiments this represents the unit of time measured","false","#TimePointUnit" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/RNASeq_Template.csv b/docs/templates/RNASeq_Template.csv index 62279cf9..8b5c5ffc 100644 --- a/docs/templates/RNASeq_Template.csv +++ b/docs/templates/RNASeq_Template.csv @@ -1,43 +1,47 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"readLength","Number of base pairs (bp) sequenced for a read","required","","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. 
Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","optional","#Dissociation_Method","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"isStranded","Whether or not the library is stranded (Yes; No)","optional","Yes, No","" -"libraryPrep","The general strategy by which the library was prepared","optional","#Library_Prep","" -"libraryPreparationMethod","Method by which library was prepared","optional","#Library_Preparation_Method","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","optional","#Nucleic_Acid_Source","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","optional","","" -"readPair","The read of origin","optional","#Read_Pair","" -"readPairOrientation","The relative orientation of the reads in a paired-end protocol","optional","#Read_Pair_Orientation","" -"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","optional","#Read_Strand_Origin","" -"runType","Is the sequencing run single or paired end?","optional","#Run_Type","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","optional","#Specimen_State","" -"targetDepth","The targeted read depth prior to sequencing.","optional","","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"readLength","Number of base pairs (bp) sequenced for a read","true","" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. 
This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","false","#DissociationMethod" +"entityId","TBD","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"isStranded","Whether or not the library is stranded (Yes; No)","false","Yes,No" +"isXenograft","Whether or not sample source is a xenograft (Yes; No)","false","Yes,No" +"libraryPrep","The general strategy by which the library was prepared","false","RRNAdepletion,PolyAselection,LncRNAenrichment,MiRNAenrichment" +"libraryPreparationMethod","Method by which library was prepared","false","#LibraryPreparationMethod" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","false","#NucleicAcidSource" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came, e.g. the single parent tumor. The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","false","" +"readPair","The read of origin","false","" +"readPairOrientation","The relative orientation of the reads in a paired-end protocol","false","Inward,Outward,Matching,Fr-firststrand" +"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","false","Forward,Reverse" +"runType","Is the sequencing run single or paired end?","false","SingleEnd,PairedEnd" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","false","#SpecimenPreparationMethod" +"targetDepth","The targeted read depth prior to sequencing.","false","" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"transplantationRecipientSpecies","Species into which donor tissue was grown","false","Human,Mouse" +"transplantationRecipientTissue","Tissue into which a xenograph sample is transplanted","false","" +"transplantationType","Type of transplantation involved in the experiment, derived from MESH","false","Allograft,Xenograft,Autograft,Isograft" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/ScRNASeq_Template.csv b/docs/templates/ScRNASeq_Template.csv index ece9b0d3..98d182ff 100644 --- a/docs/templates/ScRNASeq_Template.csv +++ b/docs/templates/ScRNASeq_Template.csv @@ -1,42 +1,46 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"readLength","Number of base pairs (bp) sequenced for a read","required","","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." -"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","optional","#Dissociation_Method","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." 
-"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"isStranded","Whether or not the library is stranded (Yes; No)","optional","Yes, No","" -"libraryPrep","The general strategy by which the library was prepared","optional","#Library_Prep","" -"libraryPreparationMethod","Method by which library was prepared","optional","#Library_Preparation_Method","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","optional","#Nucleic_Acid_Source","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. 
Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","optional","","" -"readPair","The read of origin","optional","#Read_Pair","" -"readPairOrientation","The relative orientation of the reads in a paired-end protocol","optional","#Read_Pair_Orientation","" -"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","optional","#Read_Strand_Origin","" -"runType","Is the sequencing run single or paired end?","optional","#Run_Type","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","optional","#Specimen_State","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"readLength","Number of base pairs (bp) sequenced for a read","true","" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. 
This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","false","#DissociationMethod" +"entityId","TBD","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"isStranded","Whether or not the library is stranded (Yes; No)","false","Yes,No" +"isXenograft","Whether or not sample source is a xenograft (Yes; No)","false","Yes,No" +"libraryPrep","The general strategy by which the library was prepared","false","RRNAdepletion,PolyAselection,LncRNAenrichment,MiRNAenrichment" +"libraryPreparationMethod","Method by which library was prepared","false","#LibraryPreparationMethod" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","false","#NucleicAcidSource" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","false","" +"readPair","The read of origin","false","" +"readPairOrientation","The relative orientation of the reads in a paired-end protocol","false","Inward,Outward,Matching,Fr-firststrand" +"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","false","Forward,Reverse" +"runType","Is the sequencing run single or paired end?","false","SingleEnd,PairedEnd" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the same as parentSpecimenID when there is no subsectioning.","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","false","#SpecimenPreparationMethod" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"transplantationRecipientSpecies","Species into which donor tissue was grown","false","Human,Mouse" +"transplantationRecipientTissue","Tissue into which a xenograft sample is transplanted","false","" +"transplantationType","Type of transplantation involved in the experiment, derived from MESH","false","Allograft,Xenograft,Autograft,Isograft" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/Source_Code_Template.csv b/docs/templates/Source_Code_Template.csv index af1609ac..c8212ff3 100644 --- a/docs/templates/Source_Code_Template.csv +++ b/docs/templates/Source_Code_Template.csv @@ -1,10 +1,10 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"documentation","URL to any documentation describing the resource and its use.","optional","","" -"programmingLanguage","A computer programming language","optional","Python, R, MATLAB, Java, C, C++, C#, Javascript, bash","" -"runtimePlatform","Runtime platform or script interpreter dependencies (e.g. 
Java v1, Python 2.3).","optional","","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"documentation","URL to any documentation describing the resource and its use.","false","" +"entityId","TBD","false","" +"programmingLanguage","A computer programming language","false","#ProgrammingLanguage" +"runtimePlatform","Runtime platform or script interpreter dependencies (e.g. Java v1, Python 2.3).","false","" diff --git a/docs/templates/WES_Template.csv b/docs/templates/WES_Template.csv index c852014e..0f378eec 100644 --- a/docs/templates/WES_Template.csv +++ b/docs/templates/WES_Template.csv @@ -1,43 +1,43 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." -"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. 
For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"readLength","Number of base pairs (bp) sequenced for a read","required","","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"targetCaptureKitID","A unique identifier for the kit used to construct a genomic library using target capture-based techniques, which should be composed of the vendor name, kit name and kit version.","required","","For WES processing, the target BED used could be provided based on the target capture kit." -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." 
-"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","optional","#Dissociation_Method","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"isStranded","Whether or not the library is stranded (Yes; No)","optional","Yes, No","" -"libraryPrep","The general strategy by which the library was prepared","optional","#Library_Prep","" -"libraryPreparationMethod","Method by which library was prepared","optional","#Library_Preparation_Method","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","optional","#Nucleic_Acid_Source","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. 
The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","optional","","" -"readPair","The read of origin","optional","#Read_Pair","" -"readPairOrientation","The relative orientation of the reads in a paired-end protocol","optional","#Read_Pair_Orientation","" -"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","optional","#Read_Strand_Origin","" -"runType","Is the sequencing run single or paired end?","optional","#Run_Type","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","optional","#Specimen_State","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"readLength","Number of base pairs (bp) sequenced for a read","true","" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"targetCaptureKitID","A unique identifier for the kit used to construct a genomic library using target capture-based techniques, which should be composed of the vendor name, kit name and kit version.","true","" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. 
This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","false","#DissociationMethod" +"entityId","TBD","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"isStranded","Whether or not the library is stranded (Yes; No)","false","Yes,No" +"libraryPrep","The general strategy by which the library was prepared","false","RRNAdepletion,PolyAselection,LncRNAenrichment,MiRNAenrichment" +"libraryPreparationMethod","Method by which library was prepared","false","#LibraryPreparationMethod" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","false","#NucleicAcidSource" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenID can be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","false","" +"readPair","The read of origin","false","" +"readPairOrientation","The relative orientation of the reads in a paired-end protocol","false","Inward,Outward,Matching,Fr-firststrand" +"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","false","Forward,Reverse" +"runType","Is the sequencing run single or paired end?","false","SingleEnd,PairedEnd" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the same as parentSpecimenID when there is no subsectioning.","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","false","#SpecimenPreparationMethod" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/docs/templates/WGS_Template.csv b/docs/templates/WGS_Template.csv index ece9b0d3..4257937b 100644 --- a/docs/templates/WGS_Template.csv +++ b/docs/templates/WGS_Template.csv @@ -1,42 +1,42 @@ -"Field","Description","Required","ControlledVocab","Note" -"Component","Category of metadata manifest; provide the same one for all items/rows.","required","","" -"Filename","The name of the file.","required","","Automatically filled when using the curator app." 
-"assay","The technology used to generate the data in this file","required","#Assay","" -"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","required","#Data_Level","" -"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","required","#Data","" -"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","required","#File_Format","" -"readLength","Number of base pairs (bp) sequenced for a read","required","","" -"resourceType","The type of resource being stored and annotated","required","#Resource","" -"species","The name of a species (typically a taxonomic group) of organism.","required","#Species","" -"age","A numeric value representing age of the individual. Use with ageUnit.","optional","","" -"ageUnit","A time unit that can be used with a given age value, e.g. years.","optional","days, months, years","" -"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. This is linked to the specimenID. ","optional","","" -"cellType","A cell type is a distinct morphological or functional form of cell.","optional","#Cell","" -"comments","Brief free-text comments that may also be important to understanding the resource.","optional","","" -"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","optional","#Diagnosis","Currently a single NF diagnosis allowed; in the future multiple diagnoses may be allowed for comorbidities." 
-"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","optional","#Dissociation_Method","" -"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","optional","","Typically annotated with a single ID, but many allowed for, e.g. a clinical dataset that contains endpoint data for an entire cohort." -"isCellLine","Whether or not sample source is a cell line (Yes; No)","optional","Yes, No","" -"isPrimaryCell","Whether or not cellType is primary (Yes; No)","optional","Yes, No","" -"isStranded","Whether or not the library is stranded (Yes; No)","optional","Yes, No","" -"libraryPrep","The general strategy by which the library was prepared","optional","#Library_Prep","" -"libraryPreparationMethod","Method by which library was prepared","optional","#Library_Preparation_Method","" -"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. EXAMPLE(S): HEK293T (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","optional","#Model_System","" -"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","optional","#Genotype","" -"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","optional","#Nucleic_Acid_Source","" -"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","optional","#Organ","" -"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. 
The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","optional","","" -"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","optional","#Platform","" -"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also if submitting data associated with first milestone, progressReportNumber =1","optional","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, Not Applicable","" -"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","optional","","" -"readPair","The read of origin","optional","#Read_Pair","" -"readPairOrientation","The relative orientation of the reads in a paired-end protocol","optional","#Read_Pair_Orientation","" -"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","optional","#Read_Strand_Origin","" -"runType","Is the sequencing run single or paired end?","optional","#Run_Type","" -"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","optional","#Sex","" -"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","optional","","Typically annotated with a single ID, but many allowed for summary data." -"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. 
sequencing","optional","#Specimen_State","" -"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","optional","#Tissue","" -"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","optional","#Tumor","" -"entityId",NA,NA,NA,NA +"Field","Description","Required","ValidRange" +"Component","Category of metadata manifest; provide the same one for all items/rows.","true","" +"Filename","The name of the file.","true","" +"dataSubtype","Further qualification of dataType, which may be used to indicate the state of processing of the data, aggregation of the data, or presence of metadata.","true","#DataSubtype" +"dataType","A type of experimental, clinical, or other data. This typically refers to a high-level data type that can be related to an assay type. For example, a file of dataType `genomicVariants` might have an assay value of `whole genome sequencing`.","true","#DataType" +"fileFormat","Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software","true","#FileFormat" +"readLength","Number of base pairs (bp) sequenced for a read","true","" +"resourceType","The type of resource being stored and annotated","true","#ResourceType" +"species","The name of a species (typically a taxonomic group) of organism.","true","#Species" +"age","A numeric value representing age of the individual. Use with ageUnit.","false","" +"ageUnit","A time unit that can be used with a given age value, e.g. years.","false","Days,Months,Years" +"aliquotID","A unique identifier (non-PII) that represents the aliquots used for e.g. replicate runs. 
This is linked to the specimenID.","false","" +"assay","The technology used to generate the data in this file","false","#Assay" +"cellType","A cell type is a distinct morphological or functional form of cell.","false","#CellType" +"comments","Brief free-text comments that may also be important to understanding the resource.","false","" +"diagnosis","A diagnosis is the result of a medical investigation to identify a disorder from its signs and symptoms.","false","#Diagnosis" +"dissociationMethod","Procedure by which a biological specimen is dissociated into individual cells or a cell suspension","false","#DissociationMethod" +"entityId","TBD","false","" +"individualID","A unique identifier (non-PII) that represents the individual from which the data came. This could be a patient or animal ID.","false","" +"isCellLine","Whether or not sample source is a cell line (Yes; No)","false","Yes,No" +"isPrimaryCell","Whether or not cellType is primary (Yes; No)","false","Yes,No" +"isStranded","Whether or not the library is stranded (Yes; No)","false","Yes,No" +"libraryPrep","The general strategy by which the library was prepared","false","RRNAdepletion,PolyAselection,LncRNAenrichment,MiRNAenrichment" +"libraryPreparationMethod","Method by which library was prepared","false","#LibraryPreparationMethod" +"modelSystemName","A group of presumed common ancestry with clear-cut physiological but usually not morphological distinctions such as an animal model or cell line. 
EXAMPLE(S): HEK293 (cell line), Minnesota5 (swine strain), DXL (poultry strain), RB51 (vaccine strain of Brucella abortus)","false","#ModelSystemName" +"nf1Genotype","Genotype of NF1 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nf2Genotype","Genotype of NF2 gene in the biospecimen from which the data were derived, if known","false","#Nf1Genotype" +"nucleicAcidSource","Source of the extracted nucleic acid used in the experiment","false","#NucleicAcidSource" +"organ","A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region.","false","#Organ" +"parentSpecimenID","A unique identifier (non-PII) that represents the parent specimen (sample) from which the data came from, e.g. the single parent tumor. The parentSpecimenIDcan be the same as specimenID when there is no subsectioning.","false","" +"platform","A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data.","false","#Platform" +"progressReportNumber","Indicates milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. 
Also if submitting data associated with first milestone, progressReportNumber =1","false","#ReadPair" +"readDepth","If available, the coverage statistic as output from bedtools coverage or samtools stats.","false","" +"readPair","The read of origin","false","" +"readPairOrientation","The relative orientation of the reads in a paired-end protocol","false","Inward,Outward,Matching,Fr-firststrand" +"readStrandOrigin","The strand from which the read originates in a strand-specific protocol","false","Forward,Reverse" +"runType","Is the sequencing run single or paired end?","false","SingleEnd,PairedEnd" +"sex","Phenotypic expression of chromosomal makeup that defines a study subject as male, female, or other.","false","Male,Female,Unknown" +"specimenID","A unique identifier (non-PII) that represents the subspecimen (subsample) from which the data came, e.g. an ID that distinguishes between different parts of the same parent tumor specimen. This can be the A A unique identifier (non-PII) that represents the","false","" +"specimenPreparationMethod","Term that represents preservation of the sample before usage in, e.g. sequencing","false","#SpecimenPreparationMethod" +"tissue","A tissue is a mereologically maximal collection of cells that together perform physiological function.","false","#Tissue" +"tumorType","The type of tumor that the biospecimen used to generate the data were collected from.","false","#CellType" diff --git a/modules/Data/Metadata.csv b/modules/Data/Resource.csv similarity index 100% rename from modules/Data/Metadata.csv rename to modules/Data/Resource.csv