From a884d71e87ca05dfc05389ee9afe44c6aea249dd Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Fri, 12 Apr 2024 14:46:22 -0700 Subject: [PATCH 01/12] CosMx Data Model Updates New attributes and changes for the CosMx Experiment --- HTAN.model.csv | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index ff302cdd..07ab17e0 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -6,7 +6,7 @@ Component,"Category of metadata (e.g. Diagnosis, Biospecimen, scRNA-seq Level 1, Patient,HTAN patient,,"Component, HTAN Participant ID",,FALSE,Individual Organism,"Demographics, Family History, Exposure, Follow Up, Diagnosis, Therapy, Molecular Test",, File,A type of Information Content Entity specific to OS,,,,FALSE,Information Content Entity,,https://w3id.org/biolink/vocab/DataFile, Filename,Name of a file,,,,TRUE,,,,regex search ^.+\/\S*$ -File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc",,,TRUE,,,, +File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc, CosMx Experiment",,,TRUE,,,, Checksum,MD5 checksum of the BAM file,,,,TRUE,Information Content Entity,,, HTAN Data File ID,Self-identifier for this data file - HTAN ID of this file HTAN ID SOP (eg HTANx_yyy_zzz),,,,TRUE,File,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))_([0-9]\d*|0000)$ warning HTAN Participant ID,HTAN ID associated with a patient based on HTAN ID SOP (eg HTANx_yyy ),,,,TRUE,Patient,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))$ warning @@ -84,7 +84,7 @@ Visium File Type,The file type generated for the visium experiment.,"reference p Run ID,A unique identifier for this individual run (typically associated with a single slide) of the spatial transcriptomic processing workflow.,,,,TRUE,Spatial Transcriptomics,,, Capture Area,"Area (or Capture Area) - One of the either four or two active regions where tissue can be placed on a Visium slide. Each area is intended to contain only one tissue sample. Slide areas are named consecutively from top to bottom: A1, B1, C1, D1 for Visium slides with 6.5 mm Capture Area and A, B for CytAssist slides with 11 mm Capture Area. Both CytAssist slides with 6.5 mm Capture Area and Gateway Slides contain only two slide areas, A1 and D1.","A, B, C, D, A1, B1, C1, D1",,,FALSE,Spatial Transcriptomics,,, Slide Version,Version of imaging slide used. Slide version is critical for the analysis of the sequencing data as different slides have different capture area layouts.,"V1, V2, V3, V4",,,FALSE,Spatial Transcriptomics,,, -Slide ID,"The unique identifier printed on the label of each Visium slide. The serial number starts with V followed by a number which can range between one through five and ends with a dash and a three digit number, such as 123.",,,,FALSE,Spatial Transcriptomics,,, +Slide ID,"For Visium, it is the unique identifier printed on the label of each Visium slide. The serial number starts with V followed by a number which can range between one through five and ends with a dash and a three digit number, such as 123. For CosMx, this refers to the loaded Flow Cell ID. For Xenium, this ID indicates the slide orientation, as it matches the relative location of the ID on the physical Xenium slide.",,,,FALSE,Spatial Transcriptomics,,, Image Re-orientation,"To ensure good fiducial alignment and tissue spots detection, it is important to correct for this shift in orientation.","TRUE, FALSE",,,FALSE,Spatial Transcriptomics,,, Permeabilization Time,Fixed and stained tissue sections are permeabilized for different times. Each Capture Area captures polyadenylated mRNA from the attached tissue section. Measure is provided in minutes.,,,,FALSE,Spatial Transcriptomics,,, Whitelist Spatial Barcode File Link,Link to file listing all possible spatial barcodes. URL,,,,TRUE,Spatial Transcriptomics,,, @@ -134,7 +134,7 @@ Scan Offset X,Offset X of the scan for GeoMx Analysis,,,,TRUE,NanoString GeoMx D Scan Offset Y,Offset Y of the scan for GeoMx Analysis,,,,TRUE,NanoString GeoMx DSP ROI RCC Segment Annotation Metadata,,, Binding Density,The binding density as reported by the application,,,,FALSE,NanoString GeoMx DSP ROI RCC Segment Annotation Metadata,,, Positive norm factor,The Positive Control Normalization factor calculated using pos-hyb controls,,,,FALSE,NanoString GeoMx DSP ROI RCC Segment Annotation Metadata,,, -Surface area,Surface area of the ROI in square microns (µm2),,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, +Surface area,"Surface area of the ROI in square microns (µm2). In CosMx, this is referred to as the Scan Area. In Xenium, this is referred to as the Region Area",,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, Nuclei count,Number of nuclei detected in the segment (if applicable),,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, Tissue Stain,e.g. CD45 or PanCK (if masking was performed),,,,FALSE,NanoString GeoMx DSP ROI RCC Segment Annotation Metadata,,, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata,GeoMx ROI and Segment Metadata Attributes. The assayed biospecimen should be reported one per row with the associated ROI coordinates. ,,"HTAN Parent Biospecimen ID, Scan name, Slide name, ROI name, Segment name, ROI X Coordinate,ROI Y Coordinate, Tags, Scan Height, Scan Width, Scan Offset X, Scan Offset Y, Surface area, Nuclei count, Sequencing Saturation, MapQ30, Raw reads, Stitched reads, Aligned reads, Deduplicated reads, In Situ Negative median, Biological probe median",,FALSE,Assay,,, @@ -165,6 +165,19 @@ RPPA Level 2,Array based protemics. Each dilution curve of spot intensities is f HTAN RPPA Antibody Table,A table containing antibody level metadata for RPPA,,"HTAN RPPA Antibody Table ID, Filename, File Format, Ab Name Reported on Dataset, GENCODE Gene Symbol Target, UNIPROT Protein ID Target, Phosphoprotein Flag, Vendor, Catalog Number, Internal Ab ID, Species, RPPA Dilution, Phospho Site, RPPA Validation Status, Clone, Clonality, Antibody Notes",,TRUE,RPPA Level 2,,, RPPA Level 3,Level 3 Reverse Phase Protein Array (RPPA) data contains intra-batch normalized intensities.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, Assay Type, Software and Version, Normalization Method",,FALSE,Assay,Biospecimen,, RPPA Level 4,Level 4 Reverse Phase Protein Array (RPPA) data contains intra-batch corrected intensities.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, Assay Type, Batch Correction Method",,FALSE,Assay,RPPA Level 2,, +Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface Area, Experiment IF Channels, Transcripts per Cell, Percent Total Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, +CosMx Bundle Contents,A comma separated list of filenames within the CosMx bundle zip file,,,,TRUE,Spatial Transcriptomics,,, +CosMx Assay Type,The specification for barcodes on each image. Either RNA probe or protein antibody according to the assay,"RNA, Protein",,,TRUE,Spatial Transcriptomics,,, +Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name,,,,TRUE,Spatial Transcriptomics,,, +Total Number of Cells,The total number of cells analyzed on the flow cell,,,,TRUE,Spatial Transcriptomics,,, +Total Number of Targets,Refers to the target of an assay. Can be genes/transcripts or probes,,,,TRUE,Spatial Transcriptomics,,, +Number of FOVs,The total number of FOVs recorded for the sample on a single flow cell,,,,TRUE,Spatial Transcriptomics,,, +Experiment IF Channels,A comma-separated list with any number of channels the user deems appropriate (Example: PanCK, CD45, CD3, DAPI),,,,TRUE,Spatial Transcriptomics,,, +Transcripts per Cell,Mean or Median transcript count per cell analyzed on the flow cell or slide,,,,TRUE,Spatial Transcriptomics,,, +Percent of Transcripts within Cells,The percentage of transcripts assigned to assayed cells,,,,TRUE,Spatial Transcriptomics,,, +Mean Total Transcripts per Area,The mean total transcripts per um3,,,,TRUE,Spatial Transcriptomics,,, +Unique Genes,The total unique genes detected above background,,,,FALSE,Spatial Transcriptomics,,, +Total Negative Probe Counts,Mean Total Negative probe counts/cell,,,,TRUE,Spatial Transcriptomics,,, Mass Spectrometry Level 1,"Mass Spectrometry derived data that includes proteomics, metabolomics, and lipidomics, level 1",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, MS Batch ID, MS-based Assay Type, Analyte Type, MS-based Targeted, MS Instrument Vendor and Model, MS Source, Polarity, Mass Range Low Value, Mass Range High Value, Data Collection Mode, MS Scan Mode, MS Labeling, Protocol Link, LC Instrument Vendor and Model, LC Column Vendor and Model, LC Resin, LC Length Value, LC Temp Value, LC ID Value, LC Flow Rate, LC Gradient, LC Mobile Phase A, LC Mobile Phase B, Software and Version, MS Instrument Metadata File",,FALSE,Assay,Biospecimen,, Mass Spectrometry Level 2,"Mass Spectrometry derived data that includes proteomics, metabolomics, and lipidomics, level 2",,"Component, Filename, File Format, HTAN Data File ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, MS Assay Category, Software and Version, Mass Spectrometry Auxiliary File",,FALSE,Assay,Mass Spectrometry Level 1,, Mass Spectrometry Level 3,"Mass Spectrometry derived data that includes proteomics, metabolomics, and lipidomics, level 3",,"Component, Filename, File Format, HTAN Data File ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, MS Assay Category, Software and Version, Mass Spectrometry Auxiliary File",,FALSE,Assay,Mass Spectrometry Level 2,, @@ -1011,4 +1024,4 @@ Tile overlap Y,Percentage of image overlap to allow tile stitching in x directio Barretts Esophagus Goblet Cells Present,Presence or absennce of Barretts esophagus goblet cells.,"Yes, No",,,FALSE,Follow Up,,, Pancreatitis Onset Year,Date of onset of pancreatitis.,,,,FALSE,Follow Up,,,num HTAN Parent Channel Metadata ID,HTAN ID for a level 3 channels table.,,,,TRUE, Imaging Level 4,,, -Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, \ No newline at end of file +Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, From 689a3f125f9ced82c10fd3ca8689187846022f10 Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Fri, 12 Apr 2024 14:54:29 -0700 Subject: [PATCH 02/12] Update HTAN.model.csv --- HTAN.model.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 07ab17e0..0785b38e 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -165,7 +165,7 @@ RPPA Level 2,Array based protemics. Each dilution curve of spot intensities is f HTAN RPPA Antibody Table,A table containing antibody level metadata for RPPA,,"HTAN RPPA Antibody Table ID, Filename, File Format, Ab Name Reported on Dataset, GENCODE Gene Symbol Target, UNIPROT Protein ID Target, Phosphoprotein Flag, Vendor, Catalog Number, Internal Ab ID, Species, RPPA Dilution, Phospho Site, RPPA Validation Status, Clone, Clonality, Antibody Notes",,TRUE,RPPA Level 2,,, RPPA Level 3,Level 3 Reverse Phase Protein Array (RPPA) data contains intra-batch normalized intensities.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, Assay Type, Software and Version, Normalization Method",,FALSE,Assay,Biospecimen,, RPPA Level 4,Level 4 Reverse Phase Protein Array (RPPA) data contains intra-batch corrected intensities.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, Assay Type, Batch Correction Method",,FALSE,Assay,RPPA Level 2,, -Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface Area, Experiment IF Channels, Transcripts per Cell, Percent Total Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, +Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)","Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface Area, Experiment IF Channels, Transcripts per Cell, Percent Total Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, CosMx Bundle Contents,A comma separated list of filenames within the CosMx bundle zip file,,,,TRUE,Spatial Transcriptomics,,, CosMx Assay Type,The specification for barcodes on each image. Either RNA probe or protein antibody according to the assay,"RNA, Protein",,,TRUE,Spatial Transcriptomics,,, Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name,,,,TRUE,Spatial Transcriptomics,,, From a6fc73a9ae60a27d85676cc39047a87840b7e387 Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Fri, 12 Apr 2024 14:59:35 -0700 Subject: [PATCH 03/12] Update HTAN.model.csv --- HTAN.model.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 0785b38e..9eece559 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -161,11 +161,11 @@ Stripe Window,"Binning size used for calling significant architectural stripes. Loop Calling,Tool used for identifying loop interactions,,,,TRUE,Sequencing,,, Imaging Level 4,Derived imaging data: Object-by-feature array,,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Parent Channel Metadata ID, HTAN Data File ID, Parameter file, Software and Version, Commit SHA,Number of Objects, Number of Features,Imaging Object Class, Imaging Summary Statistic",,FALSE,Assay,Imaging Level 3 Channels,, SRRS Imaging Level 2,SRRS-specific HTAN raw and pre-processed image data,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Data File ID, Channel Metadata Filename, Imaging Assay Type, Protocol Link, Software and Version, Microscope, Objective, NominalMagnification, Pyramid, Zstack, Tseries, Passed QC, Frame Averaging, Image ID, DimensionOrder, PhysicalSizeX, PhysicalSizeXUnit, PhysicalSizeY, PhysicalSizeYUnit, Pixels BigEndian, PlaneCount, SizeC, SizeT, SizeX, SizeY, SizeZ, PixelType",,FALSE,Assay,Biospecimen,, -RPPA Level 2,Array based protemics. Each dilution curve of spot intensities is fitted using the monotone increasing B-spline model in the SuperCurve R package. This fits a single curve using all the samples on a slide with the signal intensity as the response variable and the dilution steps as independent variables. The fitted curve is plotted with the signal intensities on the y-axis and the log2-concentration of proteins on the x-axis for diagnostic purposes.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, HTAN RPPA Antibody Table, Assay Type, Protocol Link, Software and Version",,FALSE,Assay,Biospecimen,, +RPPA Level 2,Array based protemics. Each dilution curve of spot intensities is fitted using the monotone increasing B-spline model in the SuperCurve R package. This fits a single curve using all the samples on a slide with the signal intensity as the response variable and the dilution steps as independent variables. The fitted curve is plotted with the signal intensities on the y-axis and the log2-concentration of proteins on the x-axis for diagnostic purposes.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, HTAN RPPA Antibody Table, Assay Type, Protocol Link, Software and Version",,FALSE,Assay,Biospecimen,, HTAN RPPA Antibody Table,A table containing antibody level metadata for RPPA,,"HTAN RPPA Antibody Table ID, Filename, File Format, Ab Name Reported on Dataset, GENCODE Gene Symbol Target, UNIPROT Protein ID Target, Phosphoprotein Flag, Vendor, Catalog Number, Internal Ab ID, Species, RPPA Dilution, Phospho Site, RPPA Validation Status, Clone, Clonality, Antibody Notes",,TRUE,RPPA Level 2,,, RPPA Level 3,Level 3 Reverse Phase Protein Array (RPPA) data contains intra-batch normalized intensities.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, Assay Type, Software and Version, Normalization Method",,FALSE,Assay,Biospecimen,, RPPA Level 4,Level 4 Reverse Phase Protein Array (RPPA) data contains intra-batch corrected intensities.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, Assay Type, Batch Correction Method",,FALSE,Assay,RPPA Level 2,, -Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)","Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface Area, Experiment IF Channels, Transcripts per Cell, Percent Total Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, +Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface Area, Experiment IF Channels, Transcripts per Cell, Percent Total Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, CosMx Bundle Contents,A comma separated list of filenames within the CosMx bundle zip file,,,,TRUE,Spatial Transcriptomics,,, CosMx Assay Type,The specification for barcodes on each image. Either RNA probe or protein antibody according to the assay,"RNA, Protein",,,TRUE,Spatial Transcriptomics,,, Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name,,,,TRUE,Spatial Transcriptomics,,, From 00ec4052aa4ae7883500634bc847362b82068c47 Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Fri, 12 Apr 2024 15:01:18 -0700 Subject: [PATCH 04/12] Update HTAN.model.csv --- HTAN.model.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 9eece559..36628548 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -172,7 +172,7 @@ Panel Name,The human-readable panel name. This could be the Gene Panel name or P Total Number of Cells,The total number of cells analyzed on the flow cell,,,,TRUE,Spatial Transcriptomics,,, Total Number of Targets,Refers to the target of an assay. Can be genes/transcripts or probes,,,,TRUE,Spatial Transcriptomics,,, Number of FOVs,The total number of FOVs recorded for the sample on a single flow cell,,,,TRUE,Spatial Transcriptomics,,, -Experiment IF Channels,A comma-separated list with any number of channels the user deems appropriate (Example: PanCK, CD45, CD3, DAPI),,,,TRUE,Spatial Transcriptomics,,, +Experiment IF Channels,"A comma-separated list with any number of channels the user deems appropriate(Example: PanCK, CD45, CD3, DAPI)",,,,TRUE,Spatial Transcriptomics,,, Transcripts per Cell,Mean or Median transcript count per cell analyzed on the flow cell or slide,,,,TRUE,Spatial Transcriptomics,,, Percent of Transcripts within Cells,The percentage of transcripts assigned to assayed cells,,,,TRUE,Spatial Transcriptomics,,, Mean Total Transcripts per Area,The mean total transcripts per um3,,,,TRUE,Spatial Transcriptomics,,, From 2faba94a6135a0b5f3f9287873dc862d7fd41384 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 13 Apr 2024 08:08:19 +0000 Subject: [PATCH 05/12] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 411 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 392 insertions(+), 19 deletions(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index 8a7f0c35..0fde1c61 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -1291,6 +1291,9 @@ }, { "@id": "bts:Pkc" + }, + { + "@id": "bts:CosMxExperiment" } ], "sms:displayName": "File Format", @@ -2836,6 +2839,23 @@ "sms:required": "sms:false", "sms:validationRules": [] }, + { + "@id": "bts:CosMxExperiment", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CosMxExperiment", + "rdfs:subClassOf": [ + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "CosMx Experiment", + "sms:required": "sms:false", + "sms:validationRules": [] + }, { "@id": "bts:Checksum", "@type": "rdfs:Class", @@ -39417,7 +39437,7 @@ { "@id": "bts:SlideID", "@type": "rdfs:Class", - "rdfs:comment": "The unique identifier printed on the label of each Visium slide. The serial number starts with V followed by a number which can range between one through five and ends with a dash and a three digit number, such as 123.", + "rdfs:comment": "For Visium, it is the unique identifier printed on the label of each Visium slide. The serial number starts with V followed by a number which can range between one through five and ends with a dash and a three digit number, such as 123. For CosMx, this refers to the loaded Flow Cell ID. For Xenium, this ID indicates the slide orientation, as it matches the relative location of the ID on the physical Xenium slide.", "rdfs:label": "SlideID", "rdfs:subClassOf": [ { @@ -41688,7 +41708,7 @@ { "@id": "bts:Surfacearea", "@type": "rdfs:Class", - "rdfs:comment": "Surface area of the ROI in square microns (µm2)", + "rdfs:comment": "Surface area of the ROI in square microns (µm2). In CosMx, this is referred to as the Scan Area. In Xenium, this is referred to as the Region Area", "rdfs:label": "Surfacearea", "rdfs:subClassOf": [ { @@ -43724,6 +43744,376 @@ "sms:required": "sms:false", "sms:validationRules": [] }, + { + "@id": "bts:NanostringCosMxSMIExperiment", + "@type": "rdfs:Class", + "rdfs:comment": "RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)", + "rdfs:label": "NanostringCosMxSMIExperiment", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Nanostring CosMx SMI Experiment", + "sms:required": "sms:false", + "sms:requiresComponent": [ + { + "@id": "bts:Biospecimen" + } + ], + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:Filename" + }, + { + "@id": "bts:FileFormat" + }, + { + "@id": "bts:HTANParentBiospecimenID" + }, + { + "@id": "bts:HTANDataFileID" + }, + { + "@id": "bts:CosMxBundleContents" + }, + { + "@id": "bts:SlideID" + }, + { + "@id": "bts:CosMxAssayType" + }, + { + "@id": "bts:PanelName" + }, + { + "@id": "bts:ProtocolLink" + }, + { + "@id": "bts:SoftwareandVersion" + }, + { + "@id": "bts:TotalNumberofCells" + }, + { + "@id": "bts:TotalNumberofTargets" + }, + { + "@id": "bts:NumberofFOVs" + }, + { + "@id": "bts:SurfaceArea" + }, + { + "@id": "bts:ExperimentIFChannels" + }, + { + "@id": "bts:TranscriptsperCell" + }, + { + "@id": "bts:PercentTotalTranscriptswithinCells" + }, + { + "@id": "bts:MeanTotalTranscriptsperArea" + }, + { + "@id": "bts:UniqueGenes" + }, + { + "@id": "bts:TotalNegativeProbeCounts" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CosMxBundleContents", + "@type": "rdfs:Class", + "rdfs:comment": "A comma separated list of filenames within the CosMx bundle zip file", + "rdfs:label": "CosMxBundleContents", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "CosMx Bundle Contents", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:CosMxAssayType", + "@type": "rdfs:Class", + "rdfs:comment": "The specification for barcodes on each image. Either RNA probe or protein antibody according to the assay", + "rdfs:label": "CosMxAssayType", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:RNA" + }, + { + "@id": "bts:Protein" + } + ], + "sms:displayName": "CosMx Assay Type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:PanelName", + "@type": "rdfs:Class", + "rdfs:comment": "The human-readable panel name. This could be the Gene Panel name or Protein Panel name", + "rdfs:label": "PanelName", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Panel Name", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:TotalNumberofCells", + "@type": "rdfs:Class", + "rdfs:comment": "The total number of cells analyzed on the flow cell", + "rdfs:label": "TotalNumberofCells", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Total Number of Cells", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:TotalNumberofTargets", + "@type": "rdfs:Class", + "rdfs:comment": "Refers to the target of an assay. Can be genes/transcripts or probes", + "rdfs:label": "TotalNumberofTargets", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Total Number of Targets", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:NumberofFOVs", + "@type": "rdfs:Class", + "rdfs:comment": "The total number of FOVs recorded for the sample on a single flow cell", + "rdfs:label": "NumberofFOVs", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Number of FOVs", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:SurfaceArea", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "SurfaceArea", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Surface Area", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:ExperimentIFChannels", + "@type": "rdfs:Class", + "rdfs:comment": "A comma-separated list with any number of channels the user deems appropriate(Example: PanCK, CD45, CD3, DAPI)", + "rdfs:label": "ExperimentIFChannels", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Experiment IF Channels", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:TranscriptsperCell", + "@type": "rdfs:Class", + "rdfs:comment": "Mean or Median transcript count per cell analyzed on the flow cell or slide", + "rdfs:label": "TranscriptsperCell", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Transcripts per Cell", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:PercentTotalTranscriptswithinCells", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "PercentTotalTranscriptswithinCells", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Percent Total Transcripts within Cells", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:MeanTotalTranscriptsperArea", + "@type": "rdfs:Class", + "rdfs:comment": "The mean total transcripts per um3", + "rdfs:label": "MeanTotalTranscriptsperArea", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Mean Total Transcripts per Area", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:UniqueGenes", + "@type": "rdfs:Class", + "rdfs:comment": "The total unique genes detected above background", + "rdfs:label": "UniqueGenes", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Unique Genes", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:TotalNegativeProbeCounts", + "@type": "rdfs:Class", + "rdfs:comment": "Mean Total Negative probe counts/cell", + "rdfs:label": "TotalNegativeProbeCounts", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Total Negative Probe Counts", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:RNA", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "RNA", + "rdfs:subClassOf": [ + { + "@id": "bts:CosMxAssayType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "RNA", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Protein", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Protein", + "rdfs:subClassOf": [ + { + "@id": "bts:CosMxAssayType" + }, + { + "@id": "bts:AnalyteType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "protein", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:PercentofTranscriptswithinCells", + "@type": "rdfs:Class", + "rdfs:comment": "The percentage of transcripts assigned to assayed cells", + "rdfs:label": "PercentofTranscriptswithinCells", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Percent of Transcripts within Cells", + "sms:required": "sms:true", + "sms:validationRules": [] + }, { "@id": "bts:MassSpectrometryLevel1", "@type": "rdfs:Class", @@ -55463,23 +55853,6 @@ "sms:required": "sms:false", "sms:validationRules": [] }, - { - "@id": "bts:Protein", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "Protein", - "rdfs:subClassOf": [ - { - "@id": "bts:AnalyteType" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "protein", - "sms:required": "sms:false", - "sms:validationRules": [] - }, { "@id": "bts:Metabolite", "@type": "rdfs:Class", From 4f1a136710015d62e4592ce0fb4cd59376d0c389 Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Mon, 15 Apr 2024 09:55:48 -0700 Subject: [PATCH 06/12] Update HTAN.model.csv Fixes for case mistakes. --- HTAN.model.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 6eeb0112..16001b09 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -165,7 +165,7 @@ RPPA Level 2,Array based protemics. Each dilution curve of spot intensities is f HTAN RPPA Antibody Table,A table containing antibody level metadata for RPPA,,"HTAN RPPA Antibody Table ID, Filename, File Format, Ab Name Reported on Dataset, GENCODE Gene Symbol Target, UNIPROT Protein ID Target, Phosphoprotein Flag, Vendor, Catalog Number, Internal Ab ID, Species, RPPA Dilution, Phospho Site, RPPA Validation Status, Clone, Clonality, Antibody Notes",,TRUE,RPPA Level 2,,, RPPA Level 3,Level 3 Reverse Phase Protein Array (RPPA) data contains intra-batch normalized intensities.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, Assay Type, Software and Version, Normalization Method",,FALSE,Assay,Biospecimen,, RPPA Level 4,Level 4 Reverse Phase Protein Array (RPPA) data contains intra-batch corrected intensities.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, Assay Type, Batch Correction Method",,FALSE,Assay,RPPA Level 2,, -Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface Area, Experiment IF Channels, Transcripts per Cell, Percent Total Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, +Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface area, Experiment IF Channels, Transcripts per Cell, Percent of Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, CosMx Bundle Contents,A comma separated list of filenames within the CosMx bundle zip file,,,,TRUE,Spatial Transcriptomics,,, CosMx Assay Type,The specification for barcodes on each image. Either RNA probe or protein antibody according to the assay,"RNA, Protein",,,TRUE,Spatial Transcriptomics,,, Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name,,,,TRUE,Spatial Transcriptomics,,, From 2a3f48573498f31c1cb74f1c0be17e4b573e3eee Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 17:02:30 +0000 Subject: [PATCH 07/12] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 50 ++++++++--------------------------------------- 1 file changed, 8 insertions(+), 42 deletions(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index 0fde1c61..e8a6f350 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -43808,7 +43808,7 @@ "@id": "bts:NumberofFOVs" }, { - "@id": "bts:SurfaceArea" + "@id": "bts:Surfacearea" }, { "@id": "bts:ExperimentIFChannels" @@ -43817,7 +43817,7 @@ "@id": "bts:TranscriptsperCell" }, { - "@id": "bts:PercentTotalTranscriptswithinCells" + "@id": "bts:PercentofTranscriptswithinCells" }, { "@id": "bts:MeanTotalTranscriptsperArea" @@ -43941,23 +43941,6 @@ "sms:required": "sms:true", "sms:validationRules": [] }, - { - "@id": "bts:SurfaceArea", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "SurfaceArea", - "rdfs:subClassOf": [ - { - "@id": "bts:Thing" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Surface Area", - "sms:required": "sms:false", - "sms:validationRules": [] - }, { "@id": "bts:ExperimentIFChannels", "@type": "rdfs:Class", @@ -43993,20 +43976,20 @@ "sms:validationRules": [] }, { - "@id": "bts:PercentTotalTranscriptswithinCells", + "@id": "bts:PercentofTranscriptswithinCells", "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "PercentTotalTranscriptswithinCells", + "rdfs:comment": "The percentage of transcripts assigned to assayed cells", + "rdfs:label": "PercentofTranscriptswithinCells", "rdfs:subClassOf": [ { - "@id": "bts:Thing" + "@id": "bts:SpatialTranscriptomics" } ], "schema:isPartOf": { "@id": "http://schema.biothings.io" }, - "sms:displayName": "Percent Total Transcripts within Cells", - "sms:required": "sms:false", + "sms:displayName": "Percent of Transcripts within Cells", + "sms:required": "sms:true", "sms:validationRules": [] }, { @@ -44097,23 +44080,6 @@ "sms:required": "sms:false", "sms:validationRules": [] }, - { - "@id": "bts:PercentofTranscriptswithinCells", - "@type": "rdfs:Class", - "rdfs:comment": "The percentage of transcripts assigned to assayed cells", - "rdfs:label": "PercentofTranscriptswithinCells", - "rdfs:subClassOf": [ - { - "@id": "bts:SpatialTranscriptomics" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Percent of Transcripts within Cells", - "sms:required": "sms:true", - "sms:validationRules": [] - }, { "@id": "bts:MassSpectrometryLevel1", "@type": "rdfs:Class", From b6e204ce8bf996e57f6860e1791d5064710ded51 Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Mon, 22 Apr 2024 10:33:25 -0700 Subject: [PATCH 08/12] Feedback updates and clairfication --- HTAN.model.csv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index f216d5f8..faa6fa0b 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -6,7 +6,7 @@ Component,"Category of metadata (e.g. Diagnosis, Biospecimen, scRNA-seq Level 1, Patient,HTAN patient,,"Component, HTAN Participant ID",,FALSE,Individual Organism,"Demographics, Family History, Exposure, Follow Up, Diagnosis, Therapy, Molecular Test",, File,A type of Information Content Entity specific to OS,,,,FALSE,Information Content Entity,,https://w3id.org/biolink/vocab/DataFile, Filename,Name of a file,,,,TRUE,,,,regex search ^.+\/\S*$ -File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc, CosMx Experiment",,,TRUE,,,, +File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc",,,TRUE,,,, Checksum,MD5 checksum of the BAM file,,,,TRUE,Information Content Entity,,, HTAN Data File ID,Self-identifier for this data file - HTAN ID of this file HTAN ID SOP (eg HTANx_yyy_zzz),,,,TRUE,File,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))_([0-9]\d*|0000)$ warning HTAN Participant ID,HTAN ID associated with a patient based on HTAN ID SOP (eg HTANx_yyy ),,,,TRUE,Patient,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))$ warning @@ -134,7 +134,7 @@ Scan Offset X,Offset X of the scan for GeoMx Analysis,,,,TRUE,NanoString GeoMx D Scan Offset Y,Offset Y of the scan for GeoMx Analysis,,,,TRUE,NanoString GeoMx DSP ROI RCC Segment Annotation Metadata,,, Binding Density,The binding density as reported by the application,,,,FALSE,NanoString GeoMx DSP ROI RCC Segment Annotation Metadata,,, Positive norm factor,The Positive Control Normalization factor calculated using pos-hyb controls,,,,FALSE,NanoString GeoMx DSP ROI RCC Segment Annotation Metadata,,, -Surface area,"Surface area of the ROI in square microns (µm2). In CosMx, this is referred to as the Scan Area. In Xenium, this is referred to as the Region Area",,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, +Surface area,"Surface area of the ROI in square microns (µm^2). In CosMx, this is referred to as the Scan Area. In Xenium, this is referred to as the Region Area",,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, Nuclei count,Number of nuclei detected in the segment (if applicable),,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, Tissue Stain,e.g. CD45 or PanCK (if masking was performed),,,,FALSE,NanoString GeoMx DSP ROI RCC Segment Annotation Metadata,,, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata,GeoMx ROI and Segment Metadata Attributes. The assayed biospecimen should be reported one per row with the associated ROI coordinates. ,,"HTAN Parent Biospecimen ID, Scan name, Slide name, ROI name, Segment name, ROI X Coordinate,ROI Y Coordinate, Tags, Scan Height, Scan Width, Scan Offset X, Scan Offset Y, Surface area, Nuclei count, Sequencing Saturation, MapQ30, Raw reads, Stitched reads, Aligned reads, Deduplicated reads, In Situ Negative median, Biological probe median",,FALSE,Assay,,, @@ -168,7 +168,7 @@ RPPA Level 4,Level 4 Reverse Phase Protein Array (RPPA) data contains intra-batc Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface area, Experiment IF Channels, Transcripts per Cell, Percent of Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, CosMx Bundle Contents,A comma separated list of filenames within the CosMx bundle zip file,,,,TRUE,Spatial Transcriptomics,,, CosMx Assay Type,The specification for barcodes on each image. Either RNA probe or protein antibody according to the assay,"RNA, Protein",,,TRUE,Spatial Transcriptomics,,, -Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name,,,,TRUE,Spatial Transcriptomics,,, +Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name. In Xenium, this refers to the string entered as the name in panel specification (e.g. "Xenium Human Immuno-Oncology Add-on B Gene Expression"). In CosMx, this refers to the panel name as it appears in the CosMx catalog (e.g. "CosMx Human Universal Cell Characterization Panel (1000-plex)") ,,,,TRUE,Spatial Transcriptomics,,, Total Number of Cells,The total number of cells analyzed on the flow cell,,,,TRUE,Spatial Transcriptomics,,, Total Number of Targets,Refers to the target of an assay. Can be genes/transcripts or probes,,,,TRUE,Spatial Transcriptomics,,, Number of FOVs,The total number of FOVs recorded for the sample on a single flow cell,,,,TRUE,Spatial Transcriptomics,,, @@ -1024,4 +1024,4 @@ Tile overlap Y,Percentage of image overlap to allow tile stitching in x directio Barretts Esophagus Goblet Cells Present,Presence or absennce of Barretts esophagus goblet cells.,"Yes, No",,,FALSE,Follow Up,,, Pancreatitis Onset Year,Date of onset of pancreatitis.,,,,FALSE,Follow Up,,,num HTAN Parent Channel Metadata ID,HTAN ID for a level 3 channels table.,,,,TRUE, Imaging Level 4,,, -Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, \ No newline at end of file +Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, From ed8ae8a8548bfb28ef408fa8136c41aa6c2fd68d Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Mon, 22 Apr 2024 10:34:13 -0700 Subject: [PATCH 09/12] Update HTAN.model.csv --- HTAN.model.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index faa6fa0b..7ac3ee1f 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -168,7 +168,7 @@ RPPA Level 4,Level 4 Reverse Phase Protein Array (RPPA) data contains intra-batc Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface area, Experiment IF Channels, Transcripts per Cell, Percent of Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, CosMx Bundle Contents,A comma separated list of filenames within the CosMx bundle zip file,,,,TRUE,Spatial Transcriptomics,,, CosMx Assay Type,The specification for barcodes on each image. Either RNA probe or protein antibody according to the assay,"RNA, Protein",,,TRUE,Spatial Transcriptomics,,, -Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name. In Xenium, this refers to the string entered as the name in panel specification (e.g. "Xenium Human Immuno-Oncology Add-on B Gene Expression"). In CosMx, this refers to the panel name as it appears in the CosMx catalog (e.g. "CosMx Human Universal Cell Characterization Panel (1000-plex)") ,,,,TRUE,Spatial Transcriptomics,,, +Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name. In Xenium, this refers to the string entered as the name in panel specification (e.g. Xenium Human Immuno-Oncology Add-on B Gene Expression). In CosMx, this refers to the panel name as it appears in the CosMx catalog (e.g. CosMx Human Universal Cell Characterization Panel (1000-plex)),,,,TRUE,Spatial Transcriptomics,,, Total Number of Cells,The total number of cells analyzed on the flow cell,,,,TRUE,Spatial Transcriptomics,,, Total Number of Targets,Refers to the target of an assay. Can be genes/transcripts or probes,,,,TRUE,Spatial Transcriptomics,,, Number of FOVs,The total number of FOVs recorded for the sample on a single flow cell,,,,TRUE,Spatial Transcriptomics,,, From 2ca5ec7ec74821e3269984f2341115d23451c7be Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Mon, 22 Apr 2024 10:36:11 -0700 Subject: [PATCH 10/12] Update HTAN.model.csv --- HTAN.model.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 7ac3ee1f..837c03a3 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -168,7 +168,7 @@ RPPA Level 4,Level 4 Reverse Phase Protein Array (RPPA) data contains intra-batc Nanostring CosMx SMI Experiment,"RNA and Protein Panel assays applied as part of Nanostring CosMx Spatial Molecular Imager (SMI)",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, CosMx Bundle Contents, Slide ID, CosMx Assay Type, Panel Name, Protocol Link, Software and Version, Total Number of Cells, Total Number of Targets, Number of FOVs, Surface area, Experiment IF Channels, Transcripts per Cell, Percent of Transcripts within Cells, Mean Total Transcripts per Area, Unique Genes, Total Negative Probe Counts",,FALSE,Spatial Transcriptomics,Biospecimen,, CosMx Bundle Contents,A comma separated list of filenames within the CosMx bundle zip file,,,,TRUE,Spatial Transcriptomics,,, CosMx Assay Type,The specification for barcodes on each image. Either RNA probe or protein antibody according to the assay,"RNA, Protein",,,TRUE,Spatial Transcriptomics,,, -Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name. In Xenium, this refers to the string entered as the name in panel specification (e.g. Xenium Human Immuno-Oncology Add-on B Gene Expression). In CosMx, this refers to the panel name as it appears in the CosMx catalog (e.g. CosMx Human Universal Cell Characterization Panel (1000-plex)),,,,TRUE,Spatial Transcriptomics,,, +Panel Name,"The human-readable panel name. This could be the Gene Panel name or Protein Panel name. In Xenium, this refers to the string entered as the name in panel specification (e.g. Xenium Human Immuno-Oncology Add-on B Gene Expression). In CosMx, this refers to the panel name as it appears in the CosMx catalog (e.g. CosMx Human Universal Cell Characterization Panel (1000-plex))",,,,TRUE,Spatial Transcriptomics,,, Total Number of Cells,The total number of cells analyzed on the flow cell,,,,TRUE,Spatial Transcriptomics,,, Total Number of Targets,Refers to the target of an assay. Can be genes/transcripts or probes,,,,TRUE,Spatial Transcriptomics,,, Number of FOVs,The total number of FOVs recorded for the sample on a single flow cell,,,,TRUE,Spatial Transcriptomics,,, From 86d0f49f83af1a527490c37392fae3ec5b4bc0bf Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 15:31:31 +0000 Subject: [PATCH 11/12] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index a58fde85..0b5e6196 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -1291,9 +1291,6 @@ }, { "@id": "bts:Pkc" - }, - { - "@id": "bts:CosMxExperiment" } ], "sms:displayName": "File Format", @@ -2839,23 +2836,6 @@ "sms:required": "sms:false", "sms:validationRules": [] }, - { - "@id": "bts:CosMxExperiment", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "CosMxExperiment", - "rdfs:subClassOf": [ - { - "@id": "bts:FileFormat" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "CosMx Experiment", - "sms:required": "sms:false", - "sms:validationRules": [] - }, { "@id": "bts:Checksum", "@type": "rdfs:Class", @@ -41726,7 +41706,7 @@ { "@id": "bts:Surfacearea", "@type": "rdfs:Class", - "rdfs:comment": "Surface area of the ROI in square microns (µm2). In CosMx, this is referred to as the Scan Area. In Xenium, this is referred to as the Region Area", + "rdfs:comment": "Surface area of the ROI in square microns (µm^2). In CosMx, this is referred to as the Scan Area. In Xenium, this is referred to as the Region Area", "rdfs:label": "Surfacearea", "rdfs:subClassOf": [ { @@ -43894,7 +43874,7 @@ { "@id": "bts:PanelName", "@type": "rdfs:Class", - "rdfs:comment": "The human-readable panel name. This could be the Gene Panel name or Protein Panel name", + "rdfs:comment": "The human-readable panel name. This could be the Gene Panel name or Protein Panel name. In Xenium, this refers to the string entered as the name in panel specification (e.g. Xenium Human Immuno-Oncology Add-on B Gene Expression). In CosMx, this refers to the panel name as it appears in the CosMx catalog (e.g. CosMx Human Universal Cell Characterization Panel (1000-plex))", "rdfs:label": "PanelName", "rdfs:subClassOf": [ { From f87bf5e9b2fb4d560535f47c55ab945204cce01c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 15:43:20 +0000 Subject: [PATCH 12/12] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 102 ---------------------------------------------- 1 file changed, 102 deletions(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index b1632854..04ba18cc 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -44125,57 +44125,6 @@ "sms:required": "sms:true", "sms:validationRules": [] }, - { - "@id": "bts:PanelName", - "@type": "rdfs:Class", - "rdfs:comment": "The human-readable panel name. This could be the Gene Panel name or Protein Panel name. In Xenium, this refers to the string entered as the name in panel specification (e.g. Xenium Human Immuno-Oncology Add-on B Gene Expression). In CosMx, this refers to the panel name as it appears in the CosMx catalog (e.g. CosMx Human Universal Cell Characterization Panel (1000-plex))", - "rdfs:label": "PanelName", - "rdfs:subClassOf": [ - { - "@id": "bts:SpatialTranscriptomics" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Panel Name", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:TotalNumberofCells", - "@type": "rdfs:Class", - "rdfs:comment": "The total number of cells analyzed on the flow cell", - "rdfs:label": "TotalNumberofCells", - "rdfs:subClassOf": [ - { - "@id": "bts:SpatialTranscriptomics" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Total Number of Cells", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:TotalNumberofTargets", - "@type": "rdfs:Class", - "rdfs:comment": "Refers to the target of an assay. Can be genes/transcripts or probes", - "rdfs:label": "TotalNumberofTargets", - "rdfs:subClassOf": [ - { - "@id": "bts:SpatialTranscriptomics" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Total Number of Targets", - "sms:required": "sms:true", - "sms:validationRules": [] - }, { "@id": "bts:NumberofFOVs", "@type": "rdfs:Class", @@ -44193,57 +44142,6 @@ "sms:required": "sms:true", "sms:validationRules": [] }, - { - "@id": "bts:ExperimentIFChannels", - "@type": "rdfs:Class", - "rdfs:comment": "A comma-separated list with any number of channels the user deems appropriate(Example: PanCK, CD45, CD3, DAPI)", - "rdfs:label": "ExperimentIFChannels", - "rdfs:subClassOf": [ - { - "@id": "bts:SpatialTranscriptomics" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Experiment IF Channels", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:TranscriptsperCell", - "@type": "rdfs:Class", - "rdfs:comment": "Mean or Median transcript count per cell analyzed on the flow cell or slide", - "rdfs:label": "TranscriptsperCell", - "rdfs:subClassOf": [ - { - "@id": "bts:SpatialTranscriptomics" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Transcripts per Cell", - "sms:required": "sms:true", - "sms:validationRules": [] - }, - { - "@id": "bts:PercentofTranscriptswithinCells", - "@type": "rdfs:Class", - "rdfs:comment": "The percentage of transcripts assigned to assayed cells", - "rdfs:label": "PercentofTranscriptswithinCells", - "rdfs:subClassOf": [ - { - "@id": "bts:SpatialTranscriptomics" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Percent of Transcripts within Cells", - "sms:required": "sms:true", - "sms:validationRules": [] - }, { "@id": "bts:MeanTotalTranscriptsperArea", "@type": "rdfs:Class",