From f74fa2aaf30fcb821410b950f8c69c209287b840 Mon Sep 17 00:00:00 2001 From: Adam Taylor Date: Wed, 13 Dec 2023 16:54:16 +0000 Subject: [PATCH 1/2] Revert to basic filename regex --- HTAN.model.csv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 7568b7a3..16061472 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -5,7 +5,7 @@ Sequencing,Module for next generation sequencing assays,,,,FALSE,Assay,,, Component,"Category of metadata (e.g. Diagnosis, Biospecimen, scRNA-seq Level 1, etc.); provide the same one for all items/rows.",,,,TRUE,,,https://w3id.org/biolink/vocab/category, Patient,HTAN patient,,"Component, HTAN Participant ID",,FALSE,Individual Organism,"Demographics, Family History, Exposure, Follow Up, Diagnosis, Therapy, Molecular Test",, File,A type of Information Content Entity specific to OS,,,,FALSE,Information Content Entity,,https://w3id.org/biolink/vocab/DataFile, -Filename,Name of a file,,,,TRUE,,,,regex search ^(([a-zA-Z0-9\_\-\.\+\(\'\)\ ]+/)+[a-zA-Z0-9\_\-\.\+\(\'\)]+)$ +Filename,Name of a file,,,,TRUE,,,,regex search ^.+\/\S*$ File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc",,,TRUE,,,, Checksum,MD5 checksum of the BAM file,,,,TRUE,Information Content Entity,,, HTAN Data File ID,Self-identifier for this data file - HTAN ID of this file HTAN ID SOP (eg HTANx_yyy_zzz),,,,TRUE,File,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))_([0-9]\d*|0000)$ warning @@ -42,9 +42,9 @@ scRNA-seq Level 1,Single-cell RNA-seq [EFO_0008913],,"Component, Filename, File scRNA-seq Level 2,Alignment workflows downstream of scRNA-seq Level 1,,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Data File ID, scRNAseq Workflow Type, Workflow Version, scRNAseq Workflow Parameters Description, Workflow Link, Genomic Reference, Genomic Reference URL, Genome Annotation URL, Checksum, Whitelist Cell Barcode File Link, Cell Barcode Tag, UMI Tag, Applied Hard Trimming",,FALSE,Sequencing,scRNA-seq Level 1,, scRNA-seq Level 3,Gene and Isoform expression files,,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Data File ID, Data Category, Matrix Type, Linked Matrices, Cell Median Number Reads, Cell Median Number Genes, Cell Total, scRNAseq Workflow Type, scRNAseq Workflow Parameters Description, Workflow Link, Workflow Version",,FALSE,Sequencing,scRNA-seq Level 2,, scRNA-seq Level 4,"Data represents the relationships between cells derived from Level 3 expression data and shown as tSNE or UMAP coordinates per cell, plus all other cell-specific meta information (e.g., cell type)",,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Data File ID, scRNAseq Workflow Type, scRNAseq Workflow Parameters Description, Workflow Version, Workflow Link",,FALSE,Sequencing,scRNA-seq Level 3,, -Slide-seq Level 1,"Raw sequencing files for the Slide-seq assay.",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, Nucleic Acid Source, Read Indicator, Spatial Read1, Spatial Read2, End Bias, Reverse Transcription Primer, Spatial Barcode Offset, Spatial Barcode and UMI, Spike In, Sequencing Platform, Technical Replicate Group, Protocol Link, Spatial Library Construction Method, Library Preparation Days from Index, Sequencing Library Construction Days from Index, Nucleic Acid Capture Days from Index",,FALSE,Spatial Transcriptomics,Biospecimen,, -Slide-seq Level 2,"Aligned sequencing files and QC for the Slide-seq assay.",,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Data File ID, Slide-seq Workflow Type, Workflow Version, Slide-seq Workflow Parameter Description, Workflow Link, Genomic Reference, Genomic Reference URL, Genome Annotation URL, Checksum, Spatial Barcode Tag, Matched Spatial Barcode Tag, UMI Tag, Applied Hard Trimming",,FALSE,Spatial Transcriptomics,Slide-seq Level 1,, -Slide-seq Level 3,"Gene matrices with features and barcodes for Slide-seq as well as spatial information (bead location files).",,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Data File ID, Run ID, Sequencing Batch ID, Data Category, Matrix Type, Slide-seq Workflow Type, Workflow Version, Slide-seq Workflow Parameter Description, Workflow Link, Beads Total, Median UMI Counts per Spot, Median Number Genes per Spatial Spot, Slide-seq Bead File Type, Slide-seq Fragment Size",,FALSE,Spatial Transcriptomics,Slide-seq Level 2,, +Slide-seq Level 1,Raw sequencing files for the Slide-seq assay.,,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, Nucleic Acid Source, Read Indicator, Spatial Read1, Spatial Read2, End Bias, Reverse Transcription Primer, Spatial Barcode Offset, Spatial Barcode and UMI, Spike In, Sequencing Platform, Technical Replicate Group, Protocol Link, Spatial Library Construction Method, Library Preparation Days from Index, Sequencing Library Construction Days from Index, Nucleic Acid Capture Days from Index",,FALSE,Spatial Transcriptomics,Biospecimen,, +Slide-seq Level 2,Aligned sequencing files and QC for the Slide-seq assay.,,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Data File ID, Slide-seq Workflow Type, Workflow Version, Slide-seq Workflow Parameter Description, Workflow Link, Genomic Reference, Genomic Reference URL, Genome Annotation URL, Checksum, Spatial Barcode Tag, Matched Spatial Barcode Tag, UMI Tag, Applied Hard Trimming",,FALSE,Spatial Transcriptomics,Slide-seq Level 1,, +Slide-seq Level 3,Gene matrices with features and barcodes for Slide-seq as well as spatial information (bead location files).,,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Data File ID, Run ID, Sequencing Batch ID, Data Category, Matrix Type, Slide-seq Workflow Type, Workflow Version, Slide-seq Workflow Parameter Description, Workflow Link, Beads Total, Median UMI Counts per Spot, Median Number Genes per Spatial Spot, Slide-seq Bead File Type, Slide-seq Fragment Size",,FALSE,Spatial Transcriptomics,Slide-seq Level 2,, Slide-seq Fragment Size,Average cDNA length associated with the experiemtn. Integer,,,,FALSE,Spatial Transcriptomics,,, Matched Spatial Barcode Tag,SAM tag for matched spot barcode field; please provide a valid spot barcode tag (e.g. CB:Z) (Slide-seq specific),,,,TRUE,Spatial Transcriptomics,,, Beads Total,Number of sequenced beads. Applies to raw counts matrix only. Integer,,,,FALSE,Spatial Transcriptomics,,, @@ -879,7 +879,7 @@ RPPA Validation Status,Valid = RPPA and WB correlation > 0.7; Use with Caution = Antibody Notes,Notes on antibodies replacements and antibody recognition observations.,,,,FALSE,HTAN RPPA Antibody Table,,, Pre-processing Completed,Pre-processing steps completed to convert level 1 raw data to a single level 2 image,"Illumination correction, Tile Stitching, Channel/Cycle Registration, TMA de-arraying, None, Other",,,TRUE,Imaging,,https://www.miti-consortium.org/, Pre-processing Required,Pre-processing steps required to convert level 1 raw data to a single level 2 image,"Illumination correction, Tile Stitching, Channel/Cycle Registration, TMA de-arraying, None, Other",,,TRUE,Imaging,,https://www.miti-consortium.org/, -Publication,"An empty parent attribute for publications",,,,FALSE,,,, +Publication,An empty parent attribute for publications,,,,FALSE,,,, Publication Manifest,Publication specific attributes.,,"Component,Publication-associated HTAN Parent Data File ID, HTAN Grant ID, HTAN Center ID, Publication Content Type, DOI, Title, Authors, Corresponding Author, Corresponding Author ORCID, Year of Publication, Location of Publication, Publication Abstract, License, PMID, Publication contains HTAN ID, Data Type, Tool, Supporting Link, Supporting Link Description",,FALSE,Publication,,, Publication-associated HTAN Parent Data File ID,HTAN Data File Identifier(s) of the files associated with the content presented/published. Should be comma-separated lists.,,,,TRUE,Publication,,,list like HTAN Grant ID,HTAN grant number(s) (i.e. CA------ format) associated with the content presented/published.,,,,TRUE,Publication,,,list like From 34c2e2988fe159a669476622abcf20236b5b63bd Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:05:25 +0000 Subject: [PATCH 2/2] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index c517ecab..05074fc1 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -2476,7 +2476,7 @@ "sms:displayName": "Filename", "sms:required": "sms:true", "sms:validationRules": [ - "regex search ^(([a-zA-Z0-9\\_\\-\\.\\+\\(\\'\\)\\ ]+/)+[a-zA-Z0-9\\_\\-\\.\\+\\(\\'\\)]+)$" + "regex search ^.+\\/\\S*$" ] }, {