From ec98dbd57769a7c75ff0a60ec6e7757df169718a Mon Sep 17 00:00:00 2001 From: sbabyanusha <61431648+sbabyanusha@users.noreply.github.com> Date: Mon, 14 Aug 2023 09:39:55 -0400 Subject: [PATCH] Genetic ancestry data for all TCGA PANCAN studies (#1885) --- public/skcm_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++- .../data_clinical_patient.txt | 4 ++-- .../data_genetic_ancestry.txt | 3 +++ .../meta_genetic_ancestry.txt | 11 ++++++++++ public/stad_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++- .../data_clinical_patient.txt | 4 ++-- .../data_genetic_ancestry.txt | 3 +++ .../meta_genetic_ancestry.txt | 11 ++++++++++ public/tgct_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++- .../data_clinical_patient.txt | 4 ++-- .../data_genetic_ancestry.txt | 3 +++ .../meta_genetic_ancestry.txt | 11 ++++++++++ public/thca_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++- .../data_clinical_patient.txt | 4 ++-- .../data_genetic_ancestry.txt | 3 +++ .../meta_genetic_ancestry.txt | 11 ++++++++++ 16 files changed, 140 insertions(+), 12 deletions(-) create mode 100644 public/skcm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt create mode 100644 public/skcm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt create mode 100644 public/stad_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt create mode 100644 public/stad_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt create mode 100644 public/tgct_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt create mode 100644 public/tgct_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt create mode 100644 public/thca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt create mode 100644 public/thca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt diff --git a/public/skcm_tcga_pan_can_atlas_2018/README.md b/public/skcm_tcga_pan_can_atlas_2018/README.md index b64cd59ae4..4372ea260f 100644 --- a/public/skcm_tcga_pan_can_atlas_2018/README.md +++ b/public/skcm_tcga_pan_can_atlas_2018/README.md @@ -7,4 +7,22 @@ - 
File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type. **Data Transformation** -- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) \ No newline at end of file +- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) + +### The Genetic Ancestry data: + + +**Data Source** +- GDAC Firehose: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020 +- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type. + + +### The Methylation data: + +**Data Source** +- GDAC Firehose: https://gdc.cancer.gov/node/977 +- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type. + +**Data Transformation** + - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) + - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; "probe_450k_mapinfo_PQ.txt" is the original download from Illumina. 
\ No newline at end of file diff --git a/public/skcm_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/skcm_tcga_pan_can_atlas_2018/data_clinical_patient.txt index 581665fefe..968a3223de 100644 --- a/public/skcm_tcga_pan_can_atlas_2018/data_clinical_patient.txt +++ b/public/skcm_tcga_pan_can_atlas_2018/data_clinical_patient.txt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c5c9bac3bb3bc19d009378f1bd778d649e602bd172f98895e79820356f89f74 -size 127315 +oid sha256:4430f0040cca60bcbac052edcac27301d6286f5fe8230d514737438956f09090 +size 129151 diff --git a/public/skcm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/skcm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt new file mode 100644 index 0000000000..dca3fcdb4a --- /dev/null +++ b/public/skcm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4535e7196807acb8a64bc4b9bd1063d9a06ba7996ad465da73605ad36bf3252c +size 25934 diff --git a/public/skcm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/skcm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt new file mode 100644 index 0000000000..58644f6c20 --- /dev/null +++ b/public/skcm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt @@ -0,0 +1,11 @@ +cancer_study_identifier: skcm_tcga_pan_can_atlas_2018 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: GENETIC_ANCESTRY +datatype: LIMIT-VALUE +stable_id: genetic_ancestry +profile_name: Genetic Ancestry +profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is here. 
+data_filename: data_genetic_ancestry.txt +show_profile_in_analysis_tab: true +generic_entity_meta_properties: NAME +value_sort_order: ASC diff --git a/public/stad_tcga_pan_can_atlas_2018/README.md b/public/stad_tcga_pan_can_atlas_2018/README.md index b64cd59ae4..4372ea260f 100644 --- a/public/stad_tcga_pan_can_atlas_2018/README.md +++ b/public/stad_tcga_pan_can_atlas_2018/README.md @@ -7,4 +7,22 @@ - File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type. **Data Transformation** -- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) \ No newline at end of file +- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) + +### The Genetic Ancestry data: + + +**Data Source** +- GDAC Firehose: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020 +- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type. + + +### The Methylation data: + +**Data Source** +- GDAC Firehose: https://gdc.cancer.gov/node/977 +- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type. + +**Data Transformation** + - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) + - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; "probe_450k_mapinfo_PQ.txt" is the original download from Illumina. 
\ No newline at end of file diff --git a/public/stad_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/stad_tcga_pan_can_atlas_2018/data_clinical_patient.txt index d930cf3d9f..c5dd64011d 100644 --- a/public/stad_tcga_pan_can_atlas_2018/data_clinical_patient.txt +++ b/public/stad_tcga_pan_can_atlas_2018/data_clinical_patient.txt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29f304ab1bdffd2e5028b2232e42443002b6774b7965b9063e1bb996778a588d -size 132379 +oid sha256:c7937b5152ec76d1101714f840acf39cfc131d2fb59d7cd4bbeffe852e72275d +size 134195 diff --git a/public/stad_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/stad_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt new file mode 100644 index 0000000000..e38a274d16 --- /dev/null +++ b/public/stad_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:816878698393e4b5788a45499110917f1e59d8d10d34f9e921c6ce6310f9efa6 +size 25930 diff --git a/public/stad_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/stad_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt new file mode 100644 index 0000000000..64f7e675e0 --- /dev/null +++ b/public/stad_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt @@ -0,0 +1,11 @@ +cancer_study_identifier: stad_tcga_pan_can_atlas_2018 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: GENETIC_ANCESTRY +datatype: LIMIT-VALUE +stable_id: genetic_ancestry +profile_name: Genetic Ancestry +profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is here. 
+data_filename: data_genetic_ancestry.txt +show_profile_in_analysis_tab: true +generic_entity_meta_properties: NAME +value_sort_order: ASC \ No newline at end of file diff --git a/public/tgct_tcga_pan_can_atlas_2018/README.md b/public/tgct_tcga_pan_can_atlas_2018/README.md index b64cd59ae4..4372ea260f 100644 --- a/public/tgct_tcga_pan_can_atlas_2018/README.md +++ b/public/tgct_tcga_pan_can_atlas_2018/README.md @@ -7,4 +7,22 @@ - File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type. **Data Transformation** -- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) \ No newline at end of file +- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) + +### The Genetic Ancestry data: + + +**Data Source** +- GDAC Firehose: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020 +- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type. + + +### The Methylation data: + +**Data Source** +- GDAC Firehose: https://gdc.cancer.gov/node/977 +- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type. + +**Data Transformation** + - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) + - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; "probe_450k_mapinfo_PQ.txt" is the original download from Illumina. 
\ No newline at end of file diff --git a/public/tgct_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/tgct_tcga_pan_can_atlas_2018/data_clinical_patient.txt index bd757ce2b6..9621b9e059 100644 --- a/public/tgct_tcga_pan_can_atlas_2018/data_clinical_patient.txt +++ b/public/tgct_tcga_pan_can_atlas_2018/data_clinical_patient.txt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d298a6d5fd97145c4fe599d5fa88822c4b1d120711d60356ee0b81b0ffe9ae45 -size 47986 +oid sha256:527105f28d74dbe7b85deb30a111e25a57d44f2697a7c8bcb94d9223d3213ae0 +size 48644 diff --git a/public/tgct_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/tgct_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt new file mode 100644 index 0000000000..f7a958c0ee --- /dev/null +++ b/public/tgct_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc30372ba40ebb0e92c0403a494c325564a95e4804b88f2dfbb9e63ae6f4993 +size 9088 diff --git a/public/tgct_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/tgct_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt new file mode 100644 index 0000000000..080ec40b6e --- /dev/null +++ b/public/tgct_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt @@ -0,0 +1,11 @@ +cancer_study_identifier: tgct_tcga_pan_can_atlas_2018 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: GENETIC_ANCESTRY +datatype: LIMIT-VALUE +stable_id: genetic_ancestry +profile_name: Genetic Ancestry +profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is here. 
+data_filename: data_genetic_ancestry.txt +show_profile_in_analysis_tab: true +generic_entity_meta_properties: NAME +value_sort_order: ASC diff --git a/public/thca_tcga_pan_can_atlas_2018/README.md b/public/thca_tcga_pan_can_atlas_2018/README.md index b64cd59ae4..4372ea260f 100644 --- a/public/thca_tcga_pan_can_atlas_2018/README.md +++ b/public/thca_tcga_pan_can_atlas_2018/README.md @@ -7,4 +7,22 @@ - File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type. **Data Transformation** -- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) \ No newline at end of file +- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) + +### The Genetic Ancestry data: + + +**Data Source** +- GDAC Firehose: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020 +- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type. + + +### The Methylation data: + +**Data Source** +- GDAC Firehose: https://gdc.cancer.gov/node/977 +- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type. + +**Data Transformation** + - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597) + - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; "probe_450k_mapinfo_PQ.txt" is the original download from Illumina. 
\ No newline at end of file diff --git a/public/thca_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/thca_tcga_pan_can_atlas_2018/data_clinical_patient.txt index e8f73c7e98..0b2663ad89 100644 --- a/public/thca_tcga_pan_can_atlas_2018/data_clinical_patient.txt +++ b/public/thca_tcga_pan_can_atlas_2018/data_clinical_patient.txt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6eebf0f4acf982757f8f8c7a687e80cf89668107b73fa2e344442de1474b055f -size 151468 +oid sha256:7bc7060ad300f81b15f3f7498875836327957ff211158beb9b35e5bca38a4e9f +size 153576 diff --git a/public/thca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/thca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt new file mode 100644 index 0000000000..99d22fc032 --- /dev/null +++ b/public/thca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec31d153b58f3f32e50e657de4864f07a2f546e8932f5c08c29f2e1c3c761cf8 +size 28722 diff --git a/public/thca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/thca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt new file mode 100644 index 0000000000..ebe5ac3492 --- /dev/null +++ b/public/thca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt @@ -0,0 +1,11 @@ +cancer_study_identifier: thca_tcga_pan_can_atlas_2018 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: GENETIC_ANCESTRY +datatype: LIMIT-VALUE +stable_id: genetic_ancestry +profile_name: Genetic Ancestry +profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is here. +data_filename: data_genetic_ancestry.txt +show_profile_in_analysis_tab: true +generic_entity_meta_properties: NAME +value_sort_order: ASC \ No newline at end of file