From eff9f471b55de67928bddac9d9908b3c00e51992 Mon Sep 17 00:00:00 2001
From: sbabyanusha <61431648+sbabyanusha@users.noreply.github.com>
Date: Fri, 11 Aug 2023 16:41:55 -0400
Subject: [PATCH] Genetic ancestry data for all TCGA PANCAN studies (#1880)
---
public/gbm_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
public/hnsc_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
public/kich_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
12 files changed, 105 insertions(+), 9 deletions(-)
create mode 100644 public/gbm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/gbm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/hnsc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/hnsc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/kich_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/kich_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
diff --git a/public/gbm_tcga_pan_can_atlas_2018/README.md b/public/gbm_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/gbm_tcga_pan_can_atlas_2018/README.md
+++ b/public/gbm_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDC (Genomic Data Commons) publication page: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta". The file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/gbm_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/gbm_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 66ef8a7dbb..93d9ef45a6 100644
--- a/public/gbm_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/gbm_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:0d10a64d6946b1e625c82a898c92a7353ba866ef1cecf1d9a777cb1076b72cb0
-size 115588
+oid sha256:5d0234de398cd1fd55e17bc34eb47781fddd47cbb3f2168e54f535dafa31d144
+size 118038
diff --git a/public/gbm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/gbm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..4e29065d7b
--- /dev/null
+++ b/public/gbm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29673734391592c0b542a742decef5059438db4cde48d60ae0026c3292635024
+size 28537
diff --git a/public/gbm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/gbm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..4e3678bd4a
--- /dev/null
+++ b/public/gbm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: gbm_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al. (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
diff --git a/public/hnsc_tcga_pan_can_atlas_2018/README.md b/public/hnsc_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/hnsc_tcga_pan_can_atlas_2018/README.md
+++ b/public/hnsc_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDC (Genomic Data Commons) publication page: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta". The file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/hnsc_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/hnsc_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 70d65b30a7..b92108ddcc 100644
--- a/public/hnsc_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/hnsc_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:5a5d8b4776704bf66b2f8e8a1804db47da98ba455dd0ae721974bc66fbd12c41
-size 156380
+oid sha256:10e6eefdf38c9fd3bd5eb160535619c54a519fb86d0eca99d708f342dd23024f
+size 158624
diff --git a/public/hnsc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/hnsc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..49434638e3
--- /dev/null
+++ b/public/hnsc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3b1a1bcc613bf5cad64c202dd876a1856ecee89940f0b1b00cddd65698d5d47
+size 29910
diff --git a/public/hnsc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/hnsc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..4baf167e51
--- /dev/null
+++ b/public/hnsc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: hnsc_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al. (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
diff --git a/public/kich_tcga_pan_can_atlas_2018/README.md b/public/kich_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/kich_tcga_pan_can_atlas_2018/README.md
+++ b/public/kich_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDC (Genomic Data Commons) publication page: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta". The file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/kich_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/kich_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 36741f8008..dc55ec0f48 100644
--- a/public/kich_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/kich_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:6324ffc8153494cfee4e603157a4384a2ae03b9678fc51f9d92cf830745014c3
-size 24526
+oid sha256:4ef7ae1e29eb9b1e24f3ace412cf343b6267e2750cf643c443ff2f1e1fd11bda
+size 24868
diff --git a/public/kich_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/kich_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..e110587d76
--- /dev/null
+++ b/public/kich_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caf980b6340682d6886444e485d5bbcb01abe3356ee43e3a619f044bf895f8b6
+size 4063
diff --git a/public/kich_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/kich_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..238aa436f8
--- /dev/null
+++ b/public/kich_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: kich_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al. (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
\ No newline at end of file