From 78a91f5be788db156271be0c4f2d2fbb33555c86 Mon Sep 17 00:00:00 2001
From: sbabyanusha <61431648+sbabyanusha@users.noreply.github.com>
Date: Mon, 14 Aug 2023 11:21:22 -0400
Subject: [PATCH] Genetic ancestry data for all TCGA PANCAN studies (#1886)
---
public/thym_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
public/ucec_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
8 files changed, 70 insertions(+), 6 deletions(-)
create mode 100644 public/thym_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/thym_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/ucec_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/ucec_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
diff --git a/public/thym_tcga_pan_can_atlas_2018/README.md b/public/thym_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/thym_tcga_pan_can_atlas_2018/README.md
+++ b/public/thym_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- NCI Genomic Data Commons (GDC) publication page: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- NCI Genomic Data Commons (GDC): https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; the file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/thym_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/thym_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 55cc426952..a10fe36306 100644
--- a/public/thym_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/thym_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:cb092950164553d4947b9911485a52196793c1e49c244599b5b46fd2b5ae64b6
-size 37199
+oid sha256:7b5029864c6ef6f7692631ecfde80338933659bd5b4aa873b324fe4574e151a8
+size 37803
diff --git a/public/thym_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/thym_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..fae7beea13
--- /dev/null
+++ b/public/thym_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58a602997f20408a1272418f9f5220052a352fb6778fe465789e3d983af88826
+size 7584
diff --git a/public/thym_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/thym_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..34399ec417
--- /dev/null
+++ b/public/thym_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: thym_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al. (2020). Consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
diff --git a/public/ucec_tcga_pan_can_atlas_2018/README.md b/public/ucec_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/ucec_tcga_pan_can_atlas_2018/README.md
+++ b/public/ucec_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- NCI Genomic Data Commons (GDC) publication page: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- NCI Genomic Data Commons (GDC): https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; the file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/ucec_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/ucec_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 893b144f3a..7c5659d5ae 100644
--- a/public/ucec_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/ucec_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:0302d14fc6042a73238aabd58f2e9aeb1b346be9a2ad44c121f1c5ebea2e1bf1
-size 159554
+oid sha256:e182af26e936a0bc3b4770b5c93fe83136388c3748bd7ee180f002857c8eb197
+size 162533
diff --git a/public/ucec_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/ucec_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..bf39803b1b
--- /dev/null
+++ b/public/ucec_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc5f3321d897b008594b8c5d7c5f8c2b6069e8aa2e8968f6d0728236e03363d6
+size 30146
diff --git a/public/ucec_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/ucec_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..7cf4b0fb0f
--- /dev/null
+++ b/public/ucec_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: ucec_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al. (2020). Consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
\ No newline at end of file