From ec98dbd57769a7c75ff0a60ec6e7757df169718a Mon Sep 17 00:00:00 2001
From: sbabyanusha <61431648+sbabyanusha@users.noreply.github.com>
Date: Mon, 14 Aug 2023 09:39:55 -0400
Subject: [PATCH] Genetic ancestry data for all TCGA PANCAN studies (#1885)
---
public/skcm_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
public/stad_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
public/tgct_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
public/thca_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
16 files changed, 140 insertions(+), 12 deletions(-)
create mode 100644 public/skcm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/skcm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/stad_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/stad_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/tgct_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/tgct_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/thca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/thca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
diff --git a/public/skcm_tcga_pan_can_atlas_2018/README.md b/public/skcm_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/skcm_tcga_pan_can_atlas_2018/README.md
+++ b/public/skcm_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDC publication page: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/skcm_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/skcm_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 581665fefe..968a3223de 100644
--- a/public/skcm_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/skcm_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:3c5c9bac3bb3bc19d009378f1bd778d649e602bd172f98895e79820356f89f74
-size 127315
+oid sha256:4430f0040cca60bcbac052edcac27301d6286f5fe8230d514737438956f09090
+size 129151
diff --git a/public/skcm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/skcm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..dca3fcdb4a
--- /dev/null
+++ b/public/skcm_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4535e7196807acb8a64bc4b9bd1063d9a06ba7996ad465da73605ad36bf3252c
+size 25934
diff --git a/public/skcm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/skcm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..58644f6c20
--- /dev/null
+++ b/public/skcm_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: skcm_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al. (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
diff --git a/public/stad_tcga_pan_can_atlas_2018/README.md b/public/stad_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/stad_tcga_pan_can_atlas_2018/README.md
+++ b/public/stad_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDC publication page: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/stad_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/stad_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index d930cf3d9f..c5dd64011d 100644
--- a/public/stad_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/stad_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:29f304ab1bdffd2e5028b2232e42443002b6774b7965b9063e1bb996778a588d
-size 132379
+oid sha256:c7937b5152ec76d1101714f840acf39cfc131d2fb59d7cd4bbeffe852e72275d
+size 134195
diff --git a/public/stad_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/stad_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..e38a274d16
--- /dev/null
+++ b/public/stad_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:816878698393e4b5788a45499110917f1e59d8d10d34f9e921c6ce6310f9efa6
+size 25930
diff --git a/public/stad_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/stad_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..64f7e675e0
--- /dev/null
+++ b/public/stad_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: stad_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al. (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
\ No newline at end of file
diff --git a/public/tgct_tcga_pan_can_atlas_2018/README.md b/public/tgct_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/tgct_tcga_pan_can_atlas_2018/README.md
+++ b/public/tgct_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDC publication page: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/tgct_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/tgct_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index bd757ce2b6..9621b9e059 100644
--- a/public/tgct_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/tgct_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:d298a6d5fd97145c4fe599d5fa88822c4b1d120711d60356ee0b81b0ffe9ae45
-size 47986
+oid sha256:527105f28d74dbe7b85deb30a111e25a57d44f2697a7c8bcb94d9223d3213ae0
+size 48644
diff --git a/public/tgct_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/tgct_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..f7a958c0ee
--- /dev/null
+++ b/public/tgct_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbc30372ba40ebb0e92c0403a494c325564a95e4804b88f2dfbb9e63ae6f4993
+size 9088
diff --git a/public/tgct_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/tgct_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..080ec40b6e
--- /dev/null
+++ b/public/tgct_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: tgct_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al. (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
diff --git a/public/thca_tcga_pan_can_atlas_2018/README.md b/public/thca_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/thca_tcga_pan_can_atlas_2018/README.md
+++ b/public/thca_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDC publication page: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta"; "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/thca_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/thca_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index e8f73c7e98..0b2663ad89 100644
--- a/public/thca_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/thca_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:6eebf0f4acf982757f8f8c7a687e80cf89668107b73fa2e344442de1474b055f
-size 151468
+oid sha256:7bc7060ad300f81b15f3f7498875836327957ff211158beb9b35e5bca38a4e9f
+size 153576
diff --git a/public/thca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/thca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..99d22fc032
--- /dev/null
+++ b/public/thca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec31d153b58f3f32e50e657de4864f07a2f546e8932f5c08c29f2e1c3c761cf8
+size 28722
diff --git a/public/thca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/thca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..ebe5ac3492
--- /dev/null
+++ b/public/thca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: thca_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al. (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
\ No newline at end of file