From ae1aca236d2f4b846d78498da05855e775266c6a Mon Sep 17 00:00:00 2001
From: sbabyanusha <61431648+sbabyanusha@users.noreply.github.com>
Date: Fri, 11 Aug 2023 14:16:22 -0400
Subject: [PATCH] Genetic ancestry data for all TCGA PANCAN studies (#1877)
---
public/acc_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 10 ++++++++++
public/blca_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
public/brca_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
public/cesc_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
public/chol_tcga_pan_can_atlas_2018/README.md | 20 ++++++++++++++++++-
.../data_clinical_patient.txt | 4 ++--
.../data_genetic_ancestry.txt | 3 +++
.../meta_genetic_ancestry.txt | 11 ++++++++++
20 files changed, 174 insertions(+), 15 deletions(-)
create mode 100644 public/acc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/acc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/blca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/blca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/brca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/brca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/cesc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/cesc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
create mode 100644 public/chol_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
create mode 100644 public/chol_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
diff --git a/public/acc_tcga_pan_can_atlas_2018/README.md b/public/acc_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/acc_tcga_pan_can_atlas_2018/README.md
+++ b/public/acc_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta". The file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/acc_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/acc_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 4a538ec05e..d8087225bb 100644
--- a/public/acc_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/acc_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:17c2a306a7fdcb36455c89c74b9ab5ccb697cfa7adfc1c0913709870ebddb338
-size 31230
+oid sha256:3e9338f0d969fa5174c5575dee2d9e3627252e8277ca013c0bbc6d579e66c671
+size 31688
diff --git a/public/acc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/acc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..6db7fc88a5
--- /dev/null
+++ b/public/acc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50c4c53dcdede6926d138ed4cee7a77ed6a47fa48ffc2c3d5d057730478b9e07
+size 5382
diff --git a/public/acc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/acc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..efc59f6a52
--- /dev/null
+++ b/public/acc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,10 @@
+cancer_study_identifier: acc_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
diff --git a/public/blca_tcga_pan_can_atlas_2018/README.md b/public/blca_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/blca_tcga_pan_can_atlas_2018/README.md
+++ b/public/blca_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta". The file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/blca_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/blca_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 8dec57ed7e..725d03ebee 100644
--- a/public/blca_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/blca_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:1f4c60d42bea0966f5a72e76e3e25ea4af41a59a8a16b9538c0a02a6e0bbce57
-size 125317
+oid sha256:112c38fd0ab3df68ceda82eec24bcdf10d7cf6622da4622bc1dbdc162ff69d5d
+size 126963
diff --git a/public/blca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/blca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..c699e68214
--- /dev/null
+++ b/public/blca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e827efe98f6fb1fadae5361bcb77b223c41f03f7e199b9505d7ebb7c9ee1d59
+size 23759
diff --git a/public/blca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/blca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..0a2073f306
--- /dev/null
+++ b/public/blca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: blca_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
diff --git a/public/brca_tcga_pan_can_atlas_2018/README.md b/public/brca_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/brca_tcga_pan_can_atlas_2018/README.md
+++ b/public/brca_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta". The file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/brca_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/brca_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 849f85d623..b4acfd0825 100644
--- a/public/brca_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/brca_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:10392d88e2b9077fffcb9e649276455aa1740ba22cc14d887848fce3b7183550
-size 337168
+oid sha256:c6c49d1858b2c07d93d2552c889e8fd8b519f1140ca57f774356d49f383571c9
+size 341638
diff --git a/public/brca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/brca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..ed2ef3eb4e
--- /dev/null
+++ b/public/brca_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34529dd0fcd2b06215d93f28acf8191c9a17364e145c43cadf45ad2727c5abcb
+size 62835
diff --git a/public/brca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/brca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..e007daa0b6
--- /dev/null
+++ b/public/brca_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: brca_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
diff --git a/public/cesc_tcga_pan_can_atlas_2018/README.md b/public/cesc_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/cesc_tcga_pan_can_atlas_2018/README.md
+++ b/public/cesc_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta". The file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/cesc_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/cesc_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index d6fbc62148..fbe32f488b 100644
--- a/public/cesc_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/cesc_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:842b8b92e2c70c8e27a827e428d19e04e4cb257e609609f546d6c15f98d41dc6
-size 93965
+oid sha256:cf308dd0c06722a4b3142d5da89d9ecc323c82dd2437284b13792484a29f3fb8
+size 95319
diff --git a/public/cesc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/cesc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..c1e2114db8
--- /dev/null
+++ b/public/cesc_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42d1bcd8d2331fb9487b5c2b26370b7f89e8a93a2568209e83bfd6e6f343c9c7
+size 16783
diff --git a/public/cesc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/cesc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..f1d5e6d480
--- /dev/null
+++ b/public/cesc_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: cesc_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
diff --git a/public/chol_tcga_pan_can_atlas_2018/README.md b/public/chol_tcga_pan_can_atlas_2018/README.md
index b64cd59ae4..4372ea260f 100644
--- a/public/chol_tcga_pan_can_atlas_2018/README.md
+++ b/public/chol_tcga_pan_can_atlas_2018/README.md
@@ -7,4 +7,22 @@
- File Used: `Merge_Clinical.Level_1.20160128` (clin.merged.txt) for each cancer type.
**Data Transformation**
-- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
\ No newline at end of file
+- The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+
+### The Genetic Ancestry data:
+
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020
+- File Used: `Admixture_by_sample.txt` (Admix percent by sample) for each cancer type.
+
+
+### The Methylation data:
+
+**Data Source**
+- GDAC Firehose: https://gdc.cancer.gov/node/977
+- File Used: `jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv` (DNA methylation 450K only beta value data matrix) for each cancer type.
+
+**Data Transformation**
+ - The detailed transformation steps are listed in the Pull Request [here](https://github.com/cBioPortal/datahub/pull/1597)
+ - The meta info for the Infinium Illumina 450k probes used for this profile is under the folder "probe_meta". The file "probe_450k_mapinfo_PQ.txt" is the original download from Illumina.
\ No newline at end of file
diff --git a/public/chol_tcga_pan_can_atlas_2018/data_clinical_patient.txt b/public/chol_tcga_pan_can_atlas_2018/data_clinical_patient.txt
index 9714be06f3..7c79e9a973 100644
--- a/public/chol_tcga_pan_can_atlas_2018/data_clinical_patient.txt
+++ b/public/chol_tcga_pan_can_atlas_2018/data_clinical_patient.txt
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:021576354b417bf2a7e54b25a3da20337b8cbf3e77351d6afdd7c28e37629a38
-size 16722
+oid sha256:e4c8665184edbb3d1b7afa9c906cca32cab64af769bd5579e1b6f86030f06923
+size 16946
diff --git a/public/chol_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt b/public/chol_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
new file mode 100644
index 0000000000..3fa7c01a8f
--- /dev/null
+++ b/public/chol_tcga_pan_can_atlas_2018/data_genetic_ancestry.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1f4596e1794679ab5e7e43bb99359479b3ea0dbb5b78c860a86c4186401ef6e
+size 2301
diff --git a/public/chol_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt b/public/chol_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
new file mode 100644
index 0000000000..bad1652fe2
--- /dev/null
+++ b/public/chol_tcga_pan_can_atlas_2018/meta_genetic_ancestry.txt
@@ -0,0 +1,11 @@
+cancer_study_identifier: chol_tcga_pan_can_atlas_2018
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: GENETIC_ANCESTRY
+datatype: LIMIT-VALUE
+stable_id: genetic_ancestry
+profile_name: Genetic Ancestry
+profile_description: Genetic ancestries were determined using five different methods as described in Carrot-Zhang et al (2020). These consensus calls were created based on the ancestral population that received the majority of assignments for each patient. The original data is available at https://gdc.cancer.gov/about-data/publications/CCG-AIM-2020.
+data_filename: data_genetic_ancestry.txt
+show_profile_in_analysis_tab: true
+generic_entity_meta_properties: NAME
+value_sort_order: ASC
\ No newline at end of file