Skip to content

Commit

Permalink
feat: Adds idc_current dataset for IDC-TCIA (#239)
Browse files Browse the repository at this point in the history
* adds idc_current dataset

* fix: conventional commits
  • Loading branch information
adlersantos committed Dec 14, 2021
1 parent 25d615d commit f92a2f7
Show file tree
Hide file tree
Showing 22 changed files with 35 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.analysis_results_metadata.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.analysis_results_metadata`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.auxiliary_metadata.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.auxiliary_metadata`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.dicom_all.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.dicom_all`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.dicom_metadata.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.dicom_metadata`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.dicom_metadata_curated.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.dicom_metadata_curated`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.measurement_groups.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.measurement_groups`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.nlst_canc.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.nlst_canc`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.nlst_ctab.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.nlst_ctab`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.nlst_ctabc.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.nlst_ctabc`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.nlst_prsn.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.nlst_prsn`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.nlst_screen.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.nlst_screen`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.original_collections_metadata.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.original_collections_metadata`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.qualitative_measurements.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.qualitative_measurements`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.quantitative_measurements.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.quantitative_measurements`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.segmentations.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.segmentations`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.tcga_biospecimen_rel9.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.tcga_biospecimen_rel9`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.tcga_clinical_rel9.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.tcga_clinical_rel9`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Pass-through query: returns every column of idc_v5.version_metadata.
-- NOTE(review): PROJECT looks like a placeholder substituted before the query runs; confirm.
select *
from `PROJECT.idc_v5.version_metadata`
10 changes: 10 additions & 0 deletions datasets/idc/_terraform/idc_dataset.tf
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,13 @@ resource "google_bigquery_dataset" "idc_v5" {
# Exports the dataset_id attribute of the idc_v5 BigQuery dataset resource.
output "bigquery_dataset-idc_v5-dataset_id" {
  value = google_bigquery_dataset.idc_v5.dataset_id
}

# BigQuery dataset "idc_current", created in the project given by var.project_id.
resource "google_bigquery_dataset" "idc_current" {
  dataset_id  = "idc_current"
  project     = var.project_id
  description = "Imaging Data Commons (IDC) - The Cancer Imaging Archive (TCIA) current data"
}

# Exports the dataset_id attribute of the idc_current BigQuery dataset resource.
output "bigquery_dataset-idc_current-dataset_id" {
  value = google_bigquery_dataset.idc_current.dataset_id
}
2 changes: 1 addition & 1 deletion datasets/idc/copy_tcia_data/copy_tcia_data_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
"QUERIES_DIR": "/custom/queries",
"GCP_PROJECT": "{{ var.value.gcp_project }}",
"DATASET_NAME": "idc",
"DATASET_VERSIONS": '["v1", "v2", "v3", "v4", "v5"]',
"DATASET_VERSIONS": '["v1", "v2", "v3", "v4", "v5", "current"]',
},
resources={"limit_memory": "128M", "limit_cpu": "200m"},
)
Expand Down
4 changes: 2 additions & 2 deletions datasets/idc/copy_tcia_data/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dag:
default_args:
owner: "Google"
depends_on_past: False
start_date: '2021-11-17'
start_date: '2021-11-23'
max_active_runs: 1
schedule_interval: "@monthly"
catchup: False
Expand Down Expand Up @@ -92,7 +92,7 @@ dag:
GCP_PROJECT: "{{ var.value.gcp_project }}"
DATASET_NAME: "idc"
DATASET_VERSIONS: >-
["v1", "v2", "v3", "v4", "v5"]
["v1", "v2", "v3", "v4", "v5", "current"]
resources:
limit_memory: "128M"
limit_cpu: "200m"
Expand Down
4 changes: 4 additions & 0 deletions datasets/idc/dataset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@ resources:
- type: bigquery_dataset
dataset_id: idc_v5
description: Imaging Data Commons (IDC) - The Cancer Imaging Archive (TCIA) v5 data

- type: bigquery_dataset
dataset_id: idc_current
description: Imaging Data Commons (IDC) - The Cancer Imaging Archive (TCIA) current data

0 comments on commit f92a2f7

Please sign in to comment.