Commit

Merge pull request #183 from pangeo-forge/disk_size_gb
Add `disk_size_gb` option to DataflowBakery
cisaacstern authored Apr 3, 2024
2 parents 167b565 + 07ae0ad commit e2faa93
Showing 1 changed file with 13 additions and 0 deletions.
pangeo_forge_runner/bakery/dataflow.py (13 additions, 0 deletions)
@@ -58,6 +58,18 @@ def _default_project_id(self):
         """,
     )
 
+    disk_size_gb = Integer(
+        None,
+        allow_none=True,
+        config=True,
+        help="""
+        The disk size, in gigabytes, to use on each remote Compute Engine worker instance.
+
+        Set to None (default) for default sizing
+        (see https://cloud.google.com/dataflow/docs/reference/pipeline-options#worker-level_options for details).
+        """,
+    )
 
     use_dataflow_prime = Bool(
         False,
         config=True,
@@ -181,6 +193,7 @@ def get_pipeline_options(
             project=self.project_id,
             job_name=job_name,
             max_num_workers=self.max_num_workers,
+            disk_size_gb=self.disk_size_gb,
             temp_location=self.temp_gcs_location,
             use_public_ips=self.use_public_ips,
             region=self.region,
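Because the new trait is declared with `config=True`, it can be set like any other pangeo-forge-runner traitlets option, for example in a config file passed to the CLI. A minimal sketch is shown below; the `config.py` filename and the 100 GB value are illustrative and not part of this commit.

# config.py: hypothetical traitlets config file for pangeo-forge-runner
c.DataflowBakery.disk_size_gb = 100  # request 100 GB disks on each Dataflow worker
# Leave unset (default None) to fall back to Dataflow's default worker disk sizing.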
