diff --git a/.github/workflows/spanner-staging-tests.yml b/.github/workflows/spanner-staging-tests.yml
new file mode 100644
index 0000000000..51c22a991e
--- /dev/null
+++ b/.github/workflows/spanner-staging-tests.yml
@@ -0,0 +1,51 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Spanner Staging integration tests
+
+on:
+  workflow_dispatch:
+
+permissions: read-all
+
+jobs:
+  spanner_java_integration_tests_templates:
+    name: Spanner Dataflow Templates Integration Tests
+    timeout-minutes: 180
+    # Run on any runner that matches all the specified runs-on values.
+    runs-on: [ self-hosted, it ]
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
+      - name: Setup Environment
+        id: setup-env
+        uses: ./.github/actions/setup-env
+      - name: Run Integration Tests
+        run: |
+          ./cicd/run-it-tests \
+          --modules-to-build="ALL" \
+          --it-region="us-central1" \
+          --it-project="cloud-teleport-testing" \
+          --it-artifact-bucket="cloud-teleport-testing-it-gitactions" \
+          --it-private-connectivity="datastream-private-connect-us-central1" \
+          --it-spanner-host="https://staging-wrenchworks.sandbox.googleapis.com/"
+      - name: Upload Integration Tests Report
+        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
+        if: always() # always run even if the previous step fails
+        with:
+          name: surefire-test-results
+          path: '**/surefire-reports/TEST-*.xml'
+          retention-days: 1
+      - name: Cleanup Java Environment
+        uses: ./.github/actions/cleanup-java-env
\ No newline at end of file
diff --git a/cicd/cmd/run-it-smoke-tests/main.go b/cicd/cmd/run-it-smoke-tests/main.go
index e5fa462235..5a2fc17fc1 100644
--- a/cicd/cmd/run-it-smoke-tests/main.go
+++ b/cicd/cmd/run-it-smoke-tests/main.go
@@ -66,6 +66,7 @@ func main() {
 		flags.ArtifactBucket(),
 		flags.StageBucket(),
 		flags.PrivateConnectivity(),
+		flags.SpannerHost(),
 		flags.FailureMode(),
 		flags.RetryFailures(),
 		flags.StaticOracleHost(),
diff --git a/cicd/cmd/run-it-tests/main.go b/cicd/cmd/run-it-tests/main.go
index 24b8145d63..8830edfb9b 100644
--- a/cicd/cmd/run-it-tests/main.go
+++ b/cicd/cmd/run-it-tests/main.go
@@ -67,6 +67,7 @@ func main() {
 		flags.StageBucket(),
 		flags.HostIp(),
 		flags.PrivateConnectivity(),
+		flags.SpannerHost(),
 		flags.FailureMode(),
 		flags.RetryFailures(),
 		flags.StaticOracleHost(),
diff --git a/cicd/internal/flags/it-flags.go b/cicd/internal/flags/it-flags.go
index 1aa55e9a08..6937353983 100644
--- a/cicd/internal/flags/it-flags.go
+++ b/cicd/internal/flags/it-flags.go
@@ -101,10 +101,10 @@ func PrivateConnectivity() string {
 }
 
 func SpannerHost() string {
-	if dSpannerHost == "" {
-		return "-DspannerHost=" + "https://staging-wrenchworks.sandbox.googleapis.com/"
+	if dSpannerHost != "" {
+		return "-DspannerHost=" + dSpannerHost
 	}
-	return "-DspannerHost=" + dSpannerHost
+	return ""
 }
 
 func FailureMode() string {
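
Note the behavior flip above: previously `SpannerHost()` fell back to the staging Wrenchworks endpoint whenever no Spanner host flag was passed; now it emits `-DspannerHost=<value>` only when a host is explicitly provided, and the new workflow supplies the staging endpoint itself via `--it-spanner-host`. As a purely illustrative sketch (not the repository's actual test-framework code), a consumer of that system property might resolve the endpoint like this:

```java
// Illustrative sketch only: how a test harness might consume the spannerHost
// system property that these flags forward via -DspannerHost. The fallback
// endpoint below is a placeholder, not necessarily the framework's default.
final class SpannerHostResolver {
  private static final String DEFAULT_SPANNER_HOST = "https://spanner.googleapis.com"; // placeholder

  static String resolve() {
    String host = System.getProperty("spannerHost");
    // With the change above, -DspannerHost is absent unless a host was passed
    // explicitly, so default runs fall back to the framework's own endpoint.
    return (host == null || host.isEmpty()) ? DEFAULT_SPANNER_HOST : host;
  }
}
```
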
diff --git a/contributor-docs/add-integration-or-load-test.md b/contributor-docs/add-integration-or-load-test.md
index ef56147eb5..4c81c39187 100644
--- a/contributor-docs/add-integration-or-load-test.md
+++ b/contributor-docs/add-integration-or-load-test.md
@@ -408,7 +408,7 @@ vary on whether the pipeline under test is a `Batch` or `Streaming` pipeline and
the type of test.
### Structure
-First extend the test class from the [LoadTestBase](https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/it/google-cloud-platform/src/main/java/com/google/cloud/teleport/it/gcp/LoadTestBase.java)
+First extend the test class from the [LoadTestBase](https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java)
class. LoadTestBase contains helper methods which abstract irrelevant
information and make it easier to write load tests. It also defines some
clients and variables which are useful for writing tests.
@@ -552,8 +552,8 @@ public void testSteadyState1hr() {
### Exporting Results
-After the pipeline finishes successfully, we can get the performance metrics using [getMetrics](https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/it/google-cloud-platform/src/main/java/com/google/cloud/teleport/it/gcp/LoadTestBase.java#L272)
-method and export the results to BigQuery by calling the [exportMetricsToBigQuery](https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/it/google-cloud-platform/src/main/java/com/google/cloud/teleport/it/gcp/LoadTestBase.java#L127) method.
+After the pipeline finishes successfully, we can get the performance metrics using [getMetrics](https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java#L279)
+method and export the results to BigQuery by calling the [exportMetricsToBigQuery](https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java#L139) method.
The BigQuery project, dataset, and table to be used to export the data can be specified in the command line using,
* `-DexportProject` - BigQuery Project to export metrics (optional, if not provided `-Dproject` is used)
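
For reference, the `getMetrics`/`exportMetricsToBigQuery` calls described here are typically the last steps of a load test. The fragment below is a hedged sketch based only on the method names cited in this doc; the launch helper and PCollection names are placeholders, not the repository's actual example.

```java
// Hedged sketch of the export step described above. getMetrics and
// exportMetricsToBigQuery are the LoadTestBase helpers linked in this doc;
// launchPipelineUnderTest() and the PCollection names are placeholders.
@Test
public void testSteadyState1hr() throws Exception {
  PipelineLauncher.LaunchInfo info = launchPipelineUnderTest();
  // ... wait for the pipeline to reach a successful terminal/steady state ...
  Map<String, Double> metrics = getMetrics(info, "ReadFromSource.out0", "WriteToSink.in0");
  exportMetricsToBigQuery(info, metrics);
}
```
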
diff --git a/metadata/src/main/java/com/google/cloud/teleport/metadata/SpannerStagingTest.java b/metadata/src/main/java/com/google/cloud/teleport/metadata/SpannerStagingTest.java
new file mode 100644
index 0000000000..48947f267d
--- /dev/null
+++ b/metadata/src/main/java/com/google/cloud/teleport/metadata/SpannerStagingTest.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.cloud.teleport.metadata;
+
+/** Annotation that marks the test as a Spanner staging test. */
+public @interface SpannerStagingTest {}
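
The `pom.xml` changes later in this diff add this class name alongside the other test-category properties and reference `${spanner.staging.tests}` from the new `spannerStagingIntegrationTests` Surefire profile, so a test would opt in by carrying the annotation as a JUnit category. A hypothetical usage sketch (the test class below is invented for illustration):

```java
// Hypothetical example only: shows how a template integration test might opt
// into the Spanner staging suite. The class and test names are invented.
import com.google.cloud.teleport.metadata.SpannerStagingTest;
import com.google.cloud.teleport.metadata.TemplateIntegrationTest;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

@Category({TemplateIntegrationTest.class, SpannerStagingTest.class})
@RunWith(JUnit4.class)
public final class SpannerStagingSmokeIT {

  @Test
  public void runsAgainstStagingSpannerHost() {
    // The staging endpoint arrives via -DspannerHost, which the new workflow
    // sets through --it-spanner-host in cicd/run-it-tests.
  }
}
```
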
diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java
index 979496c892..f1125b33ac 100644
--- a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java
+++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java
@@ -652,7 +652,7 @@ protected void processDescriptions(
this.setHelpText(helpText);
if (example != null && !example.isEmpty()) {
- this.setHelpText(this.getHelpText() + " (Example: " + example + ")");
+ this.setHelpText(this.getHelpText() + " For example, `" + example + "`");
}
}
}
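
This one-line change in the metadata plugin is what drives most of the README regeneration in the rest of this diff: example values are now rendered as "For example, `...`" instead of "(Example: ...)". A small standalone illustration, using the `inputFilePattern` help text from `README_Bulk_Compress_GCS_Files.md` below:

```java
// Mirrors the concatenation in processDescriptions above, using the
// inputFilePattern help text and example from README_Bulk_Compress_GCS_Files.md.
class ExampleRenderingDemo {
  public static void main(String[] args) {
    String helpText = "The Cloud Storage location of the files you'd like to process.";
    String example = "gs://your-bucket/your-files/*.txt";

    // Old rendering: "... (Example: gs://your-bucket/your-files/*.txt)"
    System.out.println(helpText + " (Example: " + example + ")");
    // New rendering: "... For example, `gs://your-bucket/your-files/*.txt`"
    System.out.println(helpText + " For example, `" + example + "`");
  }
}
```

This is presumably also why the regenerated Terraform samples below now show empty strings where the example values used to be inlined.
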
diff --git a/plugins/core-plugin/src/main/resources/README-template.md b/plugins/core-plugin/src/main/resources/README-template.md
index a3b01c3af5..2c80e28a2d 100644
--- a/plugins/core-plugin/src/main/resources/README-template.md
+++ b/plugins/core-plugin/src/main/resources/README-template.md
@@ -21,12 +21,12 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-<#list spec.metadata.parameters as parameter><#if !parameter.optional!false>* **${parameter.name}** : ${parameter.helpText?ensure_ends_with(".")}
+<#list spec.metadata.parameters as parameter><#if !parameter.optional!false>* **${parameter.name}**: ${parameter.helpText?ensure_ends_with(".")}
</#if></#list>
### Optional parameters
-<#list spec.metadata.parameters as parameter><#if parameter.optional!false>* **${parameter.name}** : ${parameter.helpText?ensure_ends_with(".")}
+<#list spec.metadata.parameters as parameter><#if parameter.optional!false>* **${parameter.name}**: ${parameter.helpText?ensure_ends_with(".")}
</#if></#list>
diff --git a/pom.xml b/pom.xml
index 297ac1ae3d..04b1d3d6d1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -106,6 +106,7 @@
com.google.cloud.teleport.metadata.TemplateLoadTest
com.google.cloud.teleport.metadata.DirectRunnerTest
+ com.google.cloud.teleport.metadata.SpannerStagingTest
JAVA_LICENSE_HEADER
@@ -458,6 +459,56 @@
+
+ spannerStagingIntegrationTests
+
+ false
+
+
+
+ true
+
+ false
+
+ classesAndMethods
+ 2
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ ${surefire.version}
+
+
+
+ beamPythonVersion
+ ${beam-python.version}
+
+
+ beamJavaVersion
+ ${beam.version}
+
+
+ beamMavenRepo
+ ${beam-maven-repo}
+
+
+
+ **/*.java
+
+
+ ${spanner.staging.tests}
+
+ true
+ ${itParallelismType}
+ ${itParallelism}
+ false
+
+
+
+
+
templatesLoadTests
diff --git a/python/README_Yaml_Template.md b/python/README_Yaml_Template.md
index 650ff8b3c0..4d2b7d07bc 100644
--- a/python/README_Yaml_Template.md
+++ b/python/README_Yaml_Template.md
@@ -25,9 +25,9 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Optional parameters
-* **yaml_pipeline** : A yaml description of the pipeline to run.
-* **yaml_pipeline_file** : A file in Cloud Storage containing a yaml description of the pipeline to run.
-* **jinja_variables** : A json dict of variables used when invoking the jinja preprocessor on the provided yaml pipeline.
+* **yaml_pipeline**: A yaml description of the pipeline to run.
+* **yaml_pipeline_file**: A file in Cloud Storage containing a yaml description of the pipeline to run.
+* **jinja_variables**: A json dict of variables used when invoking the jinja preprocessor on the provided yaml pipeline.
diff --git a/v1/README_Bulk_Compress_GCS_Files.md b/v1/README_Bulk_Compress_GCS_Files.md
index 8a7aec4625..d1bdce7bd1 100644
--- a/v1/README_Bulk_Compress_GCS_Files.md
+++ b/v1/README_Bulk_Compress_GCS_Files.md
@@ -27,14 +27,14 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputFilePattern** : The Cloud Storage location of the files you'd like to process. (Example: gs://your-bucket/your-files/*.txt).
-* **outputDirectory** : The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. (Example: gs://your-bucket/your-path).
-* **outputFailureFile** : The error log output file to use for write failures that occur during compression. The contents will be one line for each file which failed compression. Note that this parameter will allow the pipeline to continue processing in the event of a failure. (Example: gs://your-bucket/compressed/failed.csv).
-* **compression** : The compression algorithm used to compress the matched files. Valid algorithms: BZIP2, DEFLATE, GZIP.
+* **inputFilePattern**: The Cloud Storage location of the files you'd like to process. For example, `gs://your-bucket/your-files/*.txt`.
+* **outputDirectory**: The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. For example, `gs://your-bucket/your-path`.
+* **outputFailureFile**: The error log output file to use for write failures that occur during compression. The contents will be one line for each file which failed compression. Note that this parameter will allow the pipeline to continue processing in the event of a failure. For example, `gs://your-bucket/compressed/failed.csv`.
+* **compression**: The compression algorithm used to compress the matched files. Valid algorithms: BZIP2, DEFLATE, GZIP.
### Optional parameters
-* **outputFilenameSuffix** : Output filename suffix of the files to write. Defaults to .bzip2, .deflate or .gz depending on the compression algorithm.
+* **outputFilenameSuffix**: Output filename suffix of the files to write. Defaults to .bzip2, .deflate or .gz depending on the compression algorithm.
@@ -211,9 +211,9 @@ resource "google_dataflow_job" "bulk_compress_gcs_files" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputFilePattern = "gs://your-bucket/your-files/*.txt"
- outputDirectory = "gs://your-bucket/your-path"
- outputFailureFile = "gs://your-bucket/compressed/failed.csv"
+ inputFilePattern = ""
+ outputDirectory = ""
+ outputFailureFile = ""
compression = ""
# outputFilenameSuffix = ""
}
diff --git a/v1/README_Bulk_Decompress_GCS_Files.md b/v1/README_Bulk_Decompress_GCS_Files.md
index b885462bdf..b645a00324 100644
--- a/v1/README_Bulk_Decompress_GCS_Files.md
+++ b/v1/README_Bulk_Decompress_GCS_Files.md
@@ -26,9 +26,9 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputFilePattern** : The Cloud Storage location of the files you'd like to process. (Example: gs://your-bucket/your-files/*.gz).
-* **outputDirectory** : The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. (Example: gs://your-bucket/decompressed/).
-* **outputFailureFile** : The output file to write failures to during the decompression process. If there are no failures, the file will still be created but will be empty. The contents will be one line for each file which failed decompression in CSV format (Filename, Error). Note that this parameter will allow the pipeline to continue processing in the event of a failure. (Example: gs://your-bucket/decompressed/failed.csv).
+* **inputFilePattern**: The Cloud Storage location of the files you'd like to process. For example, `gs://your-bucket/your-files/*.gz`.
+* **outputDirectory**: The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. For example, `gs://your-bucket/decompressed/`.
+* **outputFailureFile**: The output file to write failures to during the decompression process. If there are no failures, the file will still be created but will be empty. The contents will be one line for each file which failed decompression in CSV format (Filename, Error). Note that this parameter will allow the pipeline to continue processing in the event of a failure. For example, `gs://your-bucket/decompressed/failed.csv`.
### Optional parameters
@@ -202,9 +202,9 @@ resource "google_dataflow_job" "bulk_decompress_gcs_files" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputFilePattern = "gs://your-bucket/your-files/*.gz"
- outputDirectory = "gs://your-bucket/decompressed/"
- outputFailureFile = "gs://your-bucket/decompressed/failed.csv"
+ inputFilePattern = ""
+ outputDirectory = ""
+ outputFailureFile = ""
}
}
```
diff --git a/v1/README_Cassandra_To_Cloud_Bigtable.md b/v1/README_Cassandra_To_Cloud_Bigtable.md
index f52429a981..e322eed223 100644
--- a/v1/README_Cassandra_To_Cloud_Bigtable.md
+++ b/v1/README_Cassandra_To_Cloud_Bigtable.md
@@ -23,21 +23,21 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **cassandraHosts** : The hosts of the Apache Cassandra nodes in a comma-separated list.
-* **cassandraKeyspace** : The Apache Cassandra keyspace where the table is located.
-* **cassandraTable** : The Apache Cassandra table to copy.
-* **bigtableProjectId** : The Google Cloud project ID associated with the Bigtable instance.
-* **bigtableInstanceId** : The ID of the Bigtable instance that the Apache Cassandra table is copied to.
-* **bigtableTableId** : The name of the Bigtable table that the Apache Cassandra table is copied to.
+* **cassandraHosts**: The hosts of the Apache Cassandra nodes in a comma-separated list.
+* **cassandraKeyspace**: The Apache Cassandra keyspace where the table is located.
+* **cassandraTable**: The Apache Cassandra table to copy.
+* **bigtableProjectId**: The Google Cloud project ID associated with the Bigtable instance.
+* **bigtableInstanceId**: The ID of the Bigtable instance that the Apache Cassandra table is copied to.
+* **bigtableTableId**: The name of the Bigtable table that the Apache Cassandra table is copied to.
### Optional parameters
-* **cassandraPort** : The TCP port to use to reach Apache Cassandra on the nodes. The default value is 9042.
-* **defaultColumnFamily** : The name of the column family of the Bigtable table. The default value is default.
-* **rowKeySeparator** : The separator used to build row-keys. The default value is '#'.
-* **splitLargeRows** : The flag for enabling splitting of large rows into multiple MutateRows requests. Note that when a large row is split between multiple API calls, the updates to the row are not atomic. .
-* **writetimeCassandraColumnSchema** : GCS path to schema to copy Cassandra writetimes to Bigtable. The command to generate this schema is ```cqlsh -e "select json * from system_schema.columns where keyspace_name='$CASSANDRA_KEYSPACE' and table_name='$CASSANDRA_TABLE'`" > column_schema.json```. Set $WRITETIME_CASSANDRA_COLUMN_SCHEMA to a GCS path, e.g. `gs://$BUCKET_NAME/column_schema.json`. Then upload the schema to GCS: `gcloud storage cp column_schema.json $WRITETIME_CASSANDRA_COLUMN_SCHEMA`. Requires Cassandra version 2.2 onwards for JSON support.
-* **setZeroTimestamp** : The flag for setting Bigtable cell timestamp to 0 if Cassandra writetime is not present. The default behavior for when this flag is not set is to set the Bigtable cell timestamp as the template replication time, i.e. now.
+* **cassandraPort**: The TCP port to use to reach Apache Cassandra on the nodes. The default value is `9042`.
+* **defaultColumnFamily**: The name of the column family of the Bigtable table. The default value is `default`.
+* **rowKeySeparator**: The separator used to build row-keys. The default value is `#`.
+* **splitLargeRows**: The flag for enabling splitting of large rows into multiple MutateRows requests. Note that when a large row is split between multiple API calls, the updates to the row are not atomic.
+* **writetimeCassandraColumnSchema**: GCS path to schema to copy Cassandra writetimes to Bigtable. The command to generate this schema is ```cqlsh -e "select json * from system_schema.columns where keyspace_name='$CASSANDRA_KEYSPACE' and table_name='$CASSANDRA_TABLE'" > column_schema.json```. Set $WRITETIME_CASSANDRA_COLUMN_SCHEMA to a GCS path, e.g. `gs://$BUCKET_NAME/column_schema.json`. Then upload the schema to GCS: `gcloud storage cp column_schema.json $WRITETIME_CASSANDRA_COLUMN_SCHEMA`. Requires Cassandra version 2.2 onwards for JSON support.
+* **setZeroTimestamp**: The flag for setting Bigtable cell timestamp to 0 if Cassandra writetime is not present. The default behavior for when this flag is not set is to set the Bigtable cell timestamp as the template replication time, i.e. now.
diff --git a/v1/README_Cloud_BigQuery_to_Cloud_Datastore.md b/v1/README_Cloud_BigQuery_to_Cloud_Datastore.md
index 93517b8105..f98873f127 100644
--- a/v1/README_Cloud_BigQuery_to_Cloud_Datastore.md
+++ b/v1/README_Cloud_BigQuery_to_Cloud_Datastore.md
@@ -15,17 +15,17 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **readQuery** : A BigQuery SQL query that extracts data from the source. For example, select * from dataset1.sample_table.
-* **datastoreWriteProjectId** : The ID of the Google Cloud project to write the Datastore entities to.
-* **errorWritePath** : The error log output file to use for write failures that occur during processing. (Example: gs://your-bucket/errors/).
+* **readQuery**: A BigQuery SQL query that extracts data from the source. For example, `select * from dataset1.sample_table`.
+* **datastoreWriteProjectId**: The ID of the Google Cloud project to write the Datastore entities to.
+* **errorWritePath**: The error log output file to use for write failures that occur during processing. For example, `gs://your-bucket/errors/`.
### Optional parameters
-* **readIdColumn** : Name of the BigQuery column storing the unique identifier of the row.
-* **invalidOutputPath** : Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. (Example: gs://your-bucket/your-path).
-* **datastoreWriteEntityKind** : Datastore kind under which entities will be written in the output Google Cloud project.
-* **datastoreWriteNamespace** : Datastore namespace under which entities will be written in the output Google Cloud project.
-* **datastoreHintNumWorkers** : Hint for the expected number of workers in the Datastore ramp-up throttling step. Default is `500`.
+* **readIdColumn**: Name of the BigQuery column storing the unique identifier of the row.
+* **invalidOutputPath**: Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. For example, `gs://your-bucket/your-path`.
+* **datastoreWriteEntityKind**: Datastore kind under which entities will be written in the output Google Cloud project.
+* **datastoreWriteNamespace**: Datastore namespace under which entities will be written in the output Google Cloud project.
+* **datastoreHintNumWorkers**: Hint for the expected number of workers in the Datastore ramp-up throttling step. Defaults to `500`.
@@ -213,9 +213,9 @@ resource "google_dataflow_job" "cloud_bigquery_to_cloud_datastore" {
parameters = {
readQuery = ""
datastoreWriteProjectId = ""
- errorWritePath = "gs://your-bucket/errors/"
+ errorWritePath = ""
# readIdColumn = ""
- # invalidOutputPath = "gs://your-bucket/your-path"
+ # invalidOutputPath = ""
# datastoreWriteEntityKind = ""
# datastoreWriteNamespace = ""
# datastoreHintNumWorkers = "500"
diff --git a/v1/README_Cloud_BigQuery_to_GCS_TensorFlow_Records.md b/v1/README_Cloud_BigQuery_to_GCS_TensorFlow_Records.md
index 784bcaac9e..c13a6dafe9 100644
--- a/v1/README_Cloud_BigQuery_to_GCS_TensorFlow_Records.md
+++ b/v1/README_Cloud_BigQuery_to_GCS_TensorFlow_Records.md
@@ -22,17 +22,17 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **readQuery** : A BigQuery SQL query that extracts data from the source. For example, select * from dataset1.sample_table.
-* **outputDirectory** : The top-level Cloud Storage path prefix to use when writing the training, testing, and validation TFRecord files. Subdirectories for resulting training, testing, and validation TFRecord files are automatically generated from `outputDirectory`. For example, `gs://mybucket/output/train` (Example: gs://mybucket/output).
+* **readQuery**: A BigQuery SQL query that extracts data from the source. For example, `select * from dataset1.sample_table`.
+* **outputDirectory**: The top-level Cloud Storage path prefix to use when writing the training, testing, and validation TFRecord files. Subdirectories for resulting training, testing, and validation TFRecord files are automatically generated from `outputDirectory`. For example, `gs://mybucket/output`.
### Optional parameters
-* **readIdColumn** : Name of the BigQuery column storing the unique identifier of the row.
-* **invalidOutputPath** : Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. (Example: gs://your-bucket/your-path).
-* **outputSuffix** : The file suffix for the training, testing, and validation TFRecord files that are written. The default value is `.tfrecord`.
-* **trainingPercentage** : The percentage of query data allocated to training TFRecord files. The default value is 1, or 100%.
-* **testingPercentage** : The percentage of query data allocated to testing TFRecord files. The default value is 0, or 0%.
-* **validationPercentage** : The percentage of query data allocated to validation TFRecord files. The default value is 0, or 0%.
+* **readIdColumn**: Name of the BigQuery column storing the unique identifier of the row.
+* **invalidOutputPath**: Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. For example, `gs://your-bucket/your-path`.
+* **outputSuffix**: The file suffix for the training, testing, and validation TFRecord files that are written. The default value is `.tfrecord`.
+* **trainingPercentage**: The percentage of query data allocated to training TFRecord files. The default value is `1`, or `100%`.
+* **testingPercentage**: The percentage of query data allocated to testing TFRecord files. The default value is `0`, or `0%`.
+* **validationPercentage**: The percentage of query data allocated to validation TFRecord files. The default value is `0`, or `0%`.
@@ -219,9 +219,9 @@ resource "google_dataflow_job" "cloud_bigquery_to_gcs_tensorflow_records" {
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
readQuery = ""
- outputDirectory = "gs://mybucket/output"
+ outputDirectory = ""
# readIdColumn = ""
- # invalidOutputPath = "gs://your-bucket/your-path"
+ # invalidOutputPath = ""
# outputSuffix = ".tfrecord"
# trainingPercentage = "1.0"
# testingPercentage = "0.0"
diff --git a/v1/README_Cloud_Bigtable_to_GCS_Avro.md b/v1/README_Cloud_Bigtable_to_GCS_Avro.md
index 09d93b37b2..05043a9619 100644
--- a/v1/README_Cloud_Bigtable_to_GCS_Avro.md
+++ b/v1/README_Cloud_Bigtable_to_GCS_Avro.md
@@ -18,15 +18,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **bigtableProjectId** : The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from.
-* **bigtableInstanceId** : The ID of the Bigtable instance that contains the table.
-* **bigtableTableId** : The ID of the Bigtable table to export.
-* **outputDirectory** : The Cloud Storage path where data is written. (Example: gs://mybucket/somefolder).
-* **filenamePrefix** : The prefix of the Avro filename. For example, `output-`. Defaults to: part.
+* **bigtableProjectId**: The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from.
+* **bigtableInstanceId**: The ID of the Bigtable instance that contains the table.
+* **bigtableTableId**: The ID of the Bigtable table to export.
+* **outputDirectory**: The Cloud Storage path where data is written. For example, `gs://mybucket/somefolder`.
+* **filenamePrefix**: The prefix of the Avro filename. For example, `output-`. Defaults to: part.
### Optional parameters
-* **bigtableAppProfileId** : The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.
+* **bigtableAppProfileId**: The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.
@@ -209,7 +209,7 @@ resource "google_dataflow_job" "cloud_bigtable_to_gcs_avro" {
bigtableProjectId = ""
bigtableInstanceId = ""
bigtableTableId = ""
- outputDirectory = "gs://mybucket/somefolder"
+ outputDirectory = ""
filenamePrefix = "part"
# bigtableAppProfileId = "default"
}
diff --git a/v1/README_Cloud_Bigtable_to_GCS_Json.md b/v1/README_Cloud_Bigtable_to_GCS_Json.md
index ec793ebf20..0b95ea43ba 100644
--- a/v1/README_Cloud_Bigtable_to_GCS_Json.md
+++ b/v1/README_Cloud_Bigtable_to_GCS_Json.md
@@ -17,17 +17,17 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **bigtableProjectId** : The ID for the Google Cloud project that contains the Bigtable instance that you want to read data from.
-* **bigtableInstanceId** : The ID of the Bigtable instance that contains the table.
-* **bigtableTableId** : The ID of the Bigtable table to read from.
-* **outputDirectory** : The Cloud Storage path where the output JSON files are stored. (Example: gs://your-bucket/your-path/).
+* **bigtableProjectId**: The ID for the Google Cloud project that contains the Bigtable instance that you want to read data from.
+* **bigtableInstanceId**: The ID of the Bigtable instance that contains the table.
+* **bigtableTableId**: The ID of the Bigtable table to read from.
+* **outputDirectory**: The Cloud Storage path where the output JSON files are stored. For example, `gs://your-bucket/your-path/`.
### Optional parameters
-* **filenamePrefix** : The prefix of the JSON file name. For example, "table1-". If no value is provided, defaults to `part`.
-* **userOption** : Possible values are `FLATTEN` or `NONE`. `FLATTEN` flattens the row to the single level. `NONE` stores the whole row as a JSON string. Defaults to `NONE`.
-* **columnsAliases** : A comma-separated list of columns that are required for the Vertex AI Vector Search index. The columns `id` and `embedding` are required for Vertex AI Vector Search. You can use the notation `fromfamily:fromcolumn;to`. For example, if the columns are `rowkey` and `cf:my_embedding`, where `rowkey` has a different name than the embedding column, specify `cf:my_embedding;embedding` and, `rowkey;id`. Only use this option when the value for `userOption` is `FLATTEN`.
-* **bigtableAppProfileId** : The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.
+* **filenamePrefix**: The prefix of the JSON file name. For example, `table1-`. If no value is provided, defaults to `part`.
+* **userOption**: Possible values are `FLATTEN` or `NONE`. `FLATTEN` flattens the row to the single level. `NONE` stores the whole row as a JSON string. Defaults to `NONE`.
+* **columnsAliases**: A comma-separated list of columns that are required for the Vertex AI Vector Search index. The columns `id` and `embedding` are required for Vertex AI Vector Search. You can use the notation `fromfamily:fromcolumn;to`. For example, if the columns are `rowkey` and `cf:my_embedding`, where `rowkey` has a different name than the embedding column, specify `cf:my_embedding;embedding` and `rowkey;id`. Only use this option when the value for `userOption` is `FLATTEN`.
+* **bigtableAppProfileId**: The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.
@@ -216,7 +216,7 @@ resource "google_dataflow_job" "cloud_bigtable_to_gcs_json" {
bigtableProjectId = ""
bigtableInstanceId = ""
bigtableTableId = ""
- outputDirectory = "gs://your-bucket/your-path/"
+ outputDirectory = ""
# filenamePrefix = "part"
# userOption = "NONE"
# columnsAliases = ""
diff --git a/v1/README_Cloud_Bigtable_to_GCS_Parquet.md b/v1/README_Cloud_Bigtable_to_GCS_Parquet.md
index cd005cd87d..b813769be2 100644
--- a/v1/README_Cloud_Bigtable_to_GCS_Parquet.md
+++ b/v1/README_Cloud_Bigtable_to_GCS_Parquet.md
@@ -18,16 +18,16 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **bigtableProjectId** : The ID of the Google Cloud project that contains the Cloud Bigtable instance that you want to read data from.
-* **bigtableInstanceId** : The ID of the Cloud Bigtable instance that contains the table.
-* **bigtableTableId** : The ID of the Cloud Bigtable table to export.
-* **outputDirectory** : The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse the directory path for date and time formatters. For example: gs://your-bucket/your-path.
-* **filenamePrefix** : The prefix of the Parquet file name. For example, "table1-". Defaults to: part.
+* **bigtableProjectId**: The ID of the Google Cloud project that contains the Cloud Bigtable instance that you want to read data from.
+* **bigtableInstanceId**: The ID of the Cloud Bigtable instance that contains the table.
+* **bigtableTableId**: The ID of the Cloud Bigtable table to export.
+* **outputDirectory**: The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse the directory path for date and time formatters. For example: `gs://your-bucket/your-path`.
+* **filenamePrefix**: The prefix of the Parquet file name. For example, `table1-`. Defaults to: `part`.
### Optional parameters
-* **numShards** : The maximum number of output shards produced when writing. A higher number of shards means higher throughput for writing to Cloud Storage, but potentially higher data aggregation cost across shards when processing output Cloud Storage files. The default value is decided by Dataflow.
-* **bigtableAppProfileId** : The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.
+* **numShards**: The maximum number of output shards produced when writing. A higher number of shards means higher throughput for writing to Cloud Storage, but potentially higher data aggregation cost across shards when processing output Cloud Storage files. The default value is decided by Dataflow.
+* **bigtableAppProfileId**: The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.
diff --git a/v1/README_Cloud_Bigtable_to_GCS_SequenceFile.md b/v1/README_Cloud_Bigtable_to_GCS_SequenceFile.md
index 47c4c7b4ea..86e5072e87 100644
--- a/v1/README_Cloud_Bigtable_to_GCS_SequenceFile.md
+++ b/v1/README_Cloud_Bigtable_to_GCS_SequenceFile.md
@@ -19,19 +19,19 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **bigtableProject** : The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from.
-* **bigtableInstanceId** : The ID of the Bigtable instance that contains the table.
-* **bigtableTableId** : The ID of the Bigtable table to export.
-* **destinationPath** : The Cloud Storage path where data is written. (Example: gs://your-bucket/your-path/).
-* **filenamePrefix** : The prefix of the SequenceFile filename. (Example: output-).
+* **bigtableProject**: The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from.
+* **bigtableInstanceId**: The ID of the Bigtable instance that contains the table.
+* **bigtableTableId**: The ID of the Bigtable table to export.
+* **destinationPath**: The Cloud Storage path where data is written. For example, `gs://your-bucket/your-path/`.
+* **filenamePrefix**: The prefix of the SequenceFile filename. For example, `output-`.
### Optional parameters
-* **bigtableAppProfileId** : The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.
-* **bigtableStartRow** : The row where to start the export from, defaults to the first row.
-* **bigtableStopRow** : The row where to stop the export, defaults to the last row.
-* **bigtableMaxVersions** : Maximum number of cell versions. Defaults to: 2147483647.
-* **bigtableFilter** : Filter string. See: http://hbase.apache.org/book.html#thrift. Defaults to empty.
+* **bigtableAppProfileId**: The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.
+* **bigtableStartRow**: The row where to start the export from, defaults to the first row.
+* **bigtableStopRow**: The row where to stop the export, defaults to the last row.
+* **bigtableMaxVersions**: Maximum number of cell versions. Defaults to: 2147483647.
+* **bigtableFilter**: Filter string. See: http://hbase.apache.org/book.html#thrift. Defaults to empty.
@@ -226,8 +226,8 @@ resource "google_dataflow_job" "cloud_bigtable_to_gcs_sequencefile" {
bigtableProject = ""
bigtableInstanceId = ""
bigtableTableId = ""
- destinationPath = "gs://your-bucket/your-path/"
- filenamePrefix = "output-"
+ destinationPath = ""
+ filenamePrefix = ""
# bigtableAppProfileId = ""
# bigtableStartRow = ""
# bigtableStopRow = ""
diff --git a/v1/README_Cloud_Bigtable_to_Vector_Embeddings.md b/v1/README_Cloud_Bigtable_to_Vector_Embeddings.md
index dc31ca96bc..b794016fb7 100644
--- a/v1/README_Cloud_Bigtable_to_Vector_Embeddings.md
+++ b/v1/README_Cloud_Bigtable_to_Vector_Embeddings.md
@@ -18,24 +18,24 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **bigtableProjectId** : The ID for the Google Cloud project that contains the Bigtable instance that you want to read data from.
-* **bigtableInstanceId** : The ID of the Bigtable instance that contains the table.
-* **bigtableTableId** : The ID of the Bigtable table to read from.
-* **outputDirectory** : The Cloud Storage path where the output JSON files are stored. (Example: gs://your-bucket/your-path/).
-* **idColumn** : The fully qualified column name where the ID is stored. In the format cf:col or _key.
-* **embeddingColumn** : The fully qualified column name where the embeddings are stored. In the format cf:col or _key.
+* **bigtableProjectId**: The ID for the Google Cloud project that contains the Bigtable instance that you want to read data from.
+* **bigtableInstanceId**: The ID of the Bigtable instance that contains the table.
+* **bigtableTableId**: The ID of the Bigtable table to read from.
+* **outputDirectory**: The Cloud Storage path where the output JSON files are stored. For example, `gs://your-bucket/your-path/`.
+* **idColumn**: The fully qualified column name where the ID is stored. In the format `cf:col` or `_key`.
+* **embeddingColumn**: The fully qualified column name where the embeddings are stored. In the format `cf:col` or `_key`.
### Optional parameters
-* **filenamePrefix** : The prefix of the JSON filename. For example: "table1-". If no value is provided, defaults to "part".
-* **crowdingTagColumn** : The fully qualified column name where the crowding tag is stored. In the format cf:col or _key.
-* **embeddingByteSize** : The byte size of each entry in the embeddings array. For float, use the value 4. For double, use the value 8. Defaults to 4.
-* **allowRestrictsMappings** : The comma-separated, fully qualified column names for the columns to use as the allow restricts, with their aliases. In the format cf:col->alias.
-* **denyRestrictsMappings** : The comma-separated, fully qualified column names for the columns to use as the deny restricts, with their aliases. In the format cf:col->alias.
-* **intNumericRestrictsMappings** : The comma-separated, fully qualified column names of the columns to use as integer numeric_restricts, with their aliases. In the format cf:col->alias.
-* **floatNumericRestrictsMappings** : The comma-separated, fully qualified column names of the columns to use as float (4 bytes) numeric_restricts, with their aliases. In the format cf:col->alias.
-* **doubleNumericRestrictsMappings** : The comma-separated, fully qualified column names of the columns to use as double (8 bytes) numeric_restricts, with their aliases. In the format cf:col->alias.
-* **bigtableAppProfileId** : The ID of the Cloud Bigtable app profile to be used for the export. Defaults to: default.
+* **filenamePrefix**: The prefix of the JSON filename. For example: `table1-`. If no value is provided, defaults to `part`.
+* **crowdingTagColumn**: The fully qualified column name where the crowding tag is stored. In the format `cf:col` or `_key`.
+* **embeddingByteSize**: The byte size of each entry in the embeddings array. For float, use the value `4`. For double, use the value `8`. Defaults to `4`.
+* **allowRestrictsMappings**: The comma-separated, fully qualified column names for the columns to use as the allow restricts, with their aliases. In the format `cf:col->alias`.
+* **denyRestrictsMappings**: The comma-separated, fully qualified column names for the columns to use as the deny restricts, with their aliases. In the format `cf:col->alias`.
+* **intNumericRestrictsMappings**: The comma-separated, fully qualified column names of the columns to use as integer numeric_restricts, with their aliases. In the format `cf:col->alias`.
+* **floatNumericRestrictsMappings**: The comma-separated, fully qualified column names of the columns to use as float (4 bytes) numeric_restricts, with their aliases. In the format `cf:col->alias`.
+* **doubleNumericRestrictsMappings**: The comma-separated, fully qualified column names of the columns to use as double (8 bytes) numeric_restricts, with their aliases. In the format `cf:col->alias`.
+* **bigtableAppProfileId**: The ID of the Cloud Bigtable app profile to be used for the export. Defaults to: default.
@@ -245,7 +245,7 @@ resource "google_dataflow_job" "cloud_bigtable_to_vector_embeddings" {
bigtableProjectId = ""
bigtableInstanceId = ""
bigtableTableId = ""
- outputDirectory = "gs://your-bucket/your-path/"
+ outputDirectory = ""
idColumn = ""
embeddingColumn = ""
# filenamePrefix = "part"
diff --git a/v1/README_Cloud_PubSub_to_Avro.md b/v1/README_Cloud_PubSub_to_Avro.md
index 1ba243aa6b..feb93d6ef7 100644
--- a/v1/README_Cloud_PubSub_to_Avro.md
+++ b/v1/README_Cloud_PubSub_to_Avro.md
@@ -18,20 +18,20 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputTopic** : The Pub/Sub topic to subscribe to for message consumption. The topic name must be in the format projects//topics/.
-* **outputDirectory** : The output directory where output Avro files are archived. Must contain / at the end. For example: gs://example-bucket/example-directory/.
-* **avroTempDirectory** : The directory for temporary Avro files. Must contain / at the end. For example: gs://example-bucket/example-directory/.
+* **inputTopic**: The Pub/Sub topic to subscribe to for message consumption. The topic name must be in the format `projects//topics/`.
+* **outputDirectory**: The output directory where output Avro files are archived. Must contain `/` at the end. For example: `gs://example-bucket/example-directory/`.
+* **avroTempDirectory**: The directory for temporary Avro files. Must contain `/` at the end. For example: `gs://example-bucket/example-directory/`.
### Optional parameters
-* **outputFilenamePrefix** : The output filename prefix for the Avro files. Defaults to: output.
-* **outputFilenameSuffix** : The output filename suffix for the Avro files. Defaults to empty.
-* **outputShardTemplate** : The shard template defines the dynamic portion of each windowed file. By default, the pipeline uses a single shard for output to the file system within each window. Therefore, all data outputs into a single file per window. The `outputShardTemplate` defaults `to W-P-SS-of-NN`, where `W` is the window date range, `P` is the pane info, `S` is the shard number, and `N` is the number of shards. In case of a single file, the `SS-of-NN` portion of the `outputShardTemplate` is `00-of-01`.
-* **yearPattern** : Pattern for formatting the year. Must be one or more of `y` or `Y`. Case makes no difference in the year. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `YYYY`.
-* **monthPattern** : Pattern for formatting the month. Must be one or more of the `M` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `MM`.
-* **dayPattern** : Pattern for formatting the day. Must be one or more of `d` for day of month or `D` for day of year. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `dd`.
-* **hourPattern** : Pattern for formatting the hour. Must be one or more of the `H` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `HH`.
-* **minutePattern** : Pattern for formatting the minute. Must be one or more of the `m` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `mm`.
+* **outputFilenamePrefix**: The output filename prefix for the Avro files. Defaults to: output.
+* **outputFilenameSuffix**: The output filename suffix for the Avro files. Defaults to empty.
+* **outputShardTemplate**: The shard template defines the dynamic portion of each windowed file. By default, the pipeline uses a single shard for output to the file system within each window. Therefore, all data outputs into a single file per window. The `outputShardTemplate` defaults to `W-P-SS-of-NN`, where `W` is the window date range, `P` is the pane info, `S` is the shard number, and `N` is the number of shards. In case of a single file, the `SS-of-NN` portion of the `outputShardTemplate` is `00-of-01`.
+* **yearPattern**: Pattern for formatting the year. Must be one or more of `y` or `Y`. Case makes no difference in the year. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `YYYY`.
+* **monthPattern**: Pattern for formatting the month. Must be one or more of the `M` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `MM`.
+* **dayPattern**: Pattern for formatting the day. Must be one or more of `d` for day of month or `D` for day of year. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `dd`.
+* **hourPattern**: Pattern for formatting the hour. Must be one or more of the `H` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `HH`.
+* **minutePattern**: Pattern for formatting the minute. Must be one or more of the `m` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `mm`.
diff --git a/v1/README_Cloud_PubSub_to_Cloud_PubSub.md b/v1/README_Cloud_PubSub_to_Cloud_PubSub.md
index 6466412598..6158092673 100644
--- a/v1/README_Cloud_PubSub_to_Cloud_PubSub.md
+++ b/v1/README_Cloud_PubSub_to_Cloud_PubSub.md
@@ -21,13 +21,13 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputSubscription** : The Pub/Sub subscription to read the input from. (Example: projects/your-project-id/subscriptions/your-subscription-name).
-* **outputTopic** : The Pub/Sub topic to write the output to. (Example: projects/your-project-id/topics/your-topic-name).
+* **inputSubscription**: The Pub/Sub subscription to read the input from. For example, `projects/your-project-id/subscriptions/your-subscription-name`.
+* **outputTopic**: The Pub/Sub topic to write the output to. For example, `projects/your-project-id/topics/your-topic-name`.
### Optional parameters
-* **filterKey** : The attribute key to use to filter events. No filters are applied if `filterKey` is not specified.
-* **filterValue** : The attribute value to use to filter events when a `filterKey` is provided. By default, a null `filterValue` is used.
+* **filterKey**: The attribute key to use to filter events. No filters are applied if `filterKey` is not specified.
+* **filterValue**: The attribute value to use to filter events when a `filterKey` is provided. By default, a null `filterValue` is used.
@@ -201,8 +201,8 @@ resource "google_dataflow_job" "cloud_pubsub_to_cloud_pubsub" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputSubscription = "projects/your-project-id/subscriptions/your-subscription-name"
- outputTopic = "projects/your-project-id/topics/your-topic-name"
+ inputSubscription = ""
+ outputTopic = ""
# filterKey = ""
# filterValue = ""
}
diff --git a/v1/README_Cloud_PubSub_to_Datadog.md b/v1/README_Cloud_PubSub_to_Datadog.md
index 5fc7099e00..fbfd2fedf3 100644
--- a/v1/README_Cloud_PubSub_to_Datadog.md
+++ b/v1/README_Cloud_PubSub_to_Datadog.md
@@ -33,22 +33,22 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputSubscription** : The Pub/Sub subscription to read the input from. (Example: projects/your-project-id/subscriptions/your-subscription-name).
-* **url** : The Datadog Logs API URL. This URL must be routable from the VPC that the pipeline runs in. See Send logs (https://docs.datadoghq.com/api/latest/logs/#send-logs) in the Datadog documentation for more information. (Example: https://http-intake.logs.datadoghq.com).
-* **outputDeadletterTopic** : The Pub/Sub topic to forward undeliverable messages to. For example, projects//topics/.
+* **inputSubscription**: The Pub/Sub subscription to read the input from. For example, `projects/your-project-id/subscriptions/your-subscription-name`.
+* **url**: The Datadog Logs API URL. This URL must be routable from the VPC that the pipeline runs in. See Send logs (https://docs.datadoghq.com/api/latest/logs/#send-logs) in the Datadog documentation for more information. For example, `https://http-intake.logs.datadoghq.com`.
+* **outputDeadletterTopic**: The Pub/Sub topic to forward undeliverable messages to. For example, `projects//topics/`.
### Optional parameters
-* **apiKey** : The Datadog API key. You must provide this value if the `apiKeySource` is set to `PLAINTEXT` or `KMS`. For more information, see API and Application Keys (https://docs.datadoghq.com/account_management/api-app-keys/) in the Datadog documentation.
-* **batchCount** : The batch size for sending multiple events to Datadog. The default is `1` (no batching).
-* **parallelism** : The maximum number of parallel requests. The default is `1` (no parallelism).
-* **includePubsubMessage** : Whether to include the full Pub/Sub message in the payload. The default is `true` (all elements, including the data element, are included in the payload).
-* **apiKeyKMSEncryptionKey** : The Cloud KMS key to use to decrypt the API Key. You must provide this parameter if the `apiKeySource` is set to `KMS`. If the Cloud KMS key is provided, you must pass in an encrypted API Key. (Example: projects/your-project-id/locations/global/keyRings/your-keyring/cryptoKeys/your-key-name).
-* **apiKeySecretId** : The Secret Manager secret ID for the API Key. You must provide this parameter if the `apiKeySource` is set to `SECRET_MANAGER`. (Example: projects/your-project-id/secrets/your-secret/versions/your-secret-version).
-* **apiKeySource** : The source of the API key. The following values are supported: `PLAINTEXT`, `KMS`, and `SECRET_MANAGER`. You must provide this parameter if you're using Secret Manager. If `apiKeySource` is set to `KMS`, you must also provide `apiKeyKMSEncryptionKey` and encrypted `API Key`. If `apiKeySource` is set to `SECRET_MANAGER`, you must also provide `apiKeySecretId`. If `apiKeySource` is set to `PLAINTEXT`, you must also provide `apiKey`.
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
-* **javascriptTextTransformReloadIntervalMinutes** : Define the interval that workers may check for JavaScript UDF changes to reload the files. Defaults to: 0.
+* **apiKey**: The Datadog API key. You must provide this value if the `apiKeySource` is set to `PLAINTEXT` or `KMS`. For more information, see API and Application Keys (https://docs.datadoghq.com/account_management/api-app-keys/) in the Datadog documentation.
+* **batchCount**: The batch size for sending multiple events to Datadog. The default is `1` (no batching).
+* **parallelism**: The maximum number of parallel requests. The default is `1` (no parallelism).
+* **includePubsubMessage**: Whether to include the full Pub/Sub message in the payload. The default is `true` (all elements, including the data element, are included in the payload).
+* **apiKeyKMSEncryptionKey**: The Cloud KMS key to use to decrypt the API Key. You must provide this parameter if the `apiKeySource` is set to `KMS`. If the Cloud KMS key is provided, you must pass in an encrypted API Key. For example, `projects/your-project-id/locations/global/keyRings/your-keyring/cryptoKeys/your-key-name`.
+* **apiKeySecretId**: The Secret Manager secret ID for the API Key. You must provide this parameter if the `apiKeySource` is set to `SECRET_MANAGER`. For example, `projects/your-project-id/secrets/your-secret/versions/your-secret-version`.
+* **apiKeySource**: The source of the API key. The following values are supported: `PLAINTEXT`, `KMS`, and `SECRET_MANAGER`. You must provide this parameter if you're using Secret Manager. If `apiKeySource` is set to `KMS`, you must also provide `apiKeyKMSEncryptionKey` and encrypted `API Key`. If `apiKeySource` is set to `SECRET_MANAGER`, you must also provide `apiKeySecretId`. If `apiKeySource` is set to `PLAINTEXT`, you must also provide `apiKey`.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **javascriptTextTransformReloadIntervalMinutes**: Define the interval that workers may check for JavaScript UDF changes to reload the files. Defaults to: 0.
## User-Defined functions (UDFs)
@@ -259,15 +259,15 @@ resource "google_dataflow_job" "cloud_pubsub_to_datadog" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputSubscription = "projects/your-project-id/subscriptions/your-subscription-name"
- url = "https://http-intake.logs.datadoghq.com"
+ inputSubscription = ""
+ url = ""
outputDeadletterTopic = ""
# apiKey = ""
# batchCount = ""
# parallelism = ""
# includePubsubMessage = "true"
- # apiKeyKMSEncryptionKey = "projects/your-project-id/locations/global/keyRings/your-keyring/cryptoKeys/your-key-name"
- # apiKeySecretId = "projects/your-project-id/secrets/your-secret/versions/your-secret-version"
+ # apiKeyKMSEncryptionKey = ""
+ # apiKeySecretId = ""
# apiKeySource = ""
# javascriptTextTransformGcsPath = ""
# javascriptTextTransformFunctionName = ""
diff --git a/v1/README_Cloud_PubSub_to_GCS_Text.md b/v1/README_Cloud_PubSub_to_GCS_Text.md
index 967c31dd2a..dab723395f 100644
--- a/v1/README_Cloud_PubSub_to_GCS_Text.md
+++ b/v1/README_Cloud_PubSub_to_GCS_Text.md
@@ -19,20 +19,20 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **outputDirectory** : The path and filename prefix for writing output files. For example, `gs://bucket-name/path/`. This value must end in a slash.
-* **outputFilenamePrefix** : The prefix to place on each windowed file. For example, `output-`. Defaults to: output.
+* **outputDirectory**: The path and filename prefix for writing output files. For example, `gs://bucket-name/path/`. This value must end in a slash.
+* **outputFilenamePrefix**: The prefix to place on each windowed file. For example, `output-`. Defaults to: output.
### Optional parameters
-* **inputTopic** : The Pub/Sub topic to read the input from. The topic name should be in the format `projects//topics/`.
-* **userTempLocation** : The user provided directory to output temporary files to. Must end with a slash.
-* **outputFilenameSuffix** : The suffix to place on each windowed file. Typically a file extension such as `.txt` or `.csv`. Defaults to empty.
-* **outputShardTemplate** : The shard template defines the dynamic portion of each windowed file. By default, the pipeline uses a single shard for output to the file system within each window. Therefore, all data outputs into a single file per window. The `outputShardTemplate` defaults `to W-P-SS-of-NN`, where `W` is the window date range, `P` is the pane info, `S` is the shard number, and `N` is the number of shards. In case of a single file, the `SS-of-NN` portion of the `outputShardTemplate` is `00-of-01`.
-* **yearPattern** : Pattern for formatting the year. Must be one or more of `y` or `Y`. Case makes no difference in the year. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `YYYY`.
-* **monthPattern** : Pattern for formatting the month. Must be one or more of the `M` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `MM`.
-* **dayPattern** : Pattern for formatting the day. Must be one or more of `d` for day of month or `D` for day of year. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `dd`.
-* **hourPattern** : Pattern for formatting the hour. Must be one or more of the `H` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `HH`.
-* **minutePattern** : Pattern for formatting the minute. Must be one or more of the `m` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory ('/') character. Defaults to `mm`.
+* **inputTopic**: The Pub/Sub topic to read the input from. The topic name should be in the format `projects//topics/`.
+* **userTempLocation**: The user provided directory to output temporary files to. Must end with a slash.
+* **outputFilenameSuffix**: The suffix to place on each windowed file. Typically a file extension such as `.txt` or `.csv`. Defaults to empty.
+* **outputShardTemplate**: The shard template defines the dynamic portion of each windowed file. By default, the pipeline uses a single shard for output to the file system within each window. Therefore, all data outputs into a single file per window. The `outputShardTemplate` defaults to `W-P-SS-of-NN`, where `W` is the window date range, `P` is the pane info, `S` is the shard number, and `N` is the number of shards. In case of a single file, the `SS-of-NN` portion of the `outputShardTemplate` is `00-of-01`.
+* **yearPattern**: Pattern for formatting the year. Must be one or more of `y` or `Y`. Case makes no difference in the year. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `YYYY`.
+* **monthPattern**: Pattern for formatting the month. Must be one or more of the `M` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `MM`.
+* **dayPattern**: Pattern for formatting the day. Must be one or more of `d` for day of month or `D` for day of year. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `dd`.
+* **hourPattern**: Pattern for formatting the hour. Must be one or more of the `H` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `HH`.
+* **minutePattern**: Pattern for formatting the minute. Must be one or more of the `m` character. Optionally, wrap the pattern with characters that aren't alphanumeric or the directory (`/`) character. Defaults to `mm`.
diff --git a/v1/README_Cloud_PubSub_to_Splunk.md b/v1/README_Cloud_PubSub_to_Splunk.md
index 45567991cf..c3bceeb015 100644
--- a/v1/README_Cloud_PubSub_to_Splunk.md
+++ b/v1/README_Cloud_PubSub_to_Splunk.md
@@ -33,26 +33,26 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputSubscription** : The Pub/Sub subscription to read the input from. (Example: projects/your-project-id/subscriptions/your-subscription-name).
-* **url** : The Splunk HEC URL. The URL must be routable from the VPC that the pipeline runs in. (Example: https://splunk-hec-host:8088).
-* **outputDeadletterTopic** : The Pub/Sub topic to forward undeliverable messages to. For example, projects//topics/.
+* **inputSubscription**: The Pub/Sub subscription to read the input from. For example, `projects/your-project-id/subscriptions/your-subscription-name`.
+* **url**: The Splunk HEC URL. The URL must be routable from the VPC that the pipeline runs in. For example, `https://splunk-hec-host:8088`.
+* **outputDeadletterTopic**: The Pub/Sub topic to forward undeliverable messages to. For example, `projects//topics/`.
### Optional parameters
-* **token** : The Splunk HEC authentication token. Must be provided if the `tokenSource` parameter is set to `PLAINTEXT` or `KMS`.
-* **batchCount** : The batch size for sending multiple events to Splunk. Defaults to 1 (no batching).
-* **disableCertificateValidation** : Disable SSL certificate validation. Default false (validation enabled). If true, the certificates are not validated (all certificates are trusted) and `rootCaCertificatePath` parameter is ignored.
-* **parallelism** : The maximum number of parallel requests. Defaults to 1 (no parallelism).
-* **includePubsubMessage** : Include the full Pub/Sub message in the payload. Default false (only the data element is included in the payload).
-* **tokenKMSEncryptionKey** : The Cloud KMS key to use to decrypt the HEC token string. This parameter must be provided when tokenSource is set to KMS. If the Cloud KMS key is provided, the HEC token string `must` be passed in encrypted. (Example: projects/your-project-id/locations/global/keyRings/your-keyring/cryptoKeys/your-key-name).
-* **tokenSecretId** : The Secret Manager secret ID for the token. This parameter must provided when the tokenSource is set to SECRET_MANAGER. (Example: projects/your-project-id/secrets/your-secret/versions/your-secret-version).
-* **tokenSource** : The source of the token. The following values are allowed: `PLAINTEXT`, `KMS`, and `SECRET_MANAGER`. You must provide this parameter when Secret Manager is used. If `tokenSource` is set to `KMS`, `tokenKMSEncryptionKey`, and encrypted, then `token` must be provided. If `tokenSource` is set to `SECRET_MANAGER`, then `tokenSecretId` must be provided. If `tokenSource` is set to `PLAINTEXT`, then `token` must be provided.
-* **rootCaCertificatePath** : The full URL to the root CA certificate in Cloud Storage. The certificate provided in Cloud Storage must be DER-encoded and can be supplied in binary or printable (Base64) encoding. If the certificate is provided in Base64 encoding, it must be bounded at the beginning by -----BEGIN CERTIFICATE-----, and must be bounded at the end by -----END CERTIFICATE-----. If this parameter is provided, this private CA certificate file is fetched and added to the Dataflow worker's trust store in order to verify the Splunk HEC endpoint's SSL certificate. If this parameter is not provided, the default trust store is used. (Example: gs://mybucket/mycerts/privateCA.crt).
-* **enableBatchLogs** : Specifies whether logs should be enabled for batches written to Splunk. Default: `true`.
-* **enableGzipHttpCompression** : Specifies whether HTTP requests sent to Splunk HEC should be compressed (gzip content encoded). Default: `true`.
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
-* **javascriptTextTransformReloadIntervalMinutes** : Define the interval that workers may check for JavaScript UDF changes to reload the files. Defaults to: 0.
+* **token**: The Splunk HEC authentication token. Must be provided if the `tokenSource` parameter is set to `PLAINTEXT` or `KMS`.
+* **batchCount**: The batch size for sending multiple events to Splunk. Defaults to `1` (no batching).
+* **disableCertificateValidation**: Disable SSL certificate validation. Default `false` (validation enabled). If `true`, the certificates are not validated (all certificates are trusted) and `rootCaCertificatePath` parameter is ignored.
+* **parallelism**: The maximum number of parallel requests. Defaults to `1` (no parallelism).
+* **includePubsubMessage**: Include the full Pub/Sub message in the payload. Default `false` (only the data element is included in the payload).
+* **tokenKMSEncryptionKey**: The Cloud KMS key to use to decrypt the HEC token string. This parameter must be provided when `tokenSource` is set to `KMS`. If the Cloud KMS key is provided, the HEC token string must be passed in encrypted. For example, `projects/your-project-id/locations/global/keyRings/your-keyring/cryptoKeys/your-key-name`.
+* **tokenSecretId**: The Secret Manager secret ID for the token. This parameter must be provided when `tokenSource` is set to `SECRET_MANAGER`. For example, `projects/your-project-id/secrets/your-secret/versions/your-secret-version`.
+* **tokenSource**: The source of the token. The following values are allowed: `PLAINTEXT`, `KMS`, and `SECRET_MANAGER`. You must provide this parameter when Secret Manager is used. If `tokenSource` is set to `KMS`, you must also provide `tokenKMSEncryptionKey` and an encrypted `token`. If `tokenSource` is set to `SECRET_MANAGER`, you must provide `tokenSecretId`. If `tokenSource` is set to `PLAINTEXT`, you must provide `token`.
+* **rootCaCertificatePath**: The full URL to the root CA certificate in Cloud Storage. The certificate provided in Cloud Storage must be DER-encoded and can be supplied in binary or printable (Base64) encoding. If the certificate is provided in Base64 encoding, it must be bounded at the beginning by `-----BEGIN CERTIFICATE-----`, and must be bounded at the end by `-----END CERTIFICATE-----`. If this parameter is provided, this private CA certificate file is fetched and added to the Dataflow worker's trust store in order to verify the Splunk HEC endpoint's SSL certificate. If this parameter is not provided, the default trust store is used. For example, `gs://mybucket/mycerts/privateCA.crt`.
+* **enableBatchLogs**: Specifies whether logs should be enabled for batches written to Splunk. Default: `true`.
+* **enableGzipHttpCompression**: Specifies whether HTTP requests sent to Splunk HEC should be compressed (gzip content encoded). Default: `true`.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **javascriptTextTransformReloadIntervalMinutes**: Defines the interval, in minutes, at which workers check for JavaScript UDF changes and reload the files. Defaults to: 0.
## User-Defined functions (UDFs)
@@ -275,18 +275,18 @@ resource "google_dataflow_job" "cloud_pubsub_to_splunk" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputSubscription = "projects/your-project-id/subscriptions/your-subscription-name"
- url = "https://splunk-hec-host:8088"
+ inputSubscription = ""
+ url = ""
outputDeadletterTopic = ""
# token = ""
# batchCount = ""
# disableCertificateValidation = ""
# parallelism = ""
# includePubsubMessage = ""
- # tokenKMSEncryptionKey = "projects/your-project-id/locations/global/keyRings/your-keyring/cryptoKeys/your-key-name"
- # tokenSecretId = "projects/your-project-id/secrets/your-secret/versions/your-secret-version"
+ # tokenKMSEncryptionKey = ""
+ # tokenSecretId = ""
# tokenSource = ""
- # rootCaCertificatePath = "gs://mybucket/mycerts/privateCA.crt"
+ # rootCaCertificatePath = ""
# enableBatchLogs = "true"
# enableGzipHttpCompression = "true"
# javascriptTextTransformGcsPath = ""
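To show how the token parameters above combine (a sketch only: the job name, dead-letter topic, and template path are assumed placeholders; the other values reuse the samples from this README), a Splunk job that pulls its HEC token from Secret Manager and trusts a private CA might be configured like this:

```terraform
resource "google_dataflow_job" "cloud_pubsub_to_splunk_example" {
  name              = "pubsub-to-splunk-example"                                 # hypothetical job name
  template_gcs_path = "gs://dataflow-templates/latest/Cloud_PubSub_to_Splunk"    # assumed path; confirm in the template docs
  region            = var.region
  temp_gcs_location = "gs://bucket-name-here/temp"
  parameters = {
    inputSubscription     = "projects/your-project-id/subscriptions/your-subscription-name"
    url                   = "https://splunk-hec-host:8088"
    outputDeadletterTopic = "projects/your-project-id/topics/your-deadletter-topic"  # placeholder
    # tokenSource = SECRET_MANAGER requires tokenSecretId; no plaintext token is passed.
    tokenSource   = "SECRET_MANAGER"
    tokenSecretId = "projects/your-project-id/secrets/your-secret/versions/your-secret-version"
    # Private CA certificate used to verify the Splunk HEC endpoint's SSL certificate.
    rootCaCertificatePath = "gs://mybucket/mycerts/privateCA.crt"
  }
}
```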
diff --git a/v1/README_Cloud_Spanner_to_GCS_Avro.md b/v1/README_Cloud_Spanner_to_GCS_Avro.md
index eddd41c13e..65995ab6fd 100644
--- a/v1/README_Cloud_Spanner_to_GCS_Avro.md
+++ b/v1/README_Cloud_Spanner_to_GCS_Avro.md
@@ -33,21 +33,21 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **instanceId** : The instance ID of the Spanner database that you want to export.
-* **databaseId** : The database ID of the Spanner database that you want to export.
-* **outputDir** : The Cloud Storage path to export Avro files to. The export job creates a new directory under this path that contains the exported files. (Example: gs://your-bucket/your-path).
+* **instanceId**: The instance ID of the Spanner database that you want to export.
+* **databaseId**: The database ID of the Spanner database that you want to export.
+* **outputDir**: The Cloud Storage path to export Avro files to. The export job creates a new directory under this path that contains the exported files. For example, `gs://your-bucket/your-path`.
### Optional parameters
-* **avroTempDirectory** : The Cloud Storage path where temporary Avro files are written.
-* **spannerHost** : The Cloud Spanner endpoint to call in the template. Only used for testing. (Example: https://batch-spanner.googleapis.com). Defaults to: https://batch-spanner.googleapis.com.
-* **snapshotTime** : The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified by using RFC 3339 UTC `Zulu` format. The timestamp must be in the past, and maximum timestamp staleness applies. (Example: 1990-12-31T23:59:60Z). Defaults to empty.
-* **spannerProjectId** : The ID of the Google Cloud project that contains the Spanner database that you want to read data from.
-* **shouldExportTimestampAsLogicalType** : If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, this parameter is set to `false` and timestamps are exported as ISO-8601 strings at nanosecond precision.
-* **tableNames** : A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must either include all of the related tables (parent tables and foreign key referenced tables) or set the `shouldExportRelatedTables` parameter to `true`.If the table is in named schema, please use fully qualified name. For example: `sch1.foo` in which `sch1` is the schema name and `foo` is the table name. Defaults to empty.
-* **shouldExportRelatedTables** : Whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter. Defaults to: false.
-* **spannerPriority** : The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.
-* **dataBoostEnabled** : Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. When set to `true`, you also need the `spanner.databases.useDataBoost` IAM permission. For more information, see the Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview). Defaults to: false.
+* **avroTempDirectory**: The Cloud Storage path where temporary Avro files are written.
+* **spannerHost**: The Cloud Spanner endpoint to call in the template. Only used for testing. For example, `https://batch-spanner.googleapis.com`. Defaults to: https://batch-spanner.googleapis.com.
+* **snapshotTime**: The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified by using RFC 3339 UTC `Zulu` format. The timestamp must be in the past, and maximum timestamp staleness applies. For example, `1990-12-31T23:59:60Z`. Defaults to empty.
+* **spannerProjectId**: The ID of the Google Cloud project that contains the Spanner database that you want to read data from.
+* **shouldExportTimestampAsLogicalType**: If `true`, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, this parameter is set to `false` and timestamps are exported as ISO-8601 strings at nanosecond precision.
+* **tableNames**: A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must either include all of the related tables (parent tables and foreign key referenced tables) or set the `shouldExportRelatedTables` parameter to `true`. If a table is in a named schema, use the fully qualified name. For example, `sch1.foo`, where `sch1` is the schema name and `foo` is the table name. Defaults to empty.
+* **shouldExportRelatedTables**: Whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter. Defaults to: false.
+* **spannerPriority**: The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.
+* **dataBoostEnabled**: Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. When set to `true`, you also need the `spanner.databases.useDataBoost` IAM permission. For more information, see the Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview). Defaults to: false.
@@ -247,10 +247,10 @@ resource "google_dataflow_job" "cloud_spanner_to_gcs_avro" {
parameters = {
instanceId = ""
databaseId = ""
- outputDir = "gs://your-bucket/your-path"
+ outputDir = ""
# avroTempDirectory = ""
# spannerHost = "https://batch-spanner.googleapis.com"
- # snapshotTime = "1990-12-31T23:59:60Z"
+ # snapshotTime = ""
# spannerProjectId = ""
# shouldExportTimestampAsLogicalType = "false"
# tableNames = ""
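As a hedged sketch of the export options above (the job name, instance, database, and template path are assumed placeholders; the output path and table name reuse the examples in this README), exporting only a named-schema table plus its related tables on Data Boost compute might look like:

```terraform
resource "google_dataflow_job" "cloud_spanner_to_gcs_avro_example" {
  name              = "spanner-to-gcs-avro-example"                              # hypothetical job name
  template_gcs_path = "gs://dataflow-templates/latest/Cloud_Spanner_to_GCS_Avro" # assumed path; confirm in the template docs
  region            = var.region
  temp_gcs_location = "gs://bucket-name-here/temp"
  parameters = {
    instanceId = "your-instance-id"   # placeholder
    databaseId = "your-database-id"   # placeholder
    outputDir  = "gs://your-bucket/your-path"
    # Export only sch1.foo (a table in a named schema) and include its related tables.
    tableNames                = "sch1.foo"
    shouldExportRelatedTables = "true"
    # Run on Data Boost compute; requires the spanner.databases.useDataBoost IAM permission.
    dataBoostEnabled = "true"
  }
}
```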
diff --git a/v1/README_Cloud_Spanner_vectors_to_Cloud_Storage.md b/v1/README_Cloud_Spanner_vectors_to_Cloud_Storage.md
index 4a1cb4512a..a19a3106a6 100644
--- a/v1/README_Cloud_Spanner_vectors_to_Cloud_Storage.md
+++ b/v1/README_Cloud_Spanner_vectors_to_Cloud_Storage.md
@@ -25,20 +25,20 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **spannerProjectId** : The project ID of the Spanner instance.
-* **spannerInstanceId** : The ID of the Spanner instance to export the vector embeddings from.
-* **spannerDatabaseId** : The ID of the Spanner database to export the vector embeddings from.
-* **spannerTable** : The Spanner table to read from.
-* **spannerColumnsToExport** : A comma-separated list of required columns for the Vertex AI Vector Search index. The ID and embedding columns are required by Vector Search. If your column names don't match the Vertex AI Vector Search index input structure, create column mappings by using aliases. If the column names don't match the format expected by Vertex AI, use the notation from:to. For example, if you have columns named id and my_embedding, specify id, my_embedding:embedding.
-* **gcsOutputFolder** : The Cloud Storage folder to write output files to. The path must end with a slash. (Example: gs://your-bucket/folder1/).
-* **gcsOutputFilePrefix** : The filename prefix for writing output files. (Example: vector-embeddings).
+* **spannerProjectId**: The project ID of the Spanner instance.
+* **spannerInstanceId**: The ID of the Spanner instance to export the vector embeddings from.
+* **spannerDatabaseId**: The ID of the Spanner database to export the vector embeddings from.
+* **spannerTable**: The Spanner table to read from.
+* **spannerColumnsToExport**: A comma-separated list of required columns for the Vertex AI Vector Search index. The ID and embedding columns are required by Vector Search. If your column names don't match the Vertex AI Vector Search index input structure, create column mappings by using aliases. If the column names don't match the format expected by Vertex AI, use the `from:to` notation. For example, if you have columns named `id` and `my_embedding`, specify `id, my_embedding:embedding`.
+* **gcsOutputFolder**: The Cloud Storage folder to write output files to. The path must end with a slash. For example, `gs://your-bucket/folder1/`.
+* **gcsOutputFilePrefix**: The filename prefix for writing output files. For example, `vector-embeddings`.
### Optional parameters
-* **spannerHost** : The Spanner endpoint to call in the template. The default value is https://batch-spanner.googleapis.com. (Example: https://batch-spanner.googleapis.com).
-* **spannerVersionTime** : If set, specifies the time when the database version must be taken. The value is a string in the RFC-3339 date format in Unix epoch time. For example: 1990-12-31T23:59:60Z. The timestamp must be in the past, and maximum timestamp staleness (https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness) applies. If not set, a strong bound (https://cloud.google.com/spanner/docs/timestamp-bounds#strong) is used to read the latest data. Defaults to empty. (Example: 1990-12-31T23:59:60Z).
-* **spannerDataBoostEnabled** : When set to true, the template uses Spanner on-demand compute. The export job runs on independent compute resources that don't impact current Spanner workloads. Using this option incurs additional charges in Spanner. For more information, see Spanner Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview). Defaults to: false.
-* **spannerPriority** : The request priority for Spanner calls. The allowed values are HIGH, MEDIUM, and LOW. The default value is MEDIUM.
+* **spannerHost**: The Spanner endpoint to call in the template. The default value is https://batch-spanner.googleapis.com. For example, `https://batch-spanner.googleapis.com`.
+* **spannerVersionTime**: If set, specifies the time when the database version must be taken. The value is a string in the RFC-3339 date format in Unix epoch time. For example, `1990-12-31T23:59:60Z`. The timestamp must be in the past, and maximum timestamp staleness (https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness) applies. If not set, a strong bound (https://cloud.google.com/spanner/docs/timestamp-bounds#strong) is used to read the latest data. Defaults to empty.
+* **spannerDataBoostEnabled**: When set to `true`, the template uses Spanner on-demand compute. The export job runs on independent compute resources that don't impact current Spanner workloads. Using this option incurs additional charges in Spanner. For more information, see Spanner Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview). Defaults to: `false`.
+* **spannerPriority**: The request priority for Spanner calls. The allowed values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.
@@ -238,10 +238,10 @@ resource "google_dataflow_job" "cloud_spanner_vectors_to_cloud_storage" {
spannerDatabaseId = ""
spannerTable = ""
spannerColumnsToExport = ""
- gcsOutputFolder = "gs://your-bucket/folder1/"
- gcsOutputFilePrefix = "vector-embeddings"
+ gcsOutputFolder = ""
+ gcsOutputFilePrefix = ""
# spannerHost = "https://batch-spanner.googleapis.com"
- # spannerVersionTime = "1990-12-31T23:59:60Z"
+ # spannerVersionTime = ""
# spannerDataBoostEnabled = "false"
# spannerPriority = ""
}
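To illustrate the `spannerColumnsToExport` alias mapping described above (a sketch only: the job name, project, instance, database, table, and template path are assumed placeholders; the output folder, prefix, and column mapping reuse the examples in this README), a filled-in job might look like:

```terraform
resource "google_dataflow_job" "cloud_spanner_vectors_to_cloud_storage_example" {
  name              = "spanner-vectors-export-example"                                          # hypothetical job name
  template_gcs_path = "gs://dataflow-templates/latest/Cloud_Spanner_vectors_to_Cloud_Storage"   # assumed path; confirm in the template docs
  region            = var.region
  temp_gcs_location = "gs://bucket-name-here/temp"
  parameters = {
    spannerProjectId  = "your-project-id"    # placeholder
    spannerInstanceId = "your-instance-id"   # placeholder
    spannerDatabaseId = "your-database-id"   # placeholder
    spannerTable      = "your-table"         # placeholder
    # Map the my_embedding column to the embedding field expected by Vector Search.
    spannerColumnsToExport = "id, my_embedding:embedding"
    gcsOutputFolder        = "gs://your-bucket/folder1/"
    gcsOutputFilePrefix    = "vector-embeddings"
  }
}
```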
diff --git a/v1/README_Datastore_to_Datastore_Delete.md b/v1/README_Datastore_to_Datastore_Delete.md
index a57e414abb..5225c01a6d 100644
--- a/v1/README_Datastore_to_Datastore_Delete.md
+++ b/v1/README_Datastore_to_Datastore_Delete.md
@@ -18,16 +18,16 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **datastoreReadGqlQuery** : A GQL (https://cloud.google.com/datastore/docs/reference/gql_reference) query that specifies which entities to grab. For example, `SELECT * FROM MyKind`.
-* **datastoreReadProjectId** : The ID of the Google Cloud project that contains the Datastore instance that you want to read data from.
-* **datastoreDeleteProjectId** : Google Cloud Project Id of where to delete the datastore entities.
+* **datastoreReadGqlQuery**: A GQL (https://cloud.google.com/datastore/docs/reference/gql_reference) query that specifies which entities to grab. For example, `SELECT * FROM MyKind`.
+* **datastoreReadProjectId**: The ID of the Google Cloud project that contains the Datastore instance that you want to read data from.
+* **datastoreDeleteProjectId**: The ID of the Google Cloud project from which to delete the Datastore entities.
### Optional parameters
-* **datastoreReadNamespace** : The namespace of the requested entities. To use the default namespace, leave this parameter blank.
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
-* **datastoreHintNumWorkers** : Hint for the expected number of workers in the Datastore ramp-up throttling step. Defaults to: 500.
+* **datastoreReadNamespace**: The namespace of the requested entities. To use the default namespace, leave this parameter blank.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **datastoreHintNumWorkers**: Hint for the expected number of workers in the Datastore ramp-up throttling step. Defaults to: 500.
## User-Defined functions (UDFs)
diff --git a/v1/README_Datastore_to_GCS_Text.md b/v1/README_Datastore_to_GCS_Text.md
index 4a513cf37a..cd7eebdf74 100644
--- a/v1/README_Datastore_to_GCS_Text.md
+++ b/v1/README_Datastore_to_GCS_Text.md
@@ -19,15 +19,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **datastoreReadGqlQuery** : A GQL (https://cloud.google.com/datastore/docs/reference/gql_reference) query that specifies which entities to grab. For example, `SELECT * FROM MyKind`.
-* **datastoreReadProjectId** : The ID of the Google Cloud project that contains the Datastore instance that you want to read data from.
-* **textWritePrefix** : The Cloud Storage path prefix that specifies where the data is written. (Example: gs://mybucket/somefolder/).
+* **datastoreReadGqlQuery**: A GQL (https://cloud.google.com/datastore/docs/reference/gql_reference) query that specifies which entities to grab. For example, `SELECT * FROM MyKind`.
+* **datastoreReadProjectId**: The ID of the Google Cloud project that contains the Datastore instance that you want to read data from.
+* **textWritePrefix**: The Cloud Storage path prefix that specifies where the data is written. For example, `gs://mybucket/somefolder/`.
### Optional parameters
-* **datastoreReadNamespace** : The namespace of the requested entities. To use the default namespace, leave this parameter blank.
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **datastoreReadNamespace**: The namespace of the requested entities. To use the default namespace, leave this parameter blank.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
## User-Defined functions (UDFs)
@@ -219,7 +219,7 @@ resource "google_dataflow_job" "datastore_to_gcs_text" {
parameters = {
datastoreReadGqlQuery = ""
datastoreReadProjectId = ""
- textWritePrefix = "gs://mybucket/somefolder/"
+ textWritePrefix = ""
# datastoreReadNamespace = ""
# javascriptTextTransformGcsPath = ""
# javascriptTextTransformFunctionName = ""
diff --git a/v1/README_Firestore_to_Firestore_Delete.md b/v1/README_Firestore_to_Firestore_Delete.md
index eb15ffd886..5801238a6b 100644
--- a/v1/README_Firestore_to_Firestore_Delete.md
+++ b/v1/README_Firestore_to_Firestore_Delete.md
@@ -18,16 +18,16 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **firestoreReadGqlQuery** : A GQL (https://cloud.google.com/datastore/docs/reference/gql_reference) query that specifies which entities to grab. For example, `SELECT * FROM MyKind`.
-* **firestoreReadProjectId** : The ID of the Google Cloud project that contains the Firestore instance that you want to read data from.
-* **firestoreDeleteProjectId** : Google Cloud Project Id of where to delete the firestore entities.
+* **firestoreReadGqlQuery**: A GQL (https://cloud.google.com/datastore/docs/reference/gql_reference) query that specifies which entities to grab. For example, `SELECT * FROM MyKind`.
+* **firestoreReadProjectId**: The ID of the Google Cloud project that contains the Firestore instance that you want to read data from.
+* **firestoreDeleteProjectId**: The ID of the Google Cloud project from which to delete the Firestore entities.
### Optional parameters
-* **firestoreReadNamespace** : The namespace of the requested entities. To use the default namespace, leave this parameter blank.
-* **firestoreHintNumWorkers** : Hint for the expected number of workers in the Firestore ramp-up throttling step. Defaults to: 500.
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **firestoreReadNamespace**: The namespace of the requested entities. To use the default namespace, leave this parameter blank.
+* **firestoreHintNumWorkers**: Hint for the expected number of workers in the Firestore ramp-up throttling step. Defaults to: 500.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
## User-Defined functions (UDFs)
diff --git a/v1/README_Firestore_to_GCS_Text.md b/v1/README_Firestore_to_GCS_Text.md
index 569bbec1bf..3110a1a190 100644
--- a/v1/README_Firestore_to_GCS_Text.md
+++ b/v1/README_Firestore_to_GCS_Text.md
@@ -19,15 +19,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **firestoreReadGqlQuery** : A GQL (https://cloud.google.com/datastore/docs/reference/gql_reference) query that specifies which entities to grab. For example, `SELECT * FROM MyKind`.
-* **firestoreReadProjectId** : The ID of the Google Cloud project that contains the Firestore instance that you want to read data from.
-* **textWritePrefix** : The Cloud Storage path prefix that specifies where the data is written. (Example: gs://mybucket/somefolder/).
+* **firestoreReadGqlQuery**: A GQL (https://cloud.google.com/datastore/docs/reference/gql_reference) query that specifies which entities to grab. For example, `SELECT * FROM MyKind`.
+* **firestoreReadProjectId**: The ID of the Google Cloud project that contains the Firestore instance that you want to read data from.
+* **textWritePrefix**: The Cloud Storage path prefix that specifies where the data is written. For example, `gs://mybucket/somefolder/`.
### Optional parameters
-* **firestoreReadNamespace** : The namespace of the requested entities. To use the default namespace, leave this parameter blank.
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **firestoreReadNamespace**: The namespace of the requested entities. To use the default namespace, leave this parameter blank.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
## User-Defined functions (UDFs)
@@ -219,7 +219,7 @@ resource "google_dataflow_job" "firestore_to_gcs_text" {
parameters = {
firestoreReadGqlQuery = ""
firestoreReadProjectId = ""
- textWritePrefix = "gs://mybucket/somefolder/"
+ textWritePrefix = ""
# firestoreReadNamespace = ""
# javascriptTextTransformGcsPath = ""
# javascriptTextTransformFunctionName = ""
diff --git a/v1/README_GCS_Avro_to_Cloud_Bigtable.md b/v1/README_GCS_Avro_to_Cloud_Bigtable.md
index 3999f51f8d..e50f50f4b9 100644
--- a/v1/README_GCS_Avro_to_Cloud_Bigtable.md
+++ b/v1/README_GCS_Avro_to_Cloud_Bigtable.md
@@ -18,14 +18,14 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **bigtableProjectId** : The ID of the Google Cloud project that contains the Bigtable instance that you want to write data to.
-* **bigtableInstanceId** : The ID of the Bigtable instance that contains the table.
-* **bigtableTableId** : The ID of the Bigtable table to import.
-* **inputFilePattern** : The Cloud Storage path pattern where data is located. (Example: gs:////*).
+* **bigtableProjectId**: The ID of the Google Cloud project that contains the Bigtable instance that you want to write data to.
+* **bigtableInstanceId**: The ID of the Bigtable instance that contains the table.
+* **bigtableTableId**: The ID of the Bigtable table to import.
+* **inputFilePattern**: The Cloud Storage path pattern where data is located. For example, `gs:///FOLDER/PREFIX*`.
### Optional parameters
-* **splitLargeRows** : The flag for enabling splitting of large rows into multiple MutateRows requests. Note that when a large row is split between multiple API calls, the updates to the row are not atomic. .
+* **splitLargeRows**: The flag for enabling splitting of large rows into multiple MutateRows requests. Note that when a large row is split between multiple API calls, the updates to the row are not atomic.
@@ -205,7 +205,7 @@ resource "google_dataflow_job" "gcs_avro_to_cloud_bigtable" {
bigtableProjectId = ""
bigtableInstanceId = ""
bigtableTableId = ""
- inputFilePattern = "gs:////*"
+ inputFilePattern = ""
# splitLargeRows = ""
}
}
diff --git a/v1/README_GCS_Avro_to_Cloud_Spanner.md b/v1/README_GCS_Avro_to_Cloud_Spanner.md
index 6b57828c39..ee4cf0ccaf 100644
--- a/v1/README_GCS_Avro_to_Cloud_Spanner.md
+++ b/v1/README_GCS_Avro_to_Cloud_Spanner.md
@@ -18,21 +18,21 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **instanceId** : The instance ID of the Spanner database.
-* **databaseId** : The database ID of the Spanner database.
-* **inputDir** : The Cloud Storage path where the Avro files are imported from.
+* **instanceId**: The instance ID of the Spanner database.
+* **databaseId**: The database ID of the Spanner database.
+* **inputDir**: The Cloud Storage path where the Avro files are imported from.
### Optional parameters
-* **spannerHost** : The Cloud Spanner endpoint to call in the template. Only used for testing. (Example: https://batch-spanner.googleapis.com). Defaults to: https://batch-spanner.googleapis.com.
-* **waitForIndexes** : If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. The default value is `false`.
-* **waitForForeignKeys** : If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. The default value is `false`.
-* **waitForChangeStreams** : If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. The default value is `true`.
-* **waitForSequences** : By default, the import pipeline is blocked on sequence creation. If `false`, the import pipeline might complete with sequences still being created in the background.
-* **earlyIndexCreateFlag** : Specifies whether early index creation is enabled. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, set `earlyIndexCreateFlag` to `false`. The default value is `true`.
-* **spannerProjectId** : The ID of the Google Cloud project that contains the Spanner database. If not set, the default Google Cloud project is used.
-* **ddlCreationTimeoutInMinutes** : The timeout in minutes for DDL statements performed by the template. The default value is 30 minutes.
-* **spannerPriority** : The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.
+* **spannerHost**: The Cloud Spanner endpoint to call in the template. Only used for testing. For example, `https://batch-spanner.googleapis.com`. Defaults to: https://batch-spanner.googleapis.com.
+* **waitForIndexes**: If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. The default value is `false`.
+* **waitForForeignKeys**: If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. The default value is `false`.
+* **waitForChangeStreams**: If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. The default value is `true`.
+* **waitForSequences**: By default, the import pipeline is blocked on sequence creation. If `false`, the import pipeline might complete with sequences still being created in the background.
+* **earlyIndexCreateFlag**: Specifies whether early index creation is enabled. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, set `earlyIndexCreateFlag` to `false`. The default value is `true`.
+* **spannerProjectId**: The ID of the Google Cloud project that contains the Spanner database. If not set, the default Google Cloud project is used.
+* **ddlCreationTimeoutInMinutes**: The timeout in minutes for DDL statements performed by the template. The default value is 30 minutes.
+* **spannerPriority**: The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.
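As a hedged sketch of how the wait/priority options above are typically combined (the job name, instance, database, input directory, and template path are assumed placeholders; the parameter names come from the list above and the resource shape follows the other READMEs in this diff), an import that blocks on index creation could be configured like:

```terraform
resource "google_dataflow_job" "gcs_avro_to_cloud_spanner_example" {
  name              = "avro-to-spanner-import-example"                             # hypothetical job name
  template_gcs_path = "gs://dataflow-templates/latest/GCS_Avro_to_Cloud_Spanner"   # assumed path; confirm in the template docs
  region            = var.region
  temp_gcs_location = "gs://bucket-name-here/temp"
  parameters = {
    instanceId = "your-instance-id"                   # placeholder
    databaseId = "your-database-id"                   # placeholder
    inputDir   = "gs://your-bucket/your-avro-export"  # placeholder
    # Block until secondary indexes exist so downstream jobs can rely on them.
    waitForIndexes              = "true"
    ddlCreationTimeoutInMinutes = "30"
    spannerPriority             = "MEDIUM"
  }
}
```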
diff --git a/v1/README_GCS_CSV_to_BigQuery.md b/v1/README_GCS_CSV_to_BigQuery.md
index 2d742cff3d..392774e6a6 100644
--- a/v1/README_GCS_CSV_to_BigQuery.md
+++ b/v1/README_GCS_CSV_to_BigQuery.md
@@ -16,18 +16,18 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputFilePattern** : The Cloud Storage path to the CSV file that contains the text to process. (Example: gs://your-bucket/path/*.csv).
-* **schemaJSONPath** : The Cloud Storage path to the JSON file that defines your BigQuery schema.
-* **outputTable** : The name of the BigQuery table that stores your processed data. If you reuse an existing BigQuery table, the data is appended to the destination table.
-* **bigQueryLoadingTemporaryDirectory** : The temporary directory to use during the BigQuery loading process. (Example: gs://your-bucket/your-files/temp_dir).
-* **badRecordsOutputTable** : The name of the BigQuery table to use to store the rejected data when processing the CSV files. If you reuse an existing BigQuery table, the data is appended to the destination table. The schema of this table must match the error table schema (https://cloud.google.com/dataflow/docs/guides/templates/provided/cloud-storage-csv-to-bigquery#GcsCSVToBigQueryBadRecordsSchema).
-* **delimiter** : The column delimiter that the CSV file uses. (Example: ,).
-* **csvFormat** : The CSV format according to Apache Commons CSV format. Defaults to: Default.
+* **inputFilePattern**: The Cloud Storage path to the CSV file that contains the text to process. For example, `gs://your-bucket/path/*.csv`.
+* **schemaJSONPath**: The Cloud Storage path to the JSON file that defines your BigQuery schema.
+* **outputTable**: The name of the BigQuery table that stores your processed data. If you reuse an existing BigQuery table, the data is appended to the destination table.
+* **bigQueryLoadingTemporaryDirectory**: The temporary directory to use during the BigQuery loading process. For example, `gs://your-bucket/your-files/temp_dir`.
+* **badRecordsOutputTable**: The name of the BigQuery table to use to store the rejected data when processing the CSV files. If you reuse an existing BigQuery table, the data is appended to the destination table. The schema of this table must match the error table schema (https://cloud.google.com/dataflow/docs/guides/templates/provided/cloud-storage-csv-to-bigquery#GcsCSVToBigQueryBadRecordsSchema).
+* **delimiter**: The column delimiter that the CSV file uses. For example, `,`.
+* **csvFormat**: The CSV format according to the Apache Commons CSV format. Defaults to: `Default`.
### Optional parameters
-* **containsHeaders** : Whether headers are included in the CSV file. Defaults to: false.
-* **csvFileEncoding** : The CSV file character encoding format. Allowed Values are US-ASCII, ISO-8859-1, UTF-8, and UTF-16. Defaults to: UTF-8.
+* **containsHeaders**: Whether headers are included in the CSV file. Defaults to: `false`.
+* **csvFileEncoding**: The CSV file character encoding format. Allowed values are `US-ASCII`, `ISO-8859-1`, `UTF-8`, and `UTF-16`. Defaults to: `UTF-8`.
@@ -216,12 +216,12 @@ resource "google_dataflow_job" "gcs_csv_to_bigquery" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputFilePattern = "gs://your-bucket/path/*.csv"
+ inputFilePattern = ""
schemaJSONPath = ""
outputTable = ""
- bigQueryLoadingTemporaryDirectory = "gs://your-bucket/your-files/temp_dir"
+ bigQueryLoadingTemporaryDirectory = ""
badRecordsOutputTable = ""
- delimiter = ","
+ delimiter = ""
csvFormat = ""
# containsHeaders = "false"
# csvFileEncoding = "UTF-8"
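To tie the parsing options above together (a sketch only: the job name, schema path, BigQuery tables, and template path are assumed placeholders; the file pattern, temp directory, delimiter, and encoding reuse the values shown in this README), a job for header-bearing UTF-8 CSVs might look like:

```terraform
resource "google_dataflow_job" "gcs_csv_to_bigquery_example" {
  name              = "csv-to-bigquery-example"                               # hypothetical job name
  template_gcs_path = "gs://dataflow-templates/latest/GCS_CSV_to_BigQuery"    # assumed path; confirm in the template docs
  region            = var.region
  temp_gcs_location = "gs://bucket-name-here/temp"
  parameters = {
    inputFilePattern                  = "gs://your-bucket/path/*.csv"
    schemaJSONPath                    = "gs://your-bucket/schemas/schema.json"      # placeholder
    outputTable                       = "your-project-id:your_dataset.your_table"   # placeholder
    bigQueryLoadingTemporaryDirectory = "gs://your-bucket/your-files/temp_dir"
    badRecordsOutputTable             = "your-project-id:your_dataset.bad_records"  # placeholder
    delimiter                         = ","
    csvFormat                         = "Default"
    # The input files include a header row and use UTF-8 encoding.
    containsHeaders = "true"
    csvFileEncoding = "UTF-8"
  }
}
```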
diff --git a/v1/README_GCS_Parquet_to_Cloud_Bigtable.md b/v1/README_GCS_Parquet_to_Cloud_Bigtable.md
index 8ea1677b77..012aefa96d 100644
--- a/v1/README_GCS_Parquet_to_Cloud_Bigtable.md
+++ b/v1/README_GCS_Parquet_to_Cloud_Bigtable.md
@@ -18,14 +18,14 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **bigtableProjectId** : The Google Cloud project ID associated with the Bigtable instance.
-* **bigtableInstanceId** : The ID of the Cloud Bigtable instance that contains the table.
-* **bigtableTableId** : The ID of the Bigtable table to import.
-* **inputFilePattern** : The Cloud Storage path with the files that contain the data. (Example: gs://your-bucket/your-files/*.parquet).
+* **bigtableProjectId**: The Google Cloud project ID associated with the Bigtable instance.
+* **bigtableInstanceId**: The ID of the Cloud Bigtable instance that contains the table.
+* **bigtableTableId**: The ID of the Bigtable table to import.
+* **inputFilePattern**: The Cloud Storage path with the files that contain the data. For example, `gs://your-bucket/your-files/*.parquet`.
### Optional parameters
-* **splitLargeRows** : The flag for enabling splitting of large rows into multiple MutateRows requests. Note that when a large row is split between multiple API calls, the updates to the row are not atomic. .
+* **splitLargeRows**: The flag for enabling splitting of large rows into multiple MutateRows requests. Note that when a large row is split between multiple API calls, the updates to the row are not atomic.
@@ -205,7 +205,7 @@ resource "google_dataflow_job" "gcs_parquet_to_cloud_bigtable" {
bigtableProjectId = ""
bigtableInstanceId = ""
bigtableTableId = ""
- inputFilePattern = "gs://your-bucket/your-files/*.parquet"
+ inputFilePattern = ""
# splitLargeRows = ""
}
}
diff --git a/v1/README_GCS_SequenceFile_to_Cloud_Bigtable.md b/v1/README_GCS_SequenceFile_to_Cloud_Bigtable.md
index 85e1464716..883d48356d 100644
--- a/v1/README_GCS_SequenceFile_to_Cloud_Bigtable.md
+++ b/v1/README_GCS_SequenceFile_to_Cloud_Bigtable.md
@@ -18,15 +18,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **bigtableProject** : The ID of the Google Cloud project that contains the Bigtable instance that you want to write data to.
-* **bigtableInstanceId** : The ID of the Bigtable instance that contains the table.
-* **bigtableTableId** : The ID of the Bigtable table to import.
-* **sourcePattern** : The Cloud Storage path pattern to the location of the data. (Example: gs://your-bucket/your-path/prefix*).
+* **bigtableProject**: The ID of the Google Cloud project that contains the Bigtable instance that you want to write data to.
+* **bigtableInstanceId**: The ID of the Bigtable instance that contains the table.
+* **bigtableTableId**: The ID of the Bigtable table to import.
+* **sourcePattern**: The Cloud Storage path pattern to the location of the data. For example, `gs://your-bucket/your-path/prefix*`.
### Optional parameters
-* **bigtableAppProfileId** : The ID of the Bigtable application profile to use for the import. If you don't specify an application profile, Bigtable uses the instance's default application profile (https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile).
-* **mutationThrottleLatencyMs** : Optional Set mutation latency throttling (enables the feature). Value in milliseconds. Defaults to: 0.
+* **bigtableAppProfileId**: The ID of the Bigtable application profile to use for the import. If you don't specify an application profile, Bigtable uses the instance's default application profile (https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile).
+* **mutationThrottleLatencyMs**: Optional. Sets mutation latency throttling (enables the feature). Value in milliseconds. Defaults to: 0.
@@ -209,7 +209,7 @@ resource "google_dataflow_job" "gcs_sequencefile_to_cloud_bigtable" {
bigtableProject = ""
bigtableInstanceId = ""
bigtableTableId = ""
- sourcePattern = "gs://your-bucket/your-path/prefix*"
+ sourcePattern = ""
# bigtableAppProfileId = ""
# mutationThrottleLatencyMs = "0"
}
diff --git a/v1/README_GCS_Text_to_BigQuery.md b/v1/README_GCS_Text_to_BigQuery.md
index 11feb39ca2..49815457aa 100644
--- a/v1/README_GCS_Text_to_BigQuery.md
+++ b/v1/README_GCS_Text_to_BigQuery.md
@@ -19,8 +19,8 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputFilePattern** : Path of the file pattern glob to read from. (Example: gs://your-bucket/path/*.csv).
-* **JSONPath** : JSON file with BigQuery Schema description. JSON Example: {
+* **inputFilePattern**: Path of the file pattern glob to read from. For example, `gs://your-bucket/path/*.csv`.
+* **JSONPath**: JSON file with BigQuery Schema description. JSON Example: {
"BigQuery Schema": [
{
"name": "location",
@@ -44,13 +44,13 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
}
]
}.
-* **outputTable** : BigQuery table location to write the output to. The table's schema must match the input objects.
-* **bigQueryLoadingTemporaryDirectory** : Temporary directory for BigQuery loading process (Example: gs://your-bucket/your-files/temp_dir).
+* **outputTable**: BigQuery table location to write the output to. The table's schema must match the input objects.
+* **bigQueryLoadingTemporaryDirectory**: The temporary directory for the BigQuery loading process. For example, `gs://your-bucket/your-files/temp_dir`.
### Optional parameters
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
## User-Defined functions (UDFs)
@@ -240,10 +240,10 @@ resource "google_dataflow_job" "gcs_text_to_bigquery" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputFilePattern = "gs://your-bucket/path/*.csv"
+ inputFilePattern = ""
JSONPath = ""
outputTable = ""
- bigQueryLoadingTemporaryDirectory = "gs://your-bucket/your-files/temp_dir"
+ bigQueryLoadingTemporaryDirectory = ""
# javascriptTextTransformGcsPath = ""
# javascriptTextTransformFunctionName = ""
}
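As a hedged sketch of wiring the UDF parameters above into a job (the job name, schema path, output table, and template path are assumed placeholders; the UDF path and function name reuse the `gs://my-bucket/my-udfs/my_file.js` / `myTransform` samples from this README), the configuration might look like:

```terraform
resource "google_dataflow_job" "gcs_text_to_bigquery_example" {
  name              = "text-to-bigquery-udf-example"                           # hypothetical job name
  template_gcs_path = "gs://dataflow-templates/latest/GCS_Text_to_BigQuery"    # assumed path; confirm in the template docs
  region            = var.region
  temp_gcs_location = "gs://bucket-name-here/temp"
  parameters = {
    inputFilePattern                  = "gs://your-bucket/path/*.csv"
    JSONPath                          = "gs://your-bucket/schemas/bq_schema.json"  # placeholder
    outputTable                       = "your-project-id:your_dataset.your_table"  # placeholder
    bigQueryLoadingTemporaryDirectory = "gs://your-bucket/your-files/temp_dir"
    # Transform each input line with the myTransform UDF before loading into BigQuery.
    javascriptTextTransformGcsPath      = "gs://my-bucket/my-udfs/my_file.js"
    javascriptTextTransformFunctionName = "myTransform"
  }
}
```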
diff --git a/v1/README_GCS_Text_to_Cloud_PubSub.md b/v1/README_GCS_Text_to_Cloud_PubSub.md
index 76626c5237..a284548e9d 100644
--- a/v1/README_GCS_Text_to_Cloud_PubSub.md
+++ b/v1/README_GCS_Text_to_Cloud_PubSub.md
@@ -24,8 +24,8 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputFilePattern** : The input file pattern to read from. (Example: gs://bucket-name/files/*.json).
-* **outputTopic** : The Pub/Sub input topic to write to. The name must be in the format `projects//topics/`. (Example: projects/your-project-id/topics/your-topic-name).
+* **inputFilePattern**: The input file pattern to read from. For example, `gs://bucket-name/files/*.json`.
+* **outputTopic**: The Pub/Sub topic to write the output to. The name must be in the format `projects//topics/`. For example, `projects/your-project-id/topics/your-topic-name`.
### Optional parameters
@@ -196,8 +196,8 @@ resource "google_dataflow_job" "gcs_text_to_cloud_pubsub" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputFilePattern = "gs://bucket-name/files/*.json"
- outputTopic = "projects/your-project-id/topics/your-topic-name"
+ inputFilePattern = ""
+ outputTopic = ""
}
}
```
diff --git a/v1/README_GCS_Text_to_Cloud_Spanner.md b/v1/README_GCS_Text_to_Cloud_Spanner.md
index de286dae99..01ce3a7800 100644
--- a/v1/README_GCS_Text_to_Cloud_Spanner.md
+++ b/v1/README_GCS_Text_to_Cloud_Spanner.md
@@ -17,24 +17,24 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **instanceId** : The instance ID of the Spanner database.
-* **databaseId** : The database ID of the Spanner database.
-* **importManifest** : The path in Cloud Storage to use when importing manifest files. (Example: gs://your-bucket/your-folder/your-manifest.json).
+* **instanceId**: The instance ID of the Spanner database.
+* **databaseId**: The database ID of the Spanner database.
+* **importManifest**: The path in Cloud Storage to use when importing manifest files. For example, `gs://your-bucket/your-folder/your-manifest.json`.
### Optional parameters
-* **spannerHost** : The Cloud Spanner endpoint to call in the template. Only used for testing. (Example: https://batch-spanner.googleapis.com). Defaults to: https://batch-spanner.googleapis.com.
-* **columnDelimiter** : The column delimiter that the source file uses. The default value is ','. (Example: ,).
-* **fieldQualifier** : The character that must surround any value in the source file that contains the columnDelimiter. The default value is ".
-* **trailingDelimiter** : Specifies whether the lines in the source files have trailing delimiters, that is, whether the `columnDelimiter` character appears at the end of each line, after the last column value). The default value is `true`.
-* **escape** : The escape character the source file uses. By default, this parameter is not set and the template does not use the escape character.
-* **nullString** : The string that represents a `NULL` value. By default, this parameter is not set and the template does not use the null string.
-* **dateFormat** : The format used to parse date columns. By default, the pipeline tries to parse the date columns as `yyyy-M-d[' 00:00:00']`, for example, as 2019-01-31 or 2019-1-1 00:00:00. If your date format is different, specify the format using the java.time.format.DateTimeFormatter (https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/time/format/DateTimeFormatter.html) patterns.
-* **timestampFormat** : The format used to parse timestamp columns. If the timestamp is a long integer, then it is parsed as Unix epoch time. Otherwise, it is parsed as a string using the java.time.format.DateTimeFormatter.ISO_INSTANT (https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_INSTANT) format. For other cases, specify your own pattern string, for example, using `MMM dd yyyy HH:mm:ss.SSSVV` for timestamps in the form of `"Jan 21 1998 01:02:03.456+08:00"`.
-* **spannerProjectId** : The ID of the Google Cloud project that contains the Spanner database. If not set, the project ID of the default Google Cloud project is used.
-* **spannerPriority** : The request priority for Spanner calls. Possible values are HIGH, MEDIUM, and LOW. The default value is MEDIUM.
-* **handleNewLine** : If `true`, the input data can contain newline characters. Otherwise, newline characters cause an error. The default value is `false`. Enabling newline handling can reduce performance.
-* **invalidOutputPath** : The Cloud Storage path to use when writing rows that cannot be imported. (Example: gs://your-bucket/your-path). Defaults to empty.
+* **spannerHost**: The Cloud Spanner endpoint to call in the template. Only used for testing. For example, `https://batch-spanner.googleapis.com`. Defaults to: https://batch-spanner.googleapis.com.
+* **columnDelimiter**: The column delimiter that the source file uses. The default value is `,`.
+* **fieldQualifier**: The character that must surround any value in the source file that contains the columnDelimiter. The default value is a double quote (`"`).
+* **trailingDelimiter**: Specifies whether the lines in the source files have trailing delimiters, that is, whether the `columnDelimiter` character appears at the end of each line, after the last column value. The default value is `true`.
+* **escape**: The escape character the source file uses. By default, this parameter is not set and the template does not use the escape character.
+* **nullString**: The string that represents a `NULL` value. By default, this parameter is not set and the template does not use the null string.
+* **dateFormat**: The format used to parse date columns. By default, the pipeline tries to parse the date columns as `yyyy-M-d[' 00:00:00']`, for example, as `2019-01-31` or `2019-1-1 00:00:00`. If your date format is different, specify the format using the java.time.format.DateTimeFormatter (https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/time/format/DateTimeFormatter.html) patterns.
+* **timestampFormat**: The format used to parse timestamp columns. If the timestamp is a long integer, then it is parsed as Unix epoch time. Otherwise, it is parsed as a string using the java.time.format.DateTimeFormatter.ISO_INSTANT (https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_INSTANT) format. For other cases, specify your own pattern string, for example, using `MMM dd yyyy HH:mm:ss.SSSVV` for timestamps in the form of `Jan 21 1998 01:02:03.456+08:00`.
+* **spannerProjectId**: The ID of the Google Cloud project that contains the Spanner database. If not set, the project ID of the default Google Cloud project is used.
+* **spannerPriority**: The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.
+* **handleNewLine**: If `true`, the input data can contain newline characters. Otherwise, newline characters cause an error. The default value is `false`. Enabling newline handling can reduce performance.
+* **invalidOutputPath**: The Cloud Storage path to use when writing rows that cannot be imported. For example, `gs://your-bucket/your-path`. Defaults to empty.
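+
+The `dateFormat` and `timestampFormat` parameters above take `java.time.format.DateTimeFormatter` patterns. As a minimal, hypothetical sketch (the class name is illustrative and not part of the template), the custom timestamp pattern from the example can be exercised directly in Java:
+
+```java
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Locale;
+
+public class TimestampPatternCheck {
+  public static void main(String[] args) {
+    // The pattern from the timestampFormat example above; Locale.ENGLISH keeps "Jan"
+    // parseable regardless of the default locale.
+    DateTimeFormatter formatter =
+        DateTimeFormatter.ofPattern("MMM dd yyyy HH:mm:ss.SSSVV", Locale.ENGLISH);
+    ZonedDateTime ts = ZonedDateTime.parse("Jan 21 1998 01:02:03.456+08:00", formatter);
+    System.out.println(ts); // 1998-01-21T01:02:03.456+08:00
+  }
+}
+```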
@@ -243,7 +243,7 @@ resource "google_dataflow_job" "gcs_text_to_cloud_spanner" {
parameters = {
instanceId = ""
databaseId = ""
- importManifest = "gs://your-bucket/your-folder/your-manifest.json"
+ importManifest = ""
# spannerHost = "https://batch-spanner.googleapis.com"
# columnDelimiter = ","
# fieldQualifier = """
@@ -255,7 +255,7 @@ resource "google_dataflow_job" "gcs_text_to_cloud_spanner" {
# spannerProjectId = ""
# spannerPriority = ""
# handleNewLine = "false"
- # invalidOutputPath = "gs://your-bucket/your-path"
+ # invalidOutputPath = ""
}
}
```
diff --git a/v1/README_GCS_Text_to_Datastore.md b/v1/README_GCS_Text_to_Datastore.md
index a7ac8e2d9f..179d9c9570 100644
--- a/v1/README_GCS_Text_to_Datastore.md
+++ b/v1/README_GCS_Text_to_Datastore.md
@@ -20,15 +20,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **textReadPattern** : A Cloud Storage path pattern that specifies the location of your text data files. For example, `gs://mybucket/somepath/*.json`.
-* **datastoreWriteProjectId** : The ID of the Google Cloud project to write the Datastore entities to.
-* **errorWritePath** : The error log output file to use for write failures that occur during processing. (Example: gs://your-bucket/errors/).
+* **textReadPattern**: A Cloud Storage path pattern that specifies the location of your text data files. For example, `gs://mybucket/somepath/*.json`.
+* **datastoreWriteProjectId**: The ID of the Google Cloud project to write the Datastore entities to.
+* **errorWritePath**: The error log output file to use for write failures that occur during processing. For example, `gs://your-bucket/errors/`.
### Optional parameters
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
-* **datastoreHintNumWorkers** : Hint for the expected number of workers in the Datastore ramp-up throttling step. Default is `500`.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **datastoreHintNumWorkers**: Hint for the expected number of workers in the Datastore ramp-up throttling step. Defaults to `500`.
## User-Defined functions (UDFs)
@@ -220,7 +220,7 @@ resource "google_dataflow_job" "gcs_text_to_datastore" {
parameters = {
textReadPattern = ""
datastoreWriteProjectId = ""
- errorWritePath = "gs://your-bucket/errors/"
+ errorWritePath = ""
# javascriptTextTransformGcsPath = ""
# javascriptTextTransformFunctionName = ""
# datastoreHintNumWorkers = "500"
diff --git a/v1/README_GCS_Text_to_Firestore.md b/v1/README_GCS_Text_to_Firestore.md
index f0e1f4a4bd..522ffd0498 100644
--- a/v1/README_GCS_Text_to_Firestore.md
+++ b/v1/README_GCS_Text_to_Firestore.md
@@ -20,15 +20,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **textReadPattern** : A Cloud Storage path pattern that specifies the location of your text data files. For example, `gs://mybucket/somepath/*.json`.
-* **firestoreWriteProjectId** : The ID of the Google Cloud project to write the Firestore entities to.
-* **errorWritePath** : The error log output file to use for write failures that occur during processing. (Example: gs://your-bucket/errors/).
+* **textReadPattern**: A Cloud Storage path pattern that specifies the location of your text data files. For example, `gs://mybucket/somepath/*.json`.
+* **firestoreWriteProjectId**: The ID of the Google Cloud project to write the Firestore entities to.
+* **errorWritePath**: The error log output file to use for write failures that occur during processing. For example, `gs://your-bucket/errors/`.
### Optional parameters
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
-* **firestoreHintNumWorkers** : Hint for the expected number of workers in the Firestore ramp-up throttling step. Default is 500.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **firestoreHintNumWorkers**: Hint for the expected number of workers in the Firestore ramp-up throttling step. The default value is `500`.
## User-Defined functions (UDFs)
@@ -220,7 +220,7 @@ resource "google_dataflow_job" "gcs_text_to_firestore" {
parameters = {
textReadPattern = ""
firestoreWriteProjectId = ""
- errorWritePath = "gs://your-bucket/errors/"
+ errorWritePath = ""
# javascriptTextTransformGcsPath = ""
# javascriptTextTransformFunctionName = ""
# firestoreHintNumWorkers = ""
diff --git a/v1/README_Jdbc_to_BigQuery.md b/v1/README_Jdbc_to_BigQuery.md
index 435929e589..6ebdb7b27c 100644
--- a/v1/README_Jdbc_to_BigQuery.md
+++ b/v1/README_Jdbc_to_BigQuery.md
@@ -25,22 +25,22 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **driverJars** : Comma separate Cloud Storage paths for JDBC drivers. (Example: gs://your-bucket/driver_jar1.jar,gs://your-bucket/driver_jar2.jar).
-* **driverClassName** : JDBC driver class name to use. (Example: com.mysql.jdbc.Driver).
-* **connectionURL** : Url connection string to connect to the JDBC source. (Example: jdbc:mysql://some-host:3306/sampledb).
-* **query** : Query to be executed on the source to extract the data. If a Cloud Storage path is given (gs://...), the query will be fetched from that file. (Example: select * from sampledb.sample_table).
-* **outputTable** : BigQuery table location to write the output to. The table's schema must match the input objects.
-* **bigQueryLoadingTemporaryDirectory** : Temporary directory for BigQuery loading process (Example: gs://your-bucket/your-files/temp_dir).
+* **driverJars**: Comma-separated Cloud Storage paths for JDBC drivers. For example, `gs://your-bucket/driver_jar1.jar,gs://your-bucket/driver_jar2.jar`.
+* **driverClassName**: JDBC driver class name to use. For example, `com.mysql.jdbc.Driver`.
+* **connectionURL**: The URL connection string to connect to the JDBC source. For example, `jdbc:mysql://some-host:3306/sampledb`.
+* **query**: The query to run on the source to extract the data. If a Cloud Storage path (gs://...) is given, the query is fetched from that file. For example, `select * from sampledb.sample_table`.
+* **outputTable**: BigQuery table location to write the output to. The table's schema must match the input objects.
+* **bigQueryLoadingTemporaryDirectory**: Temporary directory for the BigQuery loading process. For example, `gs://your-bucket/your-files/temp_dir`.
### Optional parameters
-* **connectionProperties** : Properties string to use for the JDBC connection. Format of the string must be [propertyName=property;]*. (Example: unicode=true;characterEncoding=UTF-8).
-* **username** : User name to be used for the JDBC connection. User name can be passed in as plaintext or as a base64 encoded string encrypted by Google Cloud KMS.
-* **password** : Password to be used for the JDBC connection. Password can be passed in as plaintext or as a base64 encoded string encrypted by Google Cloud KMS.
-* **KMSEncryptionKey** : If this parameter is provided, password, user name and connection string should all be passed in encrypted. Encrypt parameters using the KMS API encrypt endpoint. See: https://cloud.google.com/kms/docs/reference/rest/v1/projects.locations.keyRings.cryptoKeys/encrypt (Example: projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key).
-* **useColumnAlias** : If enabled (set to true) the pipeline will consider column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to false.
-* **disabledAlgorithms** : Comma-separated list of algorithms to disable. If this value is set to none, no algorithm is disabled. Use this parameter with caution, because the algorithms disabled by default might have vulnerabilities or performance issues. (Example: SSLv3, RC4).
-* **extraFilesToStage** : Comma-separated Cloud Storage paths or Secret Manager secrets for files to stage in the worker. These files are saved in the /extra_files directory in each worker. (Example: gs:///file.txt,projects//secrets//versions/).
+* **connectionProperties**: Properties string to use for the JDBC connection. The format of the string must be `[propertyName=property;]*`. For example, `unicode=true;characterEncoding=UTF-8`.
+* **username**: User name to be used for the JDBC connection. User name can be passed in as plaintext or as a base64 encoded string encrypted by Google Cloud KMS.
+* **password**: Password to be used for the JDBC connection. Password can be passed in as plaintext or as a base64 encoded string encrypted by Google Cloud KMS.
+* **KMSEncryptionKey**: If this parameter is provided, the password, user name, and connection string must all be passed in encrypted. Encrypt parameters using the KMS API encrypt endpoint. See https://cloud.google.com/kms/docs/reference/rest/v1/projects.locations.keyRings.cryptoKeys/encrypt. For example, `projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key`.
+* **useColumnAlias**: If enabled (set to `true`), the pipeline uses the column alias (`AS`) instead of the column name to map the rows to BigQuery. Defaults to `false`.
+* **disabledAlgorithms**: Comma-separated list of algorithms to disable. If this value is set to none, no algorithm is disabled. Use this parameter with caution, because the algorithms disabled by default might have vulnerabilities or performance issues. For example, `SSLv3, RC4`.
+* **extraFilesToStage**: Comma-separated Cloud Storage paths or Secret Manager secrets for files to stage in the worker. These files are saved in the /extra_files directory in each worker. For example, `gs:///file.txt,projects//secrets//versions/`.
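+
+When `KMSEncryptionKey` is set, the `username`, `password`, and `connectionURL` values must be supplied as base64-encoded ciphertext produced with that key. The following is a minimal sketch of one way to produce such a value with the Cloud KMS Java client; the class name and resource names are placeholders, not values required by the template:
+
+```java
+import com.google.cloud.kms.v1.CryptoKeyName;
+import com.google.cloud.kms.v1.EncryptResponse;
+import com.google.cloud.kms.v1.KeyManagementServiceClient;
+import com.google.protobuf.ByteString;
+import java.util.Base64;
+
+public class EncryptJdbcSecret {
+  public static void main(String[] args) throws Exception {
+    // Placeholder resource names; replace with your own project, key ring, and key.
+    CryptoKeyName keyName =
+        CryptoKeyName.of("your-project", "global", "your-keyring", "your-key");
+    try (KeyManagementServiceClient client = KeyManagementServiceClient.create()) {
+      EncryptResponse response =
+          client.encrypt(keyName, ByteString.copyFromUtf8("my-jdbc-password"));
+      // The template expects the ciphertext as a base64-encoded string.
+      String encoded =
+          Base64.getEncoder().encodeToString(response.getCiphertext().toByteArray());
+      System.out.println(encoded);
+    }
+  }
+}
+```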
@@ -241,19 +241,19 @@ resource "google_dataflow_job" "jdbc_to_bigquery" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- driverJars = "gs://your-bucket/driver_jar1.jar,gs://your-bucket/driver_jar2.jar"
- driverClassName = "com.mysql.jdbc.Driver"
- connectionURL = "jdbc:mysql://some-host:3306/sampledb"
- query = "select * from sampledb.sample_table"
+ driverJars = ""
+ driverClassName = ""
+ connectionURL = ""
+ query = ""
outputTable = ""
- bigQueryLoadingTemporaryDirectory = "gs://your-bucket/your-files/temp_dir"
- # connectionProperties = "unicode=true;characterEncoding=UTF-8"
+ bigQueryLoadingTemporaryDirectory = ""
+ # connectionProperties = ""
# username = ""
# password = ""
- # KMSEncryptionKey = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key"
+ # KMSEncryptionKey = ""
# useColumnAlias = "false"
- # disabledAlgorithms = "SSLv3, RC4"
- # extraFilesToStage = "gs:///file.txt,projects//secrets//versions/"
+ # disabledAlgorithms = ""
+ # extraFilesToStage = ""
}
}
```
diff --git a/v1/README_PubSub_Subscription_to_BigQuery.md b/v1/README_PubSub_Subscription_to_BigQuery.md
index 4b3607e31b..abbe209540 100644
--- a/v1/README_PubSub_Subscription_to_BigQuery.md
+++ b/v1/README_PubSub_Subscription_to_BigQuery.md
@@ -20,15 +20,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **outputTableSpec** : The BigQuery output table location, in the format `:.`.
-* **inputSubscription** : The Pub/Sub input subscription to read from, in the format `projects//subscriptions/`.
+* **outputTableSpec**: The BigQuery output table location, in the format `:.`.
+* **inputSubscription**: The Pub/Sub input subscription to read from, in the format `projects//subscriptions/`.
### Optional parameters
-* **outputDeadletterTable** : The BigQuery table to use for messages that fail to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
-* **javascriptTextTransformReloadIntervalMinutes** : Define the interval that workers may check for JavaScript UDF changes to reload the files. Defaults to: 0.
+* **outputDeadletterTable**: The BigQuery table to use for messages that fail to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **javascriptTextTransformReloadIntervalMinutes**: Defines the interval, in minutes, at which workers check for JavaScript UDF changes and reload the files. Defaults to: 0.
## User-Defined functions (UDFs)
diff --git a/v1/README_PubSub_to_BigQuery.md b/v1/README_PubSub_to_BigQuery.md
index fdab41b3af..bf88206103 100644
--- a/v1/README_PubSub_to_BigQuery.md
+++ b/v1/README_PubSub_to_BigQuery.md
@@ -20,15 +20,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **outputTableSpec** : The BigQuery output table location, in the format `:.`.
-* **inputTopic** : The Pub/Sub topic to read the input from.
+* **outputTableSpec**: The BigQuery output table location, in the format `:.`.
+* **inputTopic**: The Pub/Sub topic to read the input from.
### Optional parameters
-* **outputDeadletterTable** : The BigQuery table to use for messages that fail to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
-* **javascriptTextTransformReloadIntervalMinutes** : Define the interval that workers may check for JavaScript UDF changes to reload the files. Defaults to: 0.
+* **outputDeadletterTable**: The BigQuery table to use for messages that fail to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **javascriptTextTransformReloadIntervalMinutes**: Defines the interval, in minutes, at which workers check for JavaScript UDF changes and reload the files. Defaults to: 0.
## User-Defined functions (UDFs)
diff --git a/v1/README_Spanner_to_GCS_Text.md b/v1/README_Spanner_to_GCS_Text.md
index 65e637306d..93ab53cfe8 100644
--- a/v1/README_Spanner_to_GCS_Text.md
+++ b/v1/README_Spanner_to_GCS_Text.md
@@ -18,19 +18,19 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **spannerTable** : The Spanner table to read the data from.
-* **spannerProjectId** : The ID of the Google Cloud project that contains the Spanner database to read data from.
-* **spannerInstanceId** : The instance ID of the requested table.
-* **spannerDatabaseId** : The database ID of the requested table.
-* **textWritePrefix** : The Cloud Storage path prefix that specifies where the data is written. (Example: gs://mybucket/somefolder/).
+* **spannerTable**: The Spanner table to read the data from.
+* **spannerProjectId**: The ID of the Google Cloud project that contains the Spanner database to read data from.
+* **spannerInstanceId**: The instance ID of the requested table.
+* **spannerDatabaseId**: The database ID of the requested table.
+* **textWritePrefix**: The Cloud Storage path prefix that specifies where the data is written. For example, `gs://mybucket/somefolder/`.
### Optional parameters
-* **csvTempDirectory** : The Cloud Storage path where temporary CSV files are written. (Example: gs://your-bucket/your-path).
-* **spannerPriority** : The request priority (https://cloud.google.com/spanner/docs/reference/rest/v1/RequestOptions) for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.
-* **spannerHost** : The Cloud Spanner endpoint to call in the template. Only used for testing. (Example: https://batch-spanner.googleapis.com). Defaults to: https://batch-spanner.googleapis.com.
-* **spannerSnapshotTime** : The timestamp that corresponds to the version of the Spanner database that you want to read from. The timestamp must be specified in the RFC 3339 (https://tools.ietf.org/html/rfc3339) UTC "Zulu" format. The timestamp must be in the past and maximum timestamp staleness (https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness) applies. (Example: 1990-12-31T23:59:60Z). Defaults to empty.
-* **dataBoostEnabled** : Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. When true, requires the `spanner.databases.useDataBoost` Identity and Access Management (IAM) permission. For more information, see Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview). Defaults to: false.
+* **csvTempDirectory**: The Cloud Storage path where temporary CSV files are written. For example, `gs://your-bucket/your-path`.
+* **spannerPriority**: The request priority (https://cloud.google.com/spanner/docs/reference/rest/v1/RequestOptions) for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.
+* **spannerHost**: The Cloud Spanner endpoint to call in the template. Only used for testing. For example, `https://batch-spanner.googleapis.com`. Defaults to: https://batch-spanner.googleapis.com.
+* **spannerSnapshotTime**: The timestamp that corresponds to the version of the Spanner database that you want to read from. The timestamp must be specified in the RFC 3339 (https://tools.ietf.org/html/rfc3339) UTC Zulu Time format. The timestamp must be in the past and maximum timestamp staleness (https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness) applies. For example, `1990-12-31T23:59:60Z`. Defaults to empty.
+* **dataBoostEnabled**: Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. When true, requires the `spanner.databases.useDataBoost` Identity and Access Management (IAM) permission. For more information, see Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview). Defaults to: false.
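+
+As a small illustrative sketch (the class name is hypothetical), a valid `spannerSnapshotTime` value in the required RFC 3339 UTC Zulu format can be produced with `java.time.Instant`, which prints in that form by default:
+
+```java
+import java.time.Duration;
+import java.time.Instant;
+
+public class SnapshotTimeExample {
+  public static void main(String[] args) {
+    // A timestamp a few minutes in the past; it must stay within Spanner's
+    // maximum timestamp staleness to be readable.
+    Instant snapshotTime = Instant.now().minus(Duration.ofMinutes(5));
+    System.out.println(snapshotTime); // e.g. 2024-05-01T12:34:56.789Z
+  }
+}
+```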
@@ -226,11 +226,11 @@ resource "google_dataflow_job" "spanner_to_gcs_text" {
spannerProjectId = ""
spannerInstanceId = ""
spannerDatabaseId = ""
- textWritePrefix = "gs://mybucket/somefolder/"
- # csvTempDirectory = "gs://your-bucket/your-path"
+ textWritePrefix = ""
+ # csvTempDirectory = ""
# spannerPriority = ""
# spannerHost = "https://batch-spanner.googleapis.com"
- # spannerSnapshotTime = "1990-12-31T23:59:60Z"
+ # spannerSnapshotTime = ""
# dataBoostEnabled = "false"
}
}
diff --git a/v1/README_Stream_DLP_GCS_Text_to_BigQuery.md b/v1/README_Stream_DLP_GCS_Text_to_BigQuery.md
index 7f8079200a..41f81a6f12 100644
--- a/v1/README_Stream_DLP_GCS_Text_to_BigQuery.md
+++ b/v1/README_Stream_DLP_GCS_Text_to_BigQuery.md
@@ -34,15 +34,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputFilePattern** : The CSV files to read input data records from. Wildcards are also accepted. (Example: gs://mybucket/my_csv_filename.csv or gs://mybucket/file-*.csv).
-* **deidentifyTemplateName** : The Sensitive Data Protection de-identification template to use for API requests, specified with the pattern projects//deidentifyTemplates/. (Example: projects/your-project-id/locations/global/deidentifyTemplates/generated_template_id).
-* **datasetName** : The BigQuery dataset to use when sending tokenized results. The dataset must exist prior to execution.
-* **dlpProjectId** : The ID for the Google Cloud project that owns the DLP API resource. This project can be the same project that owns the Sensitive Data Protection templates, or it can be a separate project.
+* **inputFilePattern**: The CSV files to read input data records from. Wildcards are also accepted. For example, `gs://mybucket/my_csv_filename.csv` or `gs://mybucket/file-*.csv`.
+* **deidentifyTemplateName**: The Sensitive Data Protection de-identification template to use for API requests, specified with the pattern `projects//deidentifyTemplates/`. For example, `projects/your-project-id/locations/global/deidentifyTemplates/generated_template_id`.
+* **datasetName**: The BigQuery dataset to use when sending tokenized results. The dataset must exist prior to execution.
+* **dlpProjectId**: The ID for the Google Cloud project that owns the DLP API resource. This project can be the same project that owns the Sensitive Data Protection templates, or it can be a separate project.
### Optional parameters
-* **inspectTemplateName** : The Sensitive Data Protection inspection template to use for API requests, specified with the pattern projects//identifyTemplates/. (Example: projects/your-project-id/locations/global/inspectTemplates/generated_template_id).
-* **batchSize** : The chunking or batch size to use for sending data to inspect and detokenize. For a CSV file, the value of `batchSize` is the number of rows in a batch. Determine the batch size based on the size of the records and the sizing of the file. The DLP API has a payload size limit of 524 KB per API call.
+* **inspectTemplateName**: The Sensitive Data Protection inspection template to use for API requests, specified with the pattern `projects//identifyTemplates/`. For example, `projects/your-project-id/locations/global/inspectTemplates/generated_template_id`.
+* **batchSize**: The chunking or batch size to use for sending data to inspect and detokenize. For a CSV file, the value of `batchSize` is the number of rows in a batch. Determine the batch size based on the size of the records and the sizing of the file. The DLP API has a payload size limit of 524 KB per API call.
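+
+As a rough, illustrative calculation (the 2 KB average row size is an assumption, not a property of the template), the 524 KB DLP payload limit bounds how large `batchSize` can safely be:
+
+```java
+public class BatchSizeEstimate {
+  public static void main(String[] args) {
+    // DLP accepts at most 524 KB per API call; with ~2 KB per CSV row (assumed),
+    // roughly 260 rows fit in one call, so a batchSize around 200 leaves headroom.
+    int payloadLimitKb = 524;
+    int assumedRowSizeKb = 2;
+    System.out.println("approx. max rows per batch = " + payloadLimitKb / assumedRowSizeKb);
+  }
+}
+```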
@@ -222,11 +222,11 @@ resource "google_dataflow_job" "stream_dlp_gcs_text_to_bigquery" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputFilePattern = "gs://mybucket/my_csv_filename.csv or gs://mybucket/file-*.csv"
- deidentifyTemplateName = "projects/your-project-id/locations/global/deidentifyTemplates/generated_template_id"
+ inputFilePattern = ""
+ deidentifyTemplateName = ""
datasetName = ""
dlpProjectId = ""
- # inspectTemplateName = "projects/your-project-id/locations/global/inspectTemplates/generated_template_id"
+ # inspectTemplateName = ""
# batchSize = ""
}
}
diff --git a/v1/README_Stream_GCS_Text_to_BigQuery.md b/v1/README_Stream_GCS_Text_to_BigQuery.md
index edaa98aea2..5abd7dd324 100644
--- a/v1/README_Stream_GCS_Text_to_BigQuery.md
+++ b/v1/README_Stream_GCS_Text_to_BigQuery.md
@@ -30,8 +30,8 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputFilePattern** : Path of the file pattern glob to read from. (Example: gs://your-bucket/path/*.csv).
-* **JSONPath** : JSON file with BigQuery Schema description. JSON Example: {
+* **inputFilePattern**: Path of the file pattern glob to read from. For example, `gs://your-bucket/path/*.csv`.
+* **JSONPath**: JSON file with BigQuery Schema description. JSON Example: {
"BigQuery Schema": [
{
"name": "location",
@@ -55,15 +55,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
}
]
}.
-* **outputTable** : BigQuery table location to write the output to. The table's schema must match the input objects.
-* **bigQueryLoadingTemporaryDirectory** : Temporary directory for BigQuery loading process (Example: gs://your-bucket/your-files/temp_dir).
+* **outputTable**: BigQuery table location to write the output to. The table's schema must match the input objects.
+* **bigQueryLoadingTemporaryDirectory**: Temporary directory for the BigQuery loading process. For example, `gs://your-bucket/your-files/temp_dir`.
### Optional parameters
-* **outputDeadletterTable** : BigQuery table for failed messages. Messages failed to reach the output table for different reasons (e.g., mismatched schema, malformed json) are written to this table. If it doesn't exist, it will be created during pipeline execution. If not specified, "outputTableSpec_error_records" is used instead. (Example: your-project-id:your-dataset.your-table-name).
-* **javascriptTextTransformGcsPath** : The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
-* **javascriptTextTransformFunctionName** : The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
-* **javascriptTextTransformReloadIntervalMinutes** : Define the interval that workers may check for JavaScript UDF changes to reload the files. Defaults to: 0.
+* **outputDeadletterTable**: BigQuery table for failed messages. Messages that fail to reach the output table for different reasons (for example, mismatched schema or malformed JSON) are written to this table. If the table doesn't exist, it is created during pipeline execution. If not specified, `outputTableSpec_error_records` is used instead. For example, `your-project-id:your-dataset.your-table-name`.
+* **javascriptTextTransformGcsPath**: The Cloud Storage URI of the .js file that defines the JavaScript user-defined function (UDF) to use. For example, `gs://my-bucket/my-udfs/my_file.js`.
+* **javascriptTextTransformFunctionName**: The name of the JavaScript user-defined function (UDF) to use. For example, if your JavaScript function code is `myTransform(inJson) { /*...do stuff...*/ }`, then the function name is `myTransform`. For sample JavaScript UDFs, see UDF Examples (https://github.com/GoogleCloudPlatform/DataflowTemplates#udf-examples).
+* **javascriptTextTransformReloadIntervalMinutes**: Defines the interval, in minutes, at which workers check for JavaScript UDF changes and reload the files. Defaults to: 0.
## User-Defined functions (UDFs)
@@ -259,11 +259,11 @@ resource "google_dataflow_job" "stream_gcs_text_to_bigquery" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputFilePattern = "gs://your-bucket/path/*.csv"
+ inputFilePattern = ""
JSONPath = ""
outputTable = ""
- bigQueryLoadingTemporaryDirectory = "gs://your-bucket/your-files/temp_dir"
- # outputDeadletterTable = "your-project-id:your-dataset.your-table-name"
+ bigQueryLoadingTemporaryDirectory = ""
+ # outputDeadletterTable = ""
# javascriptTextTransformGcsPath = ""
# javascriptTextTransformFunctionName = ""
# javascriptTextTransformReloadIntervalMinutes = "0"
diff --git a/v1/README_Stream_GCS_Text_to_Cloud_PubSub.md b/v1/README_Stream_GCS_Text_to_Cloud_PubSub.md
index bb93150e7b..0786cd1296 100644
--- a/v1/README_Stream_GCS_Text_to_Cloud_PubSub.md
+++ b/v1/README_Stream_GCS_Text_to_Cloud_PubSub.md
@@ -32,8 +32,8 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputFilePattern** : The input file pattern to read from. (Example: gs://bucket-name/files/*.json).
-* **outputTopic** : The Pub/Sub input topic to write to. The name must be in the format `projects//topics/`. (Example: projects/your-project-id/topics/your-topic-name).
+* **inputFilePattern**: The input file pattern to read from. For example, `gs://bucket-name/files/*.json`.
+* **outputTopic**: The Pub/Sub input topic to write to. The name must be in the format `projects//topics/`. For example, `projects/your-project-id/topics/your-topic-name`.
### Optional parameters
@@ -204,8 +204,8 @@ resource "google_dataflow_job" "stream_gcs_text_to_cloud_pubsub" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
- inputFilePattern = "gs://bucket-name/files/*.json"
- outputTopic = "projects/your-project-id/topics/your-topic-name"
+ inputFilePattern = ""
+ outputTopic = ""
}
}
```
diff --git a/v1/README_Word_Count.md b/v1/README_Word_Count.md
index 0763b723f1..2e7ec5e8c1 100644
--- a/v1/README_Word_Count.md
+++ b/v1/README_Word_Count.md
@@ -14,8 +14,8 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
### Required parameters
-* **inputFile** : The input file pattern Dataflow reads from. Use the example file (gs://dataflow-samples/shakespeare/kinglear.txt) or enter the path to your own using the same format: gs://your-bucket/your-file.txt.
-* **output** : Path and filename prefix for writing output files. Ex: gs://your-bucket/counts.
+* **inputFile**: The input file pattern Dataflow reads from. Use the example file (`gs://dataflow-samples/shakespeare/kinglear.txt`) or enter the path to your own file using the same format: `gs://your-bucket/your-file.txt`.
+* **output**: Path and filename prefix for writing output files. For example, `gs://your-bucket/counts`.
### Optional parameters
diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/AvroToBigtable.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/AvroToBigtable.java
index 77aa88891e..0d89795f3b 100644
--- a/v1/src/main/java/com/google/cloud/teleport/bigtable/AvroToBigtable.java
+++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/AvroToBigtable.java
@@ -114,7 +114,7 @@ public interface Options extends PipelineOptions {
groupName = "Source",
description = "Input Cloud Storage File(s)",
helpText = "The Cloud Storage path pattern where data is located.",
- example = "gs:////*")
+ example = "gs://