From 7f2d623c7e10460c25b3bb2b539262b721cb31ba Mon Sep 17 00:00:00 2001
From: Vardhan Vinay Thigle <39047439+VardhanThigle@users.noreply.github.com>
Date: Thu, 28 Nov 2024 07:57:45 +0000
Subject: [PATCH] Updating schema subcommand documentation (#941)

---
 cmd/schema.go               | 22 +++++++++++++++++++---
 docs/cli/data.md            |  3 ++-
 docs/cli/schema-and-data.md |  1 +
 docs/cli/schema.md          | 24 +++++++++++++++++-------
 4 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/cmd/schema.go b/cmd/schema.go
index 4b2e4c5eb..4686feda1 100644
--- a/cmd/schema.go
+++ b/cmd/schema.go
@@ -45,6 +45,7 @@ type SchemaCmd struct {
 	logLevel      string
 	dryRun        bool
 	validate      bool
+	sessionJSON   string
 }
 
 // Name returns the name of operation.
@@ -79,6 +80,7 @@ func (cmd *SchemaCmd) SetFlags(f *flag.FlagSet) {
 	f.StringVar(&cmd.logLevel, "log-level", "DEBUG", "Configure the logging level for the command (INFO, DEBUG), defaults to DEBUG")
 	f.BoolVar(&cmd.dryRun, "dry-run", false, "Flag for generating DDL and schema conversion report without creating a spanner database")
 	f.BoolVar(&cmd.validate, "validate", false, "Flag for validating if all the required input parameters are present")
+	f.StringVar(&cmd.sessionJSON, "session", "", "Optional. Specifies the file we restore session state from.")
 }
 
 func (cmd *SchemaCmd) Execute(ctx context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
@@ -123,12 +125,26 @@ func (cmd *SchemaCmd) Execute(ctx context.Context, f *flag.FlagSet, _ ...interfa
 	schemaConversionStartTime := time.Now()
 	var conv *internal.Conv
 	convImpl := &conversion.ConvImpl{}
-	conv, err = convImpl.SchemaConv(cmd.project, sourceProfile, targetProfile, &ioHelper, &conversion.SchemaFromSourceImpl{})
-	if err != nil {
+	if cmd.sessionJSON != "" {
+		logger.Log.Info("Loading the conversion context from session file."+
+			" The source profile will not be used for the schema conversion.", zap.String("sessionFile", cmd.sessionJSON))
+		conv = internal.MakeConv()
+		err = conversion.ReadSessionFile(conv, cmd.sessionJSON)
+		if err != nil {
+			return subcommands.ExitFailure
+		}
+	} else {
+		conv, err = convImpl.SchemaConv(cmd.project, sourceProfile, targetProfile, &ioHelper, &conversion.SchemaFromSourceImpl{})
+		if err != nil {
+			return subcommands.ExitFailure
+		}
+	}
+	if conv == nil {
+		logger.Log.Error("Could not initialize conversion context from")
 		return subcommands.ExitFailure
 	}
-
 	conversion.WriteSchemaFile(conv, schemaConversionStartTime, cmd.filePrefix+schemaFile, ioHelper.Out, sourceProfile.Driver)
+	// We always write the session file to accommodate for a re-run that might change anything.
 	conversion.WriteSessionFile(conv, cmd.filePrefix+sessionFile, ioHelper.Out)
 
 	// Populate migration request id and migration type in conv object.
diff --git a/docs/cli/data.md b/docs/cli/data.md
index 213ab101e..f214218aa 100644
--- a/docs/cli/data.md
+++ b/docs/cli/data.md
@@ -55,10 +55,11 @@ reference of the gCloud version of SMT, please refer [here](https://cloud.google
             --source-profile='host=host,port=3306,user=user,password=pwd,dbName=db,streamingCfg=streaming.json' \
             --target-profile='project=spanner-project,instance=spanner-instance' --project=migration-project
 
+    To run a minimal downtime data migration on a multi-sharded source, refer to the [example source-profile configuration](config-json#config-for-sharded-minimal-downtime-migrations)
 ## REQUIRED FLAGS
 
      --session=SESSION
-        Specifies the file that you restore session state from.
+        Specifies the file that you restore session state from. This file can be generated using the [schema](schema.md) sub command.
 
      --source=SOURCE
         Flag for specifying source database (e.g., PostgreSQL, MySQL,
diff --git a/docs/cli/schema-and-data.md b/docs/cli/schema-and-data.md
index 937cebd68..511cf8386 100644
--- a/docs/cli/schema-and-data.md
+++ b/docs/cli/schema-and-data.md
@@ -56,6 +56,7 @@ reference of the gCloud version of SMT, please refer [here](https://cloud.google
             --target-profile='project=spanner-project,instance=spanner-insta\
         nce' --project='migration-project'
 
+    To run a minimal downtime schema and data migration on a multi-sharded source, refer to the [example source-profile configuration](config-json#config-for-sharded-minimal-downtime-migrations)
 ## REQUIRED FLAGS
 
      --source=SOURCE
diff --git a/docs/cli/schema.md b/docs/cli/schema.md
index 9f52491e0..5b776bce2 100644
--- a/docs/cli/schema.md
+++ b/docs/cli/schema.md
@@ -8,9 +8,13 @@ nav_order: 1
 # Schema subcommand
 {: .no_toc }
 
-This subcommand can be used to perform schema conversion and report on the quality of the conversion. The generated schema mapping file (session.json) can be then further edited using the Spanner migration tool web UI to make custom edits to the destination schema. This session file
-is then passed to the data subcommand to perform data migration while honoring the defined
-schema mapping. Spanner migration tool also generates Spanner schema which users can modify manually and use directly as well.
+This subcommand can be used to perform schema conversion and report on the quality of the conversion. 
+Based on the options discussed further, it helps with:
+1. Generate Report on quality of conversion.
+2. Generate the Spanner schema in Schema file, which could be manually modified and applied on spanner if required.
+3. Generate the schema mapping file (`session.json`), which gives the data migration pipeline the context of how the source schema maps to the Spanner schema. If required, the schema mapping file can be manually edited (either directly or with the help of the SMT web UI). The modified session file can be passed back to the schema subcommand via the `--session` flag if required.
+4. If you would like to perform the data migration via Spanner migration tool, the session file needs to be passed to the [data subcommand](data.md) as the **--session** parameter.
+5. Running with the `--dry-run` option only generates the report, schema file and session file. If you also want the generated schema to be applied to Spanner automatically, run the CLI without the `--dry-run` option.
 
 {: .highlight }
 The command below assumes that the open-source version of SMT is being used. For the CLI
@@ -58,9 +62,8 @@ reference of the gCloud version of SMT, please refer [here](https://cloud.google
 
 ## REQUIRED FLAGS
 
-     --source=SOURCE
-        Flag for specifying source database (e.g., PostgreSQL, MySQL,
-        DynamoDB).
+Either `--source-profile` or `--session` must be specified. In case both are specified,
+`--source-profile` is not used for schema conversion.
 
 ## OPTIONAL FLAGS
 
@@ -92,4 +95,11 @@ Detailed description of optional flags can be found [here](./flags.md).
      --project=PROJECT
         Flag for specifying the name of the Google Cloud Project in which the Spanner migration tool
         can create resources required for migration. If the project is not specified, Spanner migration 
-        tool will try to fetch the configured project in the gCloud CLI.
\ No newline at end of file
+        tool will try to fetch the configured project in the gCloud CLI.
+
+     --session=SESSION
+        Specifies the file that you restore session state from. This file can be generated using the [schema](schema.md) sub command.
+
+     --source=SOURCE
+        Flag for specifying source database (e.g., PostgreSQL, MySQL,
+        DynamoDB).
\ No newline at end of file