From 6f4556c6df39f7d2a3665cf61f69d00d3cae2afd Mon Sep 17 00:00:00 2001
From: Sagar Lakshmipathy <18vidhyasagar@gmail.com>
Date: Mon, 30 Sep 2024 11:15:24 -0700
Subject: [PATCH 1/3] catalog update docs

---
 website/docs/glue-catalog.md | 38 ++++++++++++++++++++++++++++++
 website/docs/snowflake.md    | 45 ++++++++++++++++++++++++++++++++++--
 2 files changed, 81 insertions(+), 2 deletions(-)
diff --git a/website/docs/glue-catalog.md b/website/docs/glue-catalog.md
index 6d1388c96..3ad5db396 100644
--- a/website/docs/glue-catalog.md
+++ b/website/docs/glue-catalog.md
@@ -99,6 +99,7 @@ From your terminal, create a glue database.
  aws glue create-database --database-input "{\"Name\":\"xtable_synced_db\"}"
  ```
 
+#### Method 1: Using Glue Crawler
 From your terminal, create a glue crawler. Modify the `<yourAccountId>`, `<yourRoleName>` 
 and `<path/to/your/data>`, with appropriate values.
 
@@ -169,6 +170,43 @@ supports Hudi version 0.14.0 as mentioned [here](/docs/features-and-limitations#
 </TabItem>
 <TabItem value="delta">
 
+#### Method 2: Using XTable APIs to sync with AWS Glue Data Catalog directly
+This applies for iceberg target format only.
+
+**Pre-requisites:**
+* Download iceberg-aws-X.X.X.jar from the [Maven repository](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws)
+* Download bundle-X.X.X.jar from the [Maven repository](https://mvnrepository.com/artifact/software.amazon.awssdk/bundle)
+
+Create a `glue-sync-config.yaml` file:
+
+```yaml md title="yaml"
+sourceFormat: HUDI
+targetFormats:
+   - ICEBERG
+datasets:
+   -
+      tableBasePath: s3://path/to/source/data
+      tableName: table_name
+      partitionSpec: partitionpath:VALUE
+      namespace: xtable_synced_db
+```
+
+Create a `glue-sync-catalog.yaml` file:
+    
+```yaml md title="yaml"
+catalogImpl: org.apache.iceberg.aws.glue.GlueCatalog
+catalogName: xtable
+catalogOptions:
+   io-impl: org.apache.iceberg.aws.s3.S3FileIO
+   warehouse: s3://path/to/source
+```
+
+Sample command to sync the table with Glue Data Catalog:
+
+```shell md title="shell"
+java -cp /path/to/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar:/path/to/iceberg-aws-1.3.1.jar:/path/to/bundle-2.23.9.jar org.apache.xtable.utilities.RunSync  --datasetConfig glue-sync-config.yaml --icebergCatalogConfig glue-sync-catalog.yaml
+```
+
 ### Validating the results
 After the crawler runs successfully, you can inspect the catalogued tables in Glue
 and also query the table in Amazon Athena like below:
diff --git a/website/docs/snowflake.md b/website/docs/snowflake.md
index 882f89963..436c03bbf 100644
--- a/website/docs/snowflake.md
+++ b/website/docs/snowflake.md
@@ -47,7 +47,7 @@ TABLE_FORMAT=ICEBERG
 ENABLED=TRUE;
 ```
 
-### Create an Iceberg table from Iceberg metadata in object storage
+### Method 1: Create an Iceberg table from Iceberg metadata in object storage
 Refer to additional [examples](https://docs.snowflake.com/LIMITEDACCESS/iceberg-2023/create-iceberg-table#examples) 
 in the Snowflake Create Iceberg Table guide for more information.
 
@@ -58,4 +58,45 @@ CATALOG=<catalog_name>
 METADATA_FILE_PATH='path/to/metadata/<VERSION>.metadata.json';
 ```
 
-Once the table creation succeeds you can start using the Iceberg table as any other table in Snowflake.
\ No newline at end of file
+Once the table creation succeeds you can start using the Iceberg table as any other table in Snowflake.
+
+### Method 2: Using XTable APIs to sync with Snowflake Catalog directly
+
+#### Pre-requisites:
+
+* Build Apache XTable™ (Incubating) from [source](https://github.com/apache/incubator-xtable)
+* Download `iceberg-aws-X.X.X.jar` from the [Maven repository](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws)
+* Download `bundle-X.X.X.jar` from the [Maven repository](https://mvnrepository.com/artifact/software.amazon.awssdk/bundle)
+* Download `iceberg-spark-runtime-3.X_2.12-X.X.X.jar` from [here](https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/1.4.2/)
+* Download `snowflake-jdbc-X.X.X.jar` from the [Maven repository](https://mvnrepository.com/artifact/net.snowflake/snowflake-jdbc)
+
+Create a `snowflake-sync-config.yaml` file:
+
+```yaml md title="yaml"
+sourceFormat: DELTA
+targetFormats:
+  - ICEBERG
+datasets:
+  -
+    tableBasePath: s3://path/to/table
+    tableName: <table_name>
+    namespace: <db_name>.<schema_name>
+```
+
+Create a `snowflake-sync-catalog.yaml` file:
+
+```yaml md title="yaml"
+catalogImpl: org.apache.iceberg.snowflake.SnowflakeCatalog
+catalogName: <catalog_name>
+catalogOptions:
+  io-impl: org.apache.iceberg.aws.s3.S3FileIO
+  warehouse: s3://path/to/table
+  uri: jdbc:snowflake://<account-identifier>.snowflakecomputing.com
+  jdbc.user: <snowflake-username>
+  jdbc.password: <snowflake-password>
+```
+
+Sample command to sync the table with Glue Data Catalog:
+```shell md title="shell"
+java -cp /path/to/iceberg-spark-runtime-3.2_2.12-1.4.2.jar:/path/to/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar:/path/to/snowflake-jdbc-3.13.28.jar:/path/to/iceberg-aws-1.4.2.jar:/Users/sagarl/Downloads/bundle-2.23.9.jar org.apache.xtable.utilities.RunSync  --datasetConfig snowflake-sync-config.yaml --icebergCatalogConfig snowflake-sync-catalog.yaml
+```

From e9aba209266c4b2e7deb293861311426fc75143f Mon Sep 17 00:00:00 2001
From: Sagar Lakshmipathy <18vidhyasagar@gmail.com>
Date: Mon, 30 Sep 2024 11:30:00 -0700
Subject: [PATCH 2/3] catalog update docs

---
 website/docs/glue-catalog.md | 46 +++++++++++++++++-------------------
 website/docs/snowflake.md    |  7 +-----
 2 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/website/docs/glue-catalog.md b/website/docs/glue-catalog.md
index 3ad5db396..261673099 100644
--- a/website/docs/glue-catalog.md
+++ b/website/docs/glue-catalog.md
@@ -150,25 +150,6 @@ From your terminal, run the glue crawler.
 Once the crawler succeeds, you’ll be able to query this Iceberg table from Athena,
 EMR and/or Redshift query engines.
 
-<Tabs
-groupId="table-format"
-defaultValue="hudi"
-values={[
-{ label: 'targetFormat: HUDI', value: 'hudi', },
-{ label: 'targetFormat: DELTA', value: 'delta', },
-{ label: 'targetFormat: ICEBERG', value: 'iceberg', },
-]}
->
-
-<TabItem value="hudi">
-
-:::danger LIMITATION for Hudi target format:
-To validate the Hudi targetFormat table results, you need to ensure that the query engine that you're using
-supports Hudi version 0.14.0 as mentioned [here](/docs/features-and-limitations#hudi)
-:::
-
-</TabItem>
-<TabItem value="delta">
 
 #### Method 2: Using XTable APIs to sync with AWS Glue Data Catalog directly
 This applies for iceberg target format only.
@@ -192,7 +173,7 @@ datasets:
 ```
 
 Create a `glue-sync-catalog.yaml` file:
-    
+
 ```yaml md title="yaml"
 catalogImpl: org.apache.iceberg.aws.glue.GlueCatalog
 catalogName: xtable
@@ -206,11 +187,30 @@ Sample command to sync the table with Glue Data Catalog:
 ```shell md title="shell"
 java -cp /path/to/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar:/path/to/iceberg-aws-1.3.1.jar:/path/to/bundle-2.23.9.jar org.apache.xtable.utilities.RunSync  --datasetConfig glue-sync-config.yaml --icebergCatalogConfig glue-sync-catalog.yaml
 ```
-
 ### Validating the results
 After the crawler runs successfully, you can inspect the catalogued tables in Glue
 and also query the table in Amazon Athena like below:
 
+<Tabs
+groupId="table-format"
+defaultValue="hudi"
+values={[
+{ label: 'targetFormat: HUDI', value: 'hudi', },
+{ label: 'targetFormat: DELTA', value: 'delta', },
+{ label: 'targetFormat: ICEBERG', value: 'iceberg', },
+]}
+>
+
+<TabItem value="hudi">
+
+:::danger LIMITATION for Hudi target format:
+To validate the Hudi targetFormat table results, you need to ensure that the query engine that you're using
+supports Hudi version 0.14.0 as mentioned [here](/docs/features-and-limitations#hudi)
+:::
+
+</TabItem>
+<TabItem value="delta">
+
 ```sql
 SELECT * FROM xtable_synced_db.<table_name>;
 ```
@@ -218,9 +218,7 @@ SELECT * FROM xtable_synced_db.<table_name>;
 </TabItem>
 <TabItem value="iceberg">
 
-### Validating the results
-After the crawler runs successfully, you can inspect the catalogued tables in Glue
-and also query the table in Amazon Athena like below:
+
 
 ```sql
 SELECT * FROM xtable_synced_db.<table_name>;
diff --git a/website/docs/snowflake.md b/website/docs/snowflake.md
index 436c03bbf..d0da25eae 100644
--- a/website/docs/snowflake.md
+++ b/website/docs/snowflake.md
@@ -8,11 +8,6 @@ title: "Snowflake"
 Currently, Snowflake supports [Iceberg tables through External Tables](https://www.snowflake.com/blog/expanding-the-data-cloud-with-apache-iceberg/)
 and also [Native Iceberg Tables](https://www.snowflake.com/blog/iceberg-tables-powering-open-standards-with-snowflake-innovations/).
 
-:::note NOTE:
-Iceberg on Snowflake is currently supported in
-[public preview](https://www.snowflake.com/blog/build-open-data-lakehouse-iceberg-tables/)
-:::
-
 ## Steps:
 These are high level steps to help you integrate Apache XTable™ (Incubating) synced Iceberg tables on Snowflake. For more additional information
 refer to the [Getting started with Iceberg tables](https://docs.snowflake.com/LIMITEDACCESS/iceberg-2023/tables-iceberg-getting-started).
@@ -96,7 +91,7 @@ catalogOptions:
   jdbc.password: <snowflake-password>
 ```
 
-Sample command to sync the table with Glue Data Catalog:
+Sample command to sync the table with Snowflake:
 ```shell md title="shell"
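-java -cp /path/to/iceberg-spark-runtime-3.2_2.12-1.4.2.jar:/path/to/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar:/path/to/snowflake-jdbc-3.13.28.jar:/path/to/iceberg-aws-1.4.2.jar:/Users/sagarl/Downloads/bundle-2.23.9.jar org.apache.xtable.utilities.RunSync  --datasetConfig snowflake-sync-config.yaml --icebergCatalogConfig snowflake-sync-catalog.yaml
+java -cp /path/to/iceberg-spark-runtime-3.2_2.12-1.4.2.jar:/path/to/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar:/path/to/snowflake-jdbc-3.13.28.jar:/path/to/iceberg-aws-1.4.2.jar:/path/to/bundle-2.23.9.jar org.apache.xtable.utilities.RunSync  --datasetConfig snowflake-sync-config.yaml --icebergCatalogConfig snowflake-sync-catalog.yaml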
 ```

From 1add45960e9b8733511c7308e02ef12a30480ff4 Mon Sep 17 00:00:00 2001
From: Sagar Lakshmipathy <18vidhyasagar@gmail.com>
Date: Mon, 30 Sep 2024 14:13:47 -0700
Subject: [PATCH 3/3] updated minor nits

---
 website/docs/glue-catalog.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/website/docs/glue-catalog.md b/website/docs/glue-catalog.md
index 261673099..d7a23f38c 100644
--- a/website/docs/glue-catalog.md
+++ b/website/docs/glue-catalog.md
@@ -152,7 +152,7 @@ EMR and/or Redshift query engines.
 
 
 #### Method 2: Using XTable APIs to sync with AWS Glue Data Catalog directly
-This applies for iceberg target format only.
+This applies to the Iceberg target format only.
 
 **Pre-requisites:**
 * Download iceberg-aws-X.X.X.jar from the [Maven repository](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws)
@@ -161,7 +161,7 @@ This applies for iceberg target format only.
 Create a `glue-sync-config.yaml` file:
 
 ```yaml md title="yaml"
-sourceFormat: HUDI
+sourceFormat: HUDI|DELTA # choose only one
 targetFormats:
    - ICEBERG
 datasets:
@@ -176,7 +176,7 @@ Create a `glue-sync-catalog.yaml` file:
 
 ```yaml md title="yaml"
 catalogImpl: org.apache.iceberg.aws.glue.GlueCatalog
-catalogName: xtable
+catalogName: <catalog_name>
 catalogOptions:
    io-impl: org.apache.iceberg.aws.s3.S3FileIO
    warehouse: s3://path/to/source
@@ -188,7 +188,7 @@ Sample command to sync the table with Glue Data Catalog:
 java -cp /path/to/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar:/path/to/iceberg-aws-1.3.1.jar:/path/to/bundle-2.23.9.jar org.apache.xtable.utilities.RunSync  --datasetConfig glue-sync-config.yaml --icebergCatalogConfig glue-sync-catalog.yaml
 ```
 ### Validating the results
-After the crawler runs successfully, you can inspect the catalogued tables in Glue
+Once the sync is complete (or, if you used the Glue Crawler option, once the crawler succeeds), you can inspect the catalogued tables in Glue
 and also query the table in Amazon Athena like below:
 
 <Tabs