add sql credential pass through doc (#883)
* add sql credential pass through doc

Signed-off-by: Yuqing Wei <[email protected]>

* fix comments

Signed-off-by: Yuqing Wei <[email protected]>

* fix comments

Signed-off-by: Yuqing Wei <[email protected]>

Signed-off-by: Yuqing Wei <[email protected]>
Yuqing-cat authored Dec 2, 2022
1 parent 4efb683 commit f8a7e76
Showing 3 changed files with 76 additions and 1 deletion.
11 changes: 10 additions & 1 deletion docs/how-to-guides/feathr-credential-passthru.md
@@ -34,4 +34,13 @@ client.materialize_features(settings, allow_materialize_non_agg_feature=True, ex

In this code block, replace the `appId`, `clientSecret`, and `tenant` placeholder values with the values that you collected while completing the first step.

3. Don't forget your other configuration settings, such as the ones that are specific to Feathr in [Feathr Job Configuration during Run Time](./feathr-job-configuration.md).

4. Credential pass-through is also supported for Azure SQL Database. To use it, pass your token through an environment variable and set the `auth` parameter to `TOKEN` in `JdbcSource` or `JdbcSink`. For example:
```python
output_name = 'output'
sink = JdbcSink(name=output_name, url="some_jdbc_url", dbtable="table_name", auth="TOKEN")

# Set the token before submitting the job; Feathr reads it from <NAME>_TOKEN.
os.environ[f"{output_name.upper()}_TOKEN"] = client.credential.get_token("https://management.azure.com/.default").token
client.get_offline_features(..., output_path=sink)
```
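The environment variable name Feathr reads is derived from the source or sink name. A minimal sketch of that naming convention (the helper `token_env_var` is illustrative, not part of the Feathr API):

```python
def token_env_var(name: str) -> str:
    # Feathr resolves the token from an environment variable named after
    # the upper-cased source/sink name with a _TOKEN suffix.
    return f"{name.upper()}_TOKEN"

# For the sink named 'output' above, the variable is OUTPUT_TOKEN.
print(token_env_var("output"))
```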
27 changes: 27 additions & 0 deletions docs/how-to-guides/jdbc-cosmos-notes.md
@@ -62,6 +62,32 @@ client.get_offline_features(...)

These values will be automatically passed to the Feathr core when submitting the job.

If you want to use a token, the code will look like this:
Step 1: Define the `JdbcSource`:
```python
src_name = "source_name"
source = JdbcSource(name=src_name, url="jdbc:...", dbtable="table_name", auth="TOKEN")
anchor = FeatureAnchor(name="anchor_name",
source=source,
features=[some_features, some_other_features])
```
Step 2: Set the environment variable before submitting the job:
```python
os.environ[f"{src_name.upper()}_TOKEN"] = "some_token"
```
To enable Azure AD authentication in Azure SQL Database, please refer to [this document](https://learn.microsoft.com/en-us/azure/azure-sql/database/authentication-aad-overview?view=azuresql#overview).

There are several ways to obtain an Azure AD access token; please refer to [this document](https://docs.microsoft.com/en-us/azure/active-directory/develop/access-tokens) for more details.

If you want to leverage an existing credential in the Python client, you could try:
```python
from azure.identity import DefaultAzureCredential

credential = DefaultAzureCredential()
token = credential.get_token("https://management.azure.com/.default").token
```
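Putting the two steps together: once you have a token string from any of the flows above, export it under the name Feathr expects. The token value below is a placeholder, not a real credential:

```python
import os

src_name = "source_name"  # must match the name given to the JdbcSource
token = "<aad-access-token>"  # placeholder; in practice use credential.get_token(...).token

# Feathr looks the token up under <SOURCE_NAME>_TOKEN when the job is submitted.
os.environ[f"{src_name.upper()}_TOKEN"] = token
```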

## Using SQL database as the offline store

To use SQL database as the offline store, you can use `JdbcSink` as the `output_path` parameter of `FeathrClient.get_offline_features`, e.g.:
@@ -76,6 +102,7 @@ os.environ[f"{name.upper()}_USER"] = "some_user_name"
os.environ[f"{name.upper()}_PASSWORD"] = "some_magic_word"
client.get_offline_features(..., output_path=sink)
```
The `TOKEN` auth type is also supported in `JdbcSink`.

## Using SQL database as the online store

39 changes: 39 additions & 0 deletions feathr_project/test/test_azure_spark_e2e.py
@@ -245,6 +245,45 @@ def test_feathr_get_offline_features_to_sql():
# assuming the job can successfully run; otherwise it will throw exception
client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS)

@pytest.mark.skip(reason="Marked as skipped as we need to setup token and enable SQL AAD login for this test")
def test_feathr_get_offline_features_to_sql_with_token():
"""
Test get_offline_features() can save data to SQL.
"""
# runner.invoke(init, [])
test_workspace_dir = Path(
__file__).parent.resolve() / "test_user_workspace"
client: FeathrClient = basic_test_setup(os.path.join(test_workspace_dir, "feathr_config.yaml"))

location_id = TypedKey(key_column="DOLocationID",
key_column_type=ValueType.INT32,
description="location id in NYC",
full_name="nyc_taxi.location_id")

feature_query = FeatureQuery(
feature_list=["f_location_avg_fare"], key=location_id)
settings = ObservationSettings(
observation_path="wasbs://[email protected]/sample_data/green_tripdata_2020-04.csv",
event_timestamp_column="lpep_dropoff_datetime",
timestamp_format="yyyy-MM-dd HH:mm:ss")

now = datetime.now()

# Set DB token before submitting job
# os.environ[f"SQL1_TOKEN"] = "some_token"
os.environ["SQL1_TOKEN"] = client.credential.get_token("https://management.azure.com/.default").token
output_path = JdbcSink(name="sql1",
url="jdbc:sqlserver://feathrazureci.database.windows.net:1433;database=feathrci;encrypt=true;",
dbtable=f'feathr_ci_sql_token_{str(now)[:19].replace(" ", "_").replace(":", "_").replace("-", "_")}',
auth="TOKEN")

client.get_offline_features(observation_settings=settings,
feature_query=feature_query,
output_path=output_path)

# assuming the job can successfully run; otherwise it will throw exception
client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS)

def test_feathr_materialize_to_cosmosdb():
"""
Test FeathrClient() CosmosDbSink.
