From 580e2c90a221d767b331243851424e9c733d62a2 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Thu, 12 Dec 2024 16:54:05 -0500 Subject: [PATCH 1/9] Add PySpark version of OpenAIDefaults - WIP --- .../ml/services/openai/OpenAIDefaults.py | 32 +++++++++++++++++++ .../synapse/ml/services/openai/__init__.py | 0 .../ml/services/openai/OpenAIDefaults.scala | 19 +++++++++++ .../services/openai/test_OpenAIDefaults.py | 27 ++++++++++++++++ .../services/openai/OpenAIDefaultsSuite.scala | 2 -- .../azure/synapse/ml/param/GlobalParams.scala | 2 +- 6 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py create mode 100644 cognitive/src/main/python/synapse/ml/services/openai/__init__.py create mode 100644 cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py diff --git a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py new file mode 100644 index 0000000000..5094eab236 --- /dev/null +++ b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py @@ -0,0 +1,32 @@ +# Copyright (C) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in project root for information. + +import sys + +if sys.version >= "3": + basestring = str + +import pyspark +from pyspark import SparkContext + +class OpenAIDefaults: + def __init__(self): + self.defaults = SparkContext.getOrCreate()._jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIDefaults + + def set_deployment_name(self, name): + self.defaults.setDeploymentName(name) + + def set_subscription_key(self, key): + self.defaults.setSubscriptionKey(key) + + def set_temperature(self, temp): + self.defaults.setTemperature(temp) + + def get_deployment_name(self): + self.defaults.getDeploymentName() + + def get_subscription_key(self): + self.defaults.getSubscriptionKey() + + def get_temperature(self): + self.defaults.getTemperature() diff --git a/cognitive/src/main/python/synapse/ml/services/openai/__init__.py b/cognitive/src/main/python/synapse/ml/services/openai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala index fe32df2267..cbbb05a9de 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala @@ -18,4 +18,23 @@ object OpenAIDefaults { def setTemperature(v: Double): Unit = { GlobalParams.setGlobalParam(OpenAITemperatureKey, Left(v)) } + + private def extractLeft[T](optEither: Option[Either[T, String]]): Option[T] = { + optEither match { + case Some(Left(v)) => Some(v) + case _ => None + } + } + + def getDeploymentName(): Option[String] = { + extractLeft(GlobalParams.getGlobalParam(OpenAIDeploymentNameKey)) + } + + def getSubscriptionKey(): Option[String] = { + extractLeft(GlobalParams.getGlobalParam(OpenAISubscriptionKey)) + } + + def getTemperature(): Option[Double] = { + extractLeft(GlobalParams.getGlobalParam(OpenAITemperatureKey)) + } } diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py new file mode 100644 index 0000000000..e800d453dd --- /dev/null +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py @@ -0,0 +1,27 @@ +# Copyright (C) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in project root for information. + +from synapse.ml.services.openai.OpenAIDefaults import OpenAIDefaults +from synapse.ml.services.openai.OpenAIPrompt import OpenAIPrompt +import unittest +from pyspark.sql import SQLContext + +from synapse.ml.core.init_spark import * + +spark = init_spark() +sc = SQLContext(spark.sparkContext) + +class TestOpenAIDefaults(unittest.TestCase): + def test_OpenAIDefaults(self): + defaults = OpenAIDefaults() + + defaults.set_deployment_name("Bing Bong") + defaults.set_subscription_key("SubKey") + defaults.set_temperature(0.05) + + self.assertEqual(defaults.getDeploymentName(), "Bing Bong") + self.assertEqual(defaults.getSubscriptionKey, "SubKey") + self.assertEqual(defaults.getTemperature, 0.05) + +if __name__ == "__main__": + result = unittest.main() diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala index 139d586592..c8edf8ccbf 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala @@ -55,9 +55,7 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { .count(r => Option(r.getSeq[String](0)).isDefined) assert(nonNullCount == 3) - } - test("OpenAIPrompt Check Params") { assert(prompt.getDeploymentName == deploymentName) assert(prompt.getSubscriptionKey == openAIAPIKey) assert(prompt.getTemperature == 0.05) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/GlobalParams.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/GlobalParams.scala index 98f7eb33e6..f5e0d6a48d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/GlobalParams.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/GlobalParams.scala @@ -18,7 +18,7 @@ object GlobalParams { GlobalParams(key) = value } - private def getGlobalParam[T](key: GlobalKey[T]): Option[T] = { + def getGlobalParam[T](key: GlobalKey[T]): Option[T] = { GlobalParams.get(key.asInstanceOf[GlobalKey[Any]]).map(_.asInstanceOf[T]) } From aa0b8596278cba7d15d842d2bed49bc9bad1ce40 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Fri, 13 Dec 2024 15:57:29 -0500 Subject: [PATCH 2/9] Add getters and resetters to OpenAIDefaults, and add Python version too! --- .../ml/services/openai/OpenAIDefaults.py | 21 +++++++---- .../ml/services/openai/OpenAIDefaults.scala | 36 ++++++++++++------- .../services/openai/OpenAIDefaultsSuite.scala | 33 ++++++++++++++--- .../azure/synapse/ml/param/GlobalParams.scala | 4 +++ 4 files changed, 71 insertions(+), 23 deletions(-) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py index 5094eab236..1fdb3e3a3e 100644 --- a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py +++ b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py @@ -16,17 +16,26 @@ def __init__(self): def set_deployment_name(self, name): self.defaults.setDeploymentName(name) - def set_subscription_key(self, key): - self.defaults.setSubscriptionKey(key) - - def set_temperature(self, temp): - self.defaults.setTemperature(temp) - def get_deployment_name(self): self.defaults.getDeploymentName() + def reset_deployment_name(self): + self.defaults.resetDeploymentName() + + def set_subscription_key(self, key): + self.defaults.setSubscriptionKey(key) + def get_subscription_key(self): self.defaults.getSubscriptionKey() + def reset_subscription_key(self): + self.defaults.resetSubscriptionKey() + + def set_temperature(self, temp): + self.defaults.setTemperature(temp) + def get_temperature(self): self.defaults.getTemperature() + + def reset_temperature(self): + self.defaults.resetTemperature() diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala index cbbb05a9de..f0c102c062 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala @@ -11,30 +11,42 @@ object OpenAIDefaults { GlobalParams.setGlobalParam(OpenAIDeploymentNameKey, Left(v)) } + def getDeploymentName: Option[String] = { + extractLeft(GlobalParams.getGlobalParam(OpenAIDeploymentNameKey)) + } + + def resetDeploymentName(): Unit = { + GlobalParams.resetGlobalParam(OpenAIDeploymentNameKey) + } + def setSubscriptionKey(v: String): Unit = { GlobalParams.setGlobalParam(OpenAISubscriptionKey, Left(v)) } + def getSubscriptionKey: Option[String] = { + extractLeft(GlobalParams.getGlobalParam(OpenAISubscriptionKey)) + } + + def resetSubscriptionKey(): Unit = { + GlobalParams.resetGlobalParam(OpenAISubscriptionKey) + } + def setTemperature(v: Double): Unit = { GlobalParams.setGlobalParam(OpenAITemperatureKey, Left(v)) } + def getTemperature: Option[Double] = { + extractLeft(GlobalParams.getGlobalParam(OpenAITemperatureKey)) + } + + def resetTemperature(): Unit = { + GlobalParams.resetGlobalParam(OpenAITemperatureKey) + } + private def extractLeft[T](optEither: Option[Either[T, String]]): Option[T] = { optEither match { case Some(Left(v)) => Some(v) case _ => None } } - - def getDeploymentName(): Option[String] = { - extractLeft(GlobalParams.getGlobalParam(OpenAIDeploymentNameKey)) - } - - def getSubscriptionKey(): Option[String] = { - extractLeft(GlobalParams.getGlobalParam(OpenAISubscriptionKey)) - } - - def getTemperature(): Option[Double] = { - extractLeft(GlobalParams.getGlobalParam(OpenAITemperatureKey)) - } } diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala index c8edf8ccbf..bb108bcb6d 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala @@ -10,11 +10,6 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { import spark.implicits._ - OpenAIDefaults.setDeploymentName(deploymentName) - OpenAIDefaults.setSubscriptionKey(openAIAPIKey) - OpenAIDefaults.setTemperature(0.05) - - def promptCompletion: OpenAICompletion = new OpenAICompletion() .setCustomServiceName(openAIServiceName) .setMaxTokens(200) @@ -28,6 +23,10 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { ).toDF("prompt") test("Completion w Globals") { + OpenAIDefaults.setDeploymentName(deploymentName) + OpenAIDefaults.setSubscriptionKey(openAIAPIKey) + OpenAIDefaults.setTemperature(0.05) + val fromRow = CompletionResponse.makeFromRowConverter promptCompletion.transform(promptDF).collect().foreach(r => fromRow(r.getAs[Row]("out")).choices.foreach(c => @@ -46,6 +45,10 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { ).toDF("text", "category") test("OpenAIPrompt w Globals") { + OpenAIDefaults.setDeploymentName(deploymentName) + OpenAIDefaults.setSubscriptionKey(openAIAPIKey) + OpenAIDefaults.setTemperature(0.05) + val nonNullCount = prompt .setPromptTemplate("here is a comma separated list of 5 {category}: {text}, ") .setPostProcessing("csv") @@ -60,4 +63,24 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { assert(prompt.getSubscriptionKey == openAIAPIKey) assert(prompt.getTemperature == 0.05) } + + test("Test Getters") { + assert(OpenAIDefaults.getDeploymentName.contains(deploymentName)) + assert(OpenAIDefaults.getSubscriptionKey.contains(openAIAPIKey)) + assert(OpenAIDefaults.getTemperature.contains(0.05)) + } + + test("Test Resetters") { + OpenAIDefaults.setDeploymentName(deploymentName) + OpenAIDefaults.setSubscriptionKey(openAIAPIKey) + OpenAIDefaults.setTemperature(0.05) + + OpenAIDefaults.resetDeploymentName() + OpenAIDefaults.resetSubscriptionKey() + OpenAIDefaults.resetTemperature() + + assert(OpenAIDefaults.getDeploymentName.isEmpty) + assert(OpenAIDefaults.getSubscriptionKey.isEmpty) + assert(OpenAIDefaults.getTemperature.isEmpty) + } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/GlobalParams.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/GlobalParams.scala index f5e0d6a48d..ac6f6a8bcb 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/GlobalParams.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/GlobalParams.scala @@ -22,6 +22,10 @@ object GlobalParams { GlobalParams.get(key.asInstanceOf[GlobalKey[Any]]).map(_.asInstanceOf[T]) } + def resetGlobalParam[T](key: GlobalKey[T]): Unit = { + GlobalParams -= key + } + def getParam[T](p: Param[T]): Option[T] = { ParamToKeyMap.get(p).flatMap { key => key match { From cf932663e598b2d81b4a61ac81ea96bf1cd10481 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Fri, 13 Dec 2024 17:16:08 -0500 Subject: [PATCH 3/9] Fix python OpenAIDefaults and add tests! --- .../synapse/ml/services/openai/OpenAIDefaults.py | 12 +++++++++--- .../services/openai/test_OpenAIDefaults.py | 6 +++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py index 1fdb3e3a3e..9285a124df 100644 --- a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py +++ b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py @@ -9,6 +9,12 @@ import pyspark from pyspark import SparkContext +def getOption(opt): + if opt.isDefined(): + return opt.get() + else: + return None + class OpenAIDefaults: def __init__(self): self.defaults = SparkContext.getOrCreate()._jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIDefaults @@ -17,7 +23,7 @@ def set_deployment_name(self, name): self.defaults.setDeploymentName(name) def get_deployment_name(self): - self.defaults.getDeploymentName() + return getOption(self.defaults.getDeploymentName()) def reset_deployment_name(self): self.defaults.resetDeploymentName() @@ -26,7 +32,7 @@ def set_subscription_key(self, key): self.defaults.setSubscriptionKey(key) def get_subscription_key(self): - self.defaults.getSubscriptionKey() + return getOption(self.defaults.getSubscriptionKey()) def reset_subscription_key(self): self.defaults.resetSubscriptionKey() @@ -35,7 +41,7 @@ def set_temperature(self, temp): self.defaults.setTemperature(temp) def get_temperature(self): - self.defaults.getTemperature() + return getOption(self.defaults.getTemperature()) def reset_temperature(self): self.defaults.resetTemperature() diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py index e800d453dd..86fe0dee2c 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py @@ -19,9 +19,9 @@ def test_OpenAIDefaults(self): defaults.set_subscription_key("SubKey") defaults.set_temperature(0.05) - self.assertEqual(defaults.getDeploymentName(), "Bing Bong") - self.assertEqual(defaults.getSubscriptionKey, "SubKey") - self.assertEqual(defaults.getTemperature, 0.05) + self.assertEqual(defaults.get_deployment_name(), "Bing Bong") + self.assertEqual(defaults.get_subscription_key(), "SubKey") + self.assertEqual(defaults.get_temperature(), 0.05) if __name__ == "__main__": result = unittest.main() From 744f83f0d70e6953a545245b2b8453eafe0f34c7 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Tue, 17 Dec 2024 13:25:22 -0500 Subject: [PATCH 4/9] Adding tests and fixing style --- .../ml/services/openai/OpenAIDefaults.py | 6 ++- .../services/openai/test_OpenAIDefaults.py | 38 ++++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py index 9285a124df..3292fe18ca 100644 --- a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py +++ b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py @@ -9,15 +9,19 @@ import pyspark from pyspark import SparkContext + def getOption(opt): if opt.isDefined(): return opt.get() else: return None + class OpenAIDefaults: def __init__(self): - self.defaults = SparkContext.getOrCreate()._jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIDefaults + self.defaults = ( + SparkContext.getOrCreate()._jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIDefaults + ) def set_deployment_name(self, name): self.defaults.setDeploymentName(name) diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py index 86fe0dee2c..d239964767 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py @@ -11,8 +11,20 @@ spark = init_spark() sc = SQLContext(spark.sparkContext) + class TestOpenAIDefaults(unittest.TestCase): - def test_OpenAIDefaults(self): + def test_setters_and_getters(self): + defaults = OpenAIDefaults() + + defaults.set_deployment_name("Bing Bong") + defaults.set_subscription_key("SubKey") + defaults.set_temperature(0.05) + + self.assertEqual(defaults.get_deployment_name(), "Bing Bong") + self.assertEqual(defaults.get_subscription_key(), "SubKey") + self.assertEqual(defaults.get_temperature(), 0.05) + + def test_resetters(self): defaults = OpenAIDefaults() defaults.set_deployment_name("Bing Bong") @@ -23,5 +35,29 @@ def test_OpenAIDefaults(self): self.assertEqual(defaults.get_subscription_key(), "SubKey") self.assertEqual(defaults.get_temperature(), 0.05) + defaults.reset_deployment_name() + defaults.reset_subscription_key() + defaults.reset_temperature() + + self.assertEqual(defaults.get_deployment_name(), None) + self.assertEqual(defaults.get_subscription_key(), None) + self.assertEqual(defaults.get_temperature(), None) + + def test_two_defaults(self): + defaults = OpenAIDefaults() + + defaults.set_deployment_name("Bing Bong") + self.assertEqual(defaults.get_deployment_name(), "Bing Bong") + + defaults2 = OpenAIDefaults() + defaults.set_deployment_name("Bing Bong") + defaults2.set_deployment_name("Vamos") + self.assertEqual(defaults.get_deployment_name(), "Vamos") + + defaults2.set_deployment_name("Test 2") + defaults.set_deployment_name("Test 1") + self.assertEqual(defaults.get_deployment_name(), "Test 1") + + if __name__ == "__main__": result = unittest.main() From c239e9274fbd020f1aeb0aaeac3a1c278c3adbd1 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Fri, 20 Dec 2024 12:56:17 -0600 Subject: [PATCH 5/9] Add python tests --- .../ml/services/openai/OpenAIDefaults.py | 2 +- .../services/openai/test_OpenAIDefaults.py | 32 ++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py index 3292fe18ca..3a2566fd5f 100644 --- a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py +++ b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py @@ -42,7 +42,7 @@ def reset_subscription_key(self): self.defaults.resetSubscriptionKey() def set_temperature(self, temp): - self.defaults.setTemperature(temp) + self.defaults.setTemperature(float(temp)) def get_temperature(self): return getOption(self.defaults.getTemperature()) diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py index d239964767..e276378dbb 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py @@ -3,8 +3,10 @@ from synapse.ml.services.openai.OpenAIDefaults import OpenAIDefaults from synapse.ml.services.openai.OpenAIPrompt import OpenAIPrompt -import unittest +import unittest,os, json, subprocess from pyspark.sql import SQLContext +from pyspark.sql.functions import col + from synapse.ml.core.init_spark import * @@ -58,6 +60,34 @@ def test_two_defaults(self): defaults.set_deployment_name("Test 1") self.assertEqual(defaults.get_deployment_name(), "Test 1") + def test_prompt_w_defaults(self): + + secretJson = subprocess.check_output( + "az keyvault secret show --vault-name mmlspark-build-keys --name openai-api-key-2", + shell=True, + ) + openai_api_key = json.loads(secretJson)["value"] + + df = spark.createDataFrame([ + ("apple", "fruits"), + ("mercedes", "cars"), + ("cake", "dishes"), + ], ["text", "category"]) + + defaults = OpenAIDefaults() + defaults.set_deployment_name("gpt-35-turbo-0125") + defaults.set_subscription_key(openai_api_key) + defaults.set_temperature(0.05) + + prompt = OpenAIPrompt() + prompt = prompt.setOutputCol("outParsed") + prompt = prompt.setCustomServiceName("synapseml-openai-2") + prompt = prompt.setPromptTemplate("Complete this comma separated list of 5 {category}: {text}, ") + results = prompt.transform(df) + results.select("outParsed").show(truncate = False) + nonNullCount = results.filter(col("outParsed").isNotNull()).count() + assert (nonNullCount == 3) + if __name__ == "__main__": result = unittest.main() From e703bd2deb600c584c1c7602083950ed3250bd21 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Fri, 20 Dec 2024 16:26:59 -0600 Subject: [PATCH 6/9] Add URL to OpenAIDefaults and add new tests --- .../synapse/ml/services/openai/OpenAIDefaults.py | 9 +++++++++ .../synapse/ml/services/openai/OpenAIDefaults.scala | 13 +++++++++++++ .../services/openai/test_OpenAIDefaults.py | 8 +++++++- .../ml/services/openai/OpenAIDefaultsSuite.scala | 13 +++++++++++-- .../azure/synapse/ml/io/http/HTTPTransformer.scala | 6 +++++- 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py index 3a2566fd5f..7aad53f842 100644 --- a/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py +++ b/cognitive/src/main/python/synapse/ml/services/openai/OpenAIDefaults.py @@ -49,3 +49,12 @@ def get_temperature(self): def reset_temperature(self): self.defaults.resetTemperature() + + def set_URL(self, URL): + self.defaults.setURL(URL) + + def get_URL(self): + return getOption(self.defaults.getURL()) + + def reset_URL(self): + self.defaults.resetURL() diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala index f0c102c062..f8405fbe1b 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaults.scala @@ -5,6 +5,7 @@ package com.microsoft.azure.synapse.ml.services.openai import com.microsoft.azure.synapse.ml.param.GlobalParams import com.microsoft.azure.synapse.ml.services.OpenAISubscriptionKey +import com.microsoft.azure.synapse.ml.io.http.URLKey object OpenAIDefaults { def setDeploymentName(v: String): Unit = { @@ -43,6 +44,18 @@ object OpenAIDefaults { GlobalParams.resetGlobalParam(OpenAITemperatureKey) } + def setURL(v: String): Unit = { + GlobalParams.setGlobalParam(URLKey, v) + } + + def getURL: Option[String] = { + GlobalParams.getGlobalParam(URLKey) + } + + def resetURL(): Unit = { + GlobalParams.resetGlobalParam(URLKey) + } + private def extractLeft[T](optEither: Option[Either[T, String]]): Option[T] = { optEither match { case Some(Left(v)) => Some(v) diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py index e276378dbb..a24190c3dc 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py @@ -21,10 +21,12 @@ def test_setters_and_getters(self): defaults.set_deployment_name("Bing Bong") defaults.set_subscription_key("SubKey") defaults.set_temperature(0.05) + defaults.set_URL("Test URL") self.assertEqual(defaults.get_deployment_name(), "Bing Bong") self.assertEqual(defaults.get_subscription_key(), "SubKey") self.assertEqual(defaults.get_temperature(), 0.05) + self.assertEqual(defaults.get_URL(), "Test URL") def test_resetters(self): defaults = OpenAIDefaults() @@ -32,18 +34,22 @@ def test_resetters(self): defaults.set_deployment_name("Bing Bong") defaults.set_subscription_key("SubKey") defaults.set_temperature(0.05) + defaults.set_URL("Test URL") self.assertEqual(defaults.get_deployment_name(), "Bing Bong") self.assertEqual(defaults.get_subscription_key(), "SubKey") self.assertEqual(defaults.get_temperature(), 0.05) + self.assertEqual(defaults.get_URL(), "Test URL") defaults.reset_deployment_name() defaults.reset_subscription_key() defaults.reset_temperature() + defaults.reset_URL() self.assertEqual(defaults.get_deployment_name(), None) self.assertEqual(defaults.get_subscription_key(), None) self.assertEqual(defaults.get_temperature(), None) + self.assertEqual(defaults.get_URL(), None) def test_two_defaults(self): defaults = OpenAIDefaults() @@ -78,10 +84,10 @@ def test_prompt_w_defaults(self): defaults.set_deployment_name("gpt-35-turbo-0125") defaults.set_subscription_key(openai_api_key) defaults.set_temperature(0.05) + defaults.set_URL("https://synapseml-openai-2.openai.azure.com/") prompt = OpenAIPrompt() prompt = prompt.setOutputCol("outParsed") - prompt = prompt.setCustomServiceName("synapseml-openai-2") prompt = prompt.setPromptTemplate("Complete this comma separated list of 5 {category}: {text}, ") results = prompt.transform(df) results.select("outParsed").show(truncate = False) diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala index bb108bcb6d..487e0345bc 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIDefaultsSuite.scala @@ -11,7 +11,6 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { import spark.implicits._ def promptCompletion: OpenAICompletion = new OpenAICompletion() - .setCustomServiceName(openAIServiceName) .setMaxTokens(200) .setOutputCol("out") .setPromptCol("prompt") @@ -26,6 +25,7 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { OpenAIDefaults.setDeploymentName(deploymentName) OpenAIDefaults.setSubscriptionKey(openAIAPIKey) OpenAIDefaults.setTemperature(0.05) + OpenAIDefaults.setURL(s"https://$openAIServiceName.openai.azure.com/") val fromRow = CompletionResponse.makeFromRowConverter promptCompletion.transform(promptDF).collect().foreach(r => @@ -34,7 +34,6 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { } lazy val prompt: OpenAIPrompt = new OpenAIPrompt() - .setCustomServiceName(openAIServiceName) .setOutputCol("outParsed") lazy val df: DataFrame = Seq( @@ -48,6 +47,7 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { OpenAIDefaults.setDeploymentName(deploymentName) OpenAIDefaults.setSubscriptionKey(openAIAPIKey) OpenAIDefaults.setTemperature(0.05) + OpenAIDefaults.setURL(s"https://$openAIServiceName.openai.azure.com/") val nonNullCount = prompt .setPromptTemplate("here is a comma separated list of 5 {category}: {text}, ") @@ -65,22 +65,31 @@ class OpenAIDefaultsSuite extends Flaky with OpenAIAPIKey { } test("Test Getters") { + OpenAIDefaults.setDeploymentName(deploymentName) + OpenAIDefaults.setSubscriptionKey(openAIAPIKey) + OpenAIDefaults.setTemperature(0.05) + OpenAIDefaults.setURL(s"https://$openAIServiceName.openai.azure.com/") + assert(OpenAIDefaults.getDeploymentName.contains(deploymentName)) assert(OpenAIDefaults.getSubscriptionKey.contains(openAIAPIKey)) assert(OpenAIDefaults.getTemperature.contains(0.05)) + assert(OpenAIDefaults.getURL.contains(s"https://$openAIServiceName.openai.azure.com/")) } test("Test Resetters") { OpenAIDefaults.setDeploymentName(deploymentName) OpenAIDefaults.setSubscriptionKey(openAIAPIKey) OpenAIDefaults.setTemperature(0.05) + OpenAIDefaults.setURL(s"https://$openAIServiceName.openai.azure.com/") OpenAIDefaults.resetDeploymentName() OpenAIDefaults.resetSubscriptionKey() OpenAIDefaults.resetTemperature() + OpenAIDefaults.resetURL() assert(OpenAIDefaults.getDeploymentName.isEmpty) assert(OpenAIDefaults.getSubscriptionKey.isEmpty) assert(OpenAIDefaults.getTemperature.isEmpty) + assert(OpenAIDefaults.getURL.isEmpty) } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPTransformer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPTransformer.scala index 8d942e34b1..43ea24d112 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPTransformer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPTransformer.scala @@ -7,7 +7,7 @@ import com.microsoft.azure.synapse.ml.codegen.Wrappable import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} import com.microsoft.azure.synapse.ml.io.http.HandlingUtils.HandlerFunc import com.microsoft.azure.synapse.ml.logging.{FeatureNames, SynapseMLLogging} -import com.microsoft.azure.synapse.ml.param.UDFParam +import com.microsoft.azure.synapse.ml.param.{GlobalKey, GlobalParams, UDFParam} import org.apache.http.impl.client.CloseableHttpClient import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.param._ @@ -76,10 +76,14 @@ trait ConcurrencyParams extends Wrappable { setDefault(concurrency -> 1, timeout -> 60.0) } +case object URLKey extends GlobalKey[String] + trait HasURL extends Params { val url: Param[String] = new Param[String](this, "url", "Url of the service") + GlobalParams.registerParam(url, URLKey) + /** @group getParam */ def getUrl: String = $(url) From ad42fcd0dabed13072f41d2abf04dabcd8505a57 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Fri, 20 Dec 2024 16:43:53 -0600 Subject: [PATCH 7/9] Fix style --- .../services/openai/test_OpenAIDefaults.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py index a24190c3dc..beb86c49bb 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_OpenAIDefaults.py @@ -3,7 +3,7 @@ from synapse.ml.services.openai.OpenAIDefaults import OpenAIDefaults from synapse.ml.services.openai.OpenAIPrompt import OpenAIPrompt -import unittest,os, json, subprocess +import unittest, os, json, subprocess from pyspark.sql import SQLContext from pyspark.sql.functions import col @@ -74,11 +74,14 @@ def test_prompt_w_defaults(self): ) openai_api_key = json.loads(secretJson)["value"] - df = spark.createDataFrame([ - ("apple", "fruits"), - ("mercedes", "cars"), - ("cake", "dishes"), - ], ["text", "category"]) + df = spark.createDataFrame( + [ + ("apple", "fruits"), + ("mercedes", "cars"), + ("cake", "dishes"), + ], + ["text", "category"], + ) defaults = OpenAIDefaults() defaults.set_deployment_name("gpt-35-turbo-0125") @@ -88,11 +91,13 @@ def test_prompt_w_defaults(self): prompt = OpenAIPrompt() prompt = prompt.setOutputCol("outParsed") - prompt = prompt.setPromptTemplate("Complete this comma separated list of 5 {category}: {text}, ") + prompt = prompt.setPromptTemplate( + "Complete this comma separated list of 5 {category}: {text}, " + ) results = prompt.transform(df) - results.select("outParsed").show(truncate = False) + results.select("outParsed").show(truncate=False) nonNullCount = results.filter(col("outParsed").isNotNull()).count() - assert (nonNullCount == 3) + assert nonNullCount == 3 if __name__ == "__main__": From 512501d4000dc6f5892e0a7fe9721259e22dce24 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Fri, 20 Dec 2024 17:11:00 -0600 Subject: [PATCH 8/9] Revert "chore: Optimizing the method getOptionalParams in HasOpenAITextParams (#2315)" This reverts commit 08aab6adf1a92aa09a9d15721f4d4b4bf5407c80. --- .../synapse/ml/services/openai/OpenAI.scala | 66 +++++++------------ 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAI.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAI.scala index e1e35fc020..bd95451d46 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAI.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAI.scala @@ -106,12 +106,11 @@ trait HasOpenAIEmbeddingParams extends HasOpenAISharedParams with HasAPIVersion case object OpenAITemperatureKey extends GlobalKey[Either[Double, String]] trait HasOpenAITextParams extends HasOpenAISharedParams { + val maxTokens: ServiceParam[Int] = new ServiceParam[Int]( this, "maxTokens", "The maximum number of tokens to generate. Has minimum of 0.", - isRequired = false){ - override val payloadName: String = "max_tokens" - } + isRequired = false) def getMaxTokens: Int = getScalarParam(maxTokens) @@ -158,9 +157,7 @@ trait HasOpenAITextParams extends HasOpenAISharedParams { " So 0.1 means only the tokens comprising the top 10 percent probability mass are considered." + " We generally recommend using this or `temperature` but not both." + " Minimum of 0 and maximum of 1 allowed.", - isRequired = false) { - override val payloadName: String = "top_p" - } + isRequired = false) def getTopP: Double = getScalarParam(topP) @@ -189,9 +186,7 @@ trait HasOpenAITextParams extends HasOpenAISharedParams { " So for example, if `logprobs` is 10, the API will return a list of the 10 most likely tokens." + " If `logprobs` is 0, only the chosen tokens will have logprobs returned." + " Minimum of 0 and maximum of 100 allowed.", - isRequired = false) { - override val payloadName: String = "logprobs" - } + isRequired = false) def getLogProbs: Int = getScalarParam(logProbs) @@ -217,9 +212,7 @@ trait HasOpenAITextParams extends HasOpenAISharedParams { val cacheLevel: ServiceParam[Int] = new ServiceParam[Int]( this, "cacheLevel", "can be used to disable any server-side caching, 0=no cache, 1=prompt prefix enabled, 2=full cache", - isRequired = false){ - override val payloadName: String = "cache_level" - } + isRequired = false) def getCacheLevel: Int = getScalarParam(cacheLevel) @@ -233,9 +226,7 @@ trait HasOpenAITextParams extends HasOpenAISharedParams { this, "presencePenalty", "How much to penalize new tokens based on their existing frequency in the text so far." + " Decreases the likelihood of the model to repeat the same line verbatim. Has minimum of -2 and maximum of 2.", - isRequired = false){ - override val payloadName: String = "presence_penalty" - } + isRequired = false) def getPresencePenalty: Double = getScalarParam(presencePenalty) @@ -249,9 +240,7 @@ trait HasOpenAITextParams extends HasOpenAISharedParams { this, "frequencyPenalty", "How much to penalize new tokens based on whether they appear in the text so far." + " Increases the likelihood of the model to talk about new topics.", - isRequired = false){ - override val payloadName: String = "frequency_penalty" - } + isRequired = false) def getFrequencyPenalty: Double = getScalarParam(frequencyPenalty) @@ -265,9 +254,7 @@ trait HasOpenAITextParams extends HasOpenAISharedParams { this, "bestOf", "How many generations to create server side, and display only the best." + " Will not stream intermediate progress if best_of > 1. Has maximum value of 128.", - isRequired = false){ - override val payloadName: String = "best_of" - } + isRequired = false) def getBestOf: Int = getScalarParam(bestOf) @@ -277,27 +264,24 @@ trait HasOpenAITextParams extends HasOpenAISharedParams { def setBestOfCol(v: String): this.type = setVectorParam(bestOf, v) - // list of shared text parameters. In method getOptionalParams, we will iterate over these parameters - // to compute the optional parameters. Since this list never changes, we can create it once and reuse it. - private val sharedTextParams = Seq( - maxTokens, - temperature, - topP, - user, - n, - echo, - stop, - cacheLevel, - presencePenalty, - frequencyPenalty, - bestOf, - logProbs - ) - private[ml] def getOptionalParams(r: Row): Map[String, Any] = { - sharedTextParams.flatMap { param => - getValueOpt(r, param).map { value => param.payloadName -> value } - }.toMap + Seq( + maxTokens, + temperature, + topP, + user, + n, + echo, + stop, + cacheLevel, + presencePenalty, + frequencyPenalty, + bestOf + ).flatMap(param => + getValueOpt(r, param).map(v => (GenerationUtils.camelToSnake(param.name), v)) + ).++(Seq( + getValueOpt(r, logProbs).map(v => ("logprobs", v)) + ).flatten).toMap } } From 30dd85a5fea467ab7f20a81c667e32e0f41e4c62 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Sat, 21 Dec 2024 01:03:36 -0600 Subject: [PATCH 9/9] Fix bug where params that don't exist try to be assigned --- .../azure/synapse/ml/services/openai/OpenAIPrompt.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala index 332f83bbb5..fc14dd54e4 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala @@ -219,7 +219,7 @@ class OpenAIPrompt(override val uid: String) extends Transformer } // apply all parameters extractParamMap().toSeq - .filter(p => !localParamNames.contains(p.param.name)) + .filter(p => !localParamNames.contains(p.param.name) && completion.hasParam(p.param.name)) .foreach(p => completion.set(completion.getParam(p.param.name), p.value)) completion