Skip to content

Commit

Permalink
Merge branch 'refactor-cog' of https://github.com/mhamilton723/SynapseML
Browse files Browse the repository at this point in the history
 into refactor-cog
  • Loading branch information
mhamilton723 committed Oct 31, 2023
2 parents 25c65c8 + a094bc1 commit 4fddd75
Show file tree
Hide file tree
Showing 33 changed files with 88 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from synapse.ml.services.langchain import LangchainTransformer
from synapsemltest.spark import *


#######################################################
# this part is to correct a bug in langchain,
# where the llm type of AzureOpenAI was set
Expand Down
1 change: 0 additions & 1 deletion core/src/main/python/synapse/ml/core/platform/Platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def running_on_databricks():


def find_secret(secret_name, keyvault):

if running_on_synapse() or running_on_synapse_internal():
from notebookutils.mssparkutils.credentials import getSecret

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ def __init__(
user_feature_vector_mapping_df: DataFrame,
res_feature_vector_mapping_df: DataFrame,
):

self.tenant_col = tenant_col
self.user_col = user_col
self.user_vec_col = user_vec_col
Expand All @@ -127,7 +126,6 @@ def replace_mappings(
user_feature_vector_mapping_df: Optional[DataFrame] = None,
res_feature_vector_mapping_df: Optional[DataFrame] = None,
):

"""
create a new model replacing the user and resource models with new ones (optional)
Expand Down Expand Up @@ -765,7 +763,6 @@ def __init__(
negScore: Optional[float] = None,
historyAccessDf: Optional[DataFrame] = None,
):

super().__init__()

if applyImplicitCf:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def __init__(
indexed_col_names_arr: List[str],
complementset_factor: int,
):

super().__init__()

# we assume here that all indices of the columns are continuous within their partition_key
Expand Down
3 changes: 0 additions & 3 deletions core/src/main/python/synapse/ml/cyber/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def __init__(
num_eng_resources: int = 50,
single_component: bool = True,
):

self.hr_users = ["hr_user_" + str(i) for i in range(num_hr_users)]
self.hr_resources = ["hr_res_" + str(i) for i in range(num_hr_resources)]

Expand Down Expand Up @@ -67,7 +66,6 @@ def edges_between(
full_node_coverage: bool,
not_set: Optional[Set[Tuple[str, str]]] = None,
) -> List[Tuple[str, str, float]]:

import itertools

if len(users) == 0 or len(resources) == 0:
Expand All @@ -92,7 +90,6 @@ def edges_between(
and (len(seen_users) < len(users))
or (len(seen_resources) < len(resources))
):

if cart is not None:
assert len(cart) > 0, cart
ii = self.rand.randint(0, len(cart) - 1)
Expand Down
6 changes: 0 additions & 6 deletions core/src/main/python/synapse/ml/cyber/feature/scalers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def __init__(
per_group_stats: Union[DataFrame, Dict[str, float]],
use_pandas: bool = True,
):

super().__init__()
ExplainBuilder.build(
self,
Expand Down Expand Up @@ -128,7 +127,6 @@ def __init__(
output_col: str,
use_pandas: bool = True,
):

super().__init__()
ExplainBuilder.build(
self,
Expand Down Expand Up @@ -193,7 +191,6 @@ def __init__(
coefficient_factor: float = 1.0,
use_pandas: bool = True,
):

super().__init__(
input_col,
partition_key,
Expand Down Expand Up @@ -250,7 +247,6 @@ def __init__(
coefficient_factor: float = 1.0,
use_pandas: bool = True,
):

super().__init__(input_col, partition_key, output_col, use_pandas)
self.coefficient_factor = coefficient_factor

Expand Down Expand Up @@ -292,7 +288,6 @@ def __init__(
max_required_value: float,
use_pandas: bool = True,
):

super().__init__(
input_col,
partition_key,
Expand Down Expand Up @@ -382,7 +377,6 @@ def __init__(
max_required_value: float = 1.0,
use_pandas: bool = True,
):

super().__init__(input_col, partition_key, output_col, use_pandas)
self.min_required_value = min_required_value
self.max_required_value = max_required_value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

@inherit_doc
class IsolationForestModel(_IsolationForestModel):

# The generated implementation does not work. Override it to return the java object.
def getInnerModel(self):
return self._java_obj.getInnerModel()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@


class DeepTextClassifier(TorchEstimator, TextPredictionParams):

checkpoint = Param(
Params._dummy(), "checkpoint", "checkpoint of the deep text classifier"
)
Expand Down Expand Up @@ -230,7 +229,6 @@ def _get_or_create_backend(self):
)

def _update_transformation_fn(self):

text_col = self.getTextCol()
label_col = self.getLabelCol()
max_token_len = self.getMaxTokenLen()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@


class DeepTextModel(TorchModel, TextPredictionParams):

tokenizer = Param(Params._dummy(), "tokenizer", "tokenizer")

checkpoint = Param(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@


class DeepVisionClassifier(TorchEstimator, VisionPredictionParams):

backbone = Param(
Params._dummy(), "backbone", "backbone of the deep vision classifier"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@


class DeepVisionModel(TorchModel, VisionPredictionParams):

transform_fn = Param(
Params._dummy(),
"transform_fn",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@


class HasLabelColParam(Params):

label_col = Param(
Params._dummy(),
"label_col",
Expand All @@ -31,7 +30,6 @@ def getLabelCol(self):


class HasImageColParam(Params):

image_col = Param(
Params._dummy(),
"image_col",
Expand All @@ -58,7 +56,6 @@ def getImageCol(self):

## TODO: Potentially generalize to support multiple text columns as input
class HasTextColParam(Params):

text_col = Param(
Params._dummy(),
"text_col",
Expand All @@ -84,7 +81,6 @@ def getTextCol(self):


class HasPredictionColParam(Params):

prediction_col = Param(
Params._dummy(),
"prediction_col",
Expand Down
1 change: 0 additions & 1 deletion deep-learning/src/test/python/synapsemltest/dl/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ def num_processes(self):


def _download_dataset():

urllib.request.urlretrieve(
"https://mmlspark.blob.core.windows.net/publicwasb/17flowers.tgz",
dataset_dir + "17flowers.tgz",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def test_bert_base_cased():
]

with local_store() as store:

checkpoint = "bert-base-uncased"

deep_text_classifier = DeepTextClassifier(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ def test_mobilenet_v2(get_data_path):
train_folder, test_folder = get_data_path

with local_store() as store:

deep_vision_classifier = DeepVisionClassifier(
backbone="mobilenet_v2",
store=store,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@
"source": [
"from synapse.ml.core.platform import find_secret\n",
"\n",
"service_key = find_secret(secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\")\n",
"service_key = find_secret(\n",
" secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
"service_loc = \"eastus\""
]
},
Expand Down Expand Up @@ -358,7 +360,9 @@
"from pyspark.sql.functions import udf\n",
"import random\n",
"\n",
"service_key_2 = find_secret(secret_name=\"cognitive-api-key-2\", keyvault=\"mmlspark-build-keys\")\n",
"service_key_2 = find_secret(\n",
" secret_name=\"cognitive-api-key-2\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
"keys = [service_key, service_key_2]\n",
"\n",
"\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,15 @@
"from synapse.ml.core.platform import find_secret\n",
"\n",
"# An Anomaly Detector subscription key\n",
"anomalyKey = find_secret(secret_name=\"anomaly-api-key\", keyvault=\"mmlspark-build-keys\") # use your own anomaly api key\n",
"anomalyKey = find_secret(\n",
" secret_name=\"anomaly-api-key\", keyvault=\"mmlspark-build-keys\"\n",
") # use your own anomaly api key\n",
"# Your storage account name\n",
"storageName = \"anomalydetectiontest\" # use your own storage account name\n",
"# A connection string to your blob storage account\n",
"storageKey = find_secret(secret_name=\"madtest-storage-key\", keyvault=\"mmlspark-build-keys\") # use your own storage key\n",
"storageKey = find_secret(\n",
" secret_name=\"madtest-storage-key\", keyvault=\"mmlspark-build-keys\"\n",
") # use your own storage key\n",
"# A place to save intermediate MVAD results\n",
"intermediateSaveDir = (\n",
" \"wasbs://[email protected]/intermediateData\"\n",
Expand Down
20 changes: 15 additions & 5 deletions docs/Explore Algorithms/AI Services/Overview.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -159,22 +159,32 @@
"from synapse.ml.core.platform import *\n",
"\n",
"# A general AI services key for Text Analytics, Computer Vision and Form Recognizer (or use separate keys that belong to each service)\n",
"service_key = find_secret(secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\") # Replace it with your ai service key, check prerequisites for more details\n",
"service_key = find_secret(\n",
" secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\"\n",
") # Replace it with your ai service key, check prerequisites for more details\n",
"service_loc = \"eastus\"\n",
"\n",
"# A Bing Search v7 subscription key\n",
"bing_search_key = find_secret(secret_name=\"bing-search-key\", keyvault=\"mmlspark-build-keys\") # Replace the call to find_secret with your key as a python string.\n",
"bing_search_key = find_secret(\n",
" secret_name=\"bing-search-key\", keyvault=\"mmlspark-build-keys\"\n",
") # Replace the call to find_secret with your key as a python string.\n",
"\n",
"# An Anomaly Detector subscription key\n",
"anomaly_key = find_secret(secret_name=\"anomaly-api-key\", keyvault=\"mmlspark-build-keys\") # Replace the call to find_secret with your key as a python string. If you don't have an anomaly detection resource created before Sep 20th 2023, you won't be able to create one.\n",
"anomaly_key = find_secret(\n",
" secret_name=\"anomaly-api-key\", keyvault=\"mmlspark-build-keys\"\n",
") # Replace the call to find_secret with your key as a python string. If you don't have an anomaly detection resource created before Sep 20th 2023, you won't be able to create one.\n",
"anomaly_loc = \"westus2\"\n",
"\n",
"# A Translator subscription key\n",
"translator_key = find_secret(secret_name=\"translator-key\", keyvault=\"mmlspark-build-keys\") # Replace the call to find_secret with your key as a python string.\n",
"translator_key = find_secret(\n",
" secret_name=\"translator-key\", keyvault=\"mmlspark-build-keys\"\n",
") # Replace the call to find_secret with your key as a python string.\n",
"translator_loc = \"eastus\"\n",
"\n",
"# An Azure search key\n",
"search_key = find_secret(secret_name=\"azure-search-key\", keyvault=\"mmlspark-build-keys\") # Replace the call to find_secret with your key as a python string."
"search_key = find_secret(\n",
" secret_name=\"azure-search-key\", keyvault=\"mmlspark-build-keys\"\n",
") # Replace the call to find_secret with your key as a python string."
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,13 @@
"from synapse.ml.core.platform import find_secret\n",
"\n",
"# put your service keys here\n",
"cognitive_key = find_secret(secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\")\n",
"cognitive_key = find_secret(\n",
" secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
"cognitive_location = \"eastus\"\n",
"bing_search_key = find_secret(secret_name=\"bing-search-key\", keyvault=\"mmlspark-build-keys\")"
"bing_search_key = find_secret(\n",
" secret_name=\"bing-search-key\", keyvault=\"mmlspark-build-keys\"\n",
")"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
"source": [
"from synapse.ml.core.platform import find_secret\n",
"\n",
"cognitive_key = find_secret(secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\")\n",
"cognitive_key = find_secret(\n",
" secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
"cognitive_location = \"eastus\""
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,13 @@
"from pyspark.sql.functions import lit, udf, col, split\n",
"from synapse.ml.core.platform import *\n",
"\n",
"cognitive_key = find_secret(secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\")\n",
"cognitive_key = find_secret(\n",
" secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
"cognitive_loc = \"eastus\"\n",
"azure_search_key = find_secret(secret_name=\"azure-search-key\", keyvault=\"mmlspark-build-keys\")\n",
"azure_search_key = find_secret(\n",
" secret_name=\"azure-search-key\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
"search_service = \"mmlspark-azure-search\"\n",
"search_index = \"test\""
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@
" from notebookutils import mssparkutils\n",
"\n",
"# Fill this in with your cognitive service information\n",
"service_key = find_secret(secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\") # Replace this line with a string like service_key = \"dddjnbdkw9329\"\n",
"service_key = find_secret(\n",
" secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\"\n",
") # Replace this line with a string like service_key = \"dddjnbdkw9329\"\n",
"service_loc = \"eastus\"\n",
"\n",
"storage_container = \"audiobooks\"\n",
"storage_key = find_secret(secret_name=\"madtest-storage-key\", keyvault=\"mmlspark-build-keys\")\n",
"storage_key = find_secret(\n",
" secret_name=\"madtest-storage-key\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
"storage_account = \"anomalydetectiontest\""
],
"outputs": [],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@
"from pyspark.sql import SparkSession\n",
"from synapse.ml.core.platform import find_secret\n",
"\n",
"ai_services_key = find_secret(secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\")\n",
"ai_services_key = find_secret(\n",
" secret_name=\"cognitive-api-key\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
"ai_services_location = \"eastus\"\n",
"\n",
"# Fill in the following lines with your Azure service information\n",
Expand All @@ -155,7 +157,9 @@
"# Azure Cognitive Search\n",
"cogsearch_name = \"mmlspark-azure-search\"\n",
"cogsearch_index_name = \"examplevectorindex\"\n",
"cogsearch_api_key = find_secret(secret_name=\"azure-search-key\", keyvault=\"mmlspark-build-keys\")"
"cogsearch_api_key = find_secret(\n",
" secret_name=\"azure-search-key\", keyvault=\"mmlspark-build-keys\"\n",
")"
],
"id": "8fbc0743f3a0f6ab"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@
"from synapse.ml.core.platform import *\n",
"\n",
"# Azure Maps account key\n",
"maps_key = find_secret(secret_name=\"azuremaps-api-key\", keyvault=\"mmlspark-build-keys\") # Replace this with your azure maps key\n",
"maps_key = find_secret(\n",
" secret_name=\"azuremaps-api-key\", keyvault=\"mmlspark-build-keys\"\n",
") # Replace this with your azure maps key\n",
"\n",
"# Creator Geo prefix\n",
"# for this example, assuming that the creator resource is created in `EAST US 2`.\n",
Expand Down
Loading

0 comments on commit 4fddd75

Please sign in to comment.