diff --git a/.vscode/launch.json b/.vscode/launch.json index 090a3ccf1..3aa9beabd 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -9,19 +9,14 @@ "name": "Python: WebApp backend", "type": "python", "request": "launch", - "module": "flask", - "cwd": "${workspaceFolder}/app/backend", - "env": { - "FLASK_APP": "app.py", - "FLASK_ENV": "development", - "FLASK_DEBUG": "0" - }, + "module": "uvicorn", "args": [ - "run", - "--no-debugger", - "--no-reload", - "-p 5000" + "app:app", + "--reload", + "--port", + "5000" ], + "cwd": "${workspaceFolder}/app/backend", "console": "integratedTerminal", "justMyCode": true, "envFile": "${workspaceFolder}/scripts/environments/infrastructure.debug.env", diff --git a/app/backend/app.py b/app/backend/app.py index 7ef50db40..6944975bc 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -2,13 +2,14 @@ # Licensed under the MIT license. import logging -import mimetypes import os import json import urllib.parse from datetime import datetime, timedelta - +from fastapi.staticfiles import StaticFiles import openai +from fastapi import FastAPI, HTTPException, Request +from fastapi.responses import RedirectResponse from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach from azure.core.credentials import AzureKeyCredential from azure.identity import DefaultAzureCredential @@ -20,59 +21,63 @@ ResourceTypes, generate_account_sas, ) -from flask import Flask, jsonify, request from shared_code.status_log import State, StatusClassification, StatusLog from shared_code.tags_helper import TagsHelper -str_to_bool = {'true': True, 'false': False} -# Replace these with your own values, either in environment variables or directly here -AZURE_BLOB_STORAGE_ACCOUNT = ( - os.environ.get("AZURE_BLOB_STORAGE_ACCOUNT") or "mystorageaccount" -) -AZURE_BLOB_STORAGE_ENDPOINT = os.environ.get("AZURE_BLOB_STORAGE_ENDPOINT") -AZURE_BLOB_STORAGE_KEY = os.environ.get("AZURE_BLOB_STORAGE_KEY") -AZURE_BLOB_STORAGE_CONTAINER = ( - os.environ.get("AZURE_BLOB_STORAGE_CONTAINER") or "content" -) -AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE") or "gptkb" -AZURE_SEARCH_SERVICE_ENDPOINT = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT") -AZURE_SEARCH_SERVICE_KEY = os.environ.get("AZURE_SEARCH_SERVICE_KEY") -AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX") or "gptkbindex" -AZURE_OPENAI_SERVICE = os.environ.get("AZURE_OPENAI_SERVICE") or "myopenai" -AZURE_OPENAI_RESOURCE_GROUP = os.environ.get("AZURE_OPENAI_RESOURCE_GROUP") or "" -AZURE_OPENAI_CHATGPT_DEPLOYMENT = ( - os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT") or "gpt-35-turbo-16k" -) -AZURE_OPENAI_CHATGPT_MODEL_NAME = ( os.environ.get("AZURE_OPENAI_CHATGPT_MODEL_NAME") or "") -AZURE_OPENAI_CHATGPT_MODEL_VERSION = ( os.environ.get("AZURE_OPENAI_CHATGPT_MODEL_VERSION") or "") -USE_AZURE_OPENAI_EMBEDDINGS = str_to_bool.get(os.environ.get("USE_AZURE_OPENAI_EMBEDDINGS").lower()) or False -EMBEDDING_DEPLOYMENT_NAME = ( os.environ.get("EMBEDDING_DEPLOYMENT_NAME") or "") -AZURE_OPENAI_EMBEDDINGS_MODEL_NAME = ( os.environ.get("AZURE_OPENAI_EMBEDDINGS_MODEL_NAME") or "") -AZURE_OPENAI_EMBEDDINGS_VERSION = ( os.environ.get("AZURE_OPENAI_EMBEDDINGS_VERSION") or "") - -AZURE_OPENAI_SERVICE_KEY = os.environ.get("AZURE_OPENAI_SERVICE_KEY") -AZURE_SUBSCRIPTION_ID = os.environ.get("AZURE_SUBSCRIPTION_ID") -IS_GOV_CLOUD_DEPLOYMENT = str_to_bool.get(os.environ.get("IS_GOV_CLOUD_DEPLOYMENT").lower()) or False -CHAT_WARNING_BANNER_TEXT = os.environ.get("CHAT_WARNING_BANNER_TEXT") or "" -APPLICATION_TITLE = 
os.environ.get("APPLICATION_TITLE") or "Information Assistant, built with Azure OpenAI" -KB_FIELDS_CONTENT = os.environ.get("KB_FIELDS_CONTENT") or "content" -KB_FIELDS_PAGENUMBER = os.environ.get("KB_FIELDS_PAGENUMBER") or "pages" -KB_FIELDS_SOURCEFILE = os.environ.get("KB_FIELDS_SOURCEFILE") or "file_uri" -KB_FIELDS_CHUNKFILE = os.environ.get("KB_FIELDS_CHUNKFILE") or "chunk_file" +# === ENV Setup === + +ENV = { + "AZURE_BLOB_STORAGE_ACCOUNT": None, + "AZURE_BLOB_STORAGE_ENDPOINT": None, + "AZURE_BLOB_STORAGE_KEY": None, + "AZURE_BLOB_STORAGE_CONTAINER": "content", + "AZURE_SEARCH_SERVICE": "gptkb", + "AZURE_SEARCH_SERVICE_ENDPOINT": None, + "AZURE_SEARCH_SERVICE_KEY": None, + "AZURE_SEARCH_INDEX": "gptkbindex", + "AZURE_OPENAI_SERVICE": "myopenai", + "AZURE_OPENAI_RESOURCE_GROUP": "", + "AZURE_OPENAI_CHATGPT_DEPLOYMENT": "gpt-35-turbo-16k", + "AZURE_OPENAI_CHATGPT_MODEL_NAME": "", + "AZURE_OPENAI_CHATGPT_MODEL_VERSION": "", + "USE_AZURE_OPENAI_EMBEDDINGS": "false", + "EMBEDDING_DEPLOYMENT_NAME": "", + "AZURE_OPENAI_EMBEDDINGS_MODEL_NAME": "", + "AZURE_OPENAI_EMBEDDINGS_VERSION": "", + "AZURE_OPENAI_SERVICE_KEY": None, + "AZURE_SUBSCRIPTION_ID": None, + "IS_GOV_CLOUD_DEPLOYMENT": "false", + "CHAT_WARNING_BANNER_TEXT": "", + "APPLICATION_TITLE": "Information Assistant, built with Azure OpenAI", + "KB_FIELDS_CONTENT": "content", + "KB_FIELDS_PAGENUMBER": "pages", + "KB_FIELDS_SOURCEFILE": "file_uri", + "KB_FIELDS_CHUNKFILE": "chunk_file", + "COSMOSDB_URL": None, + "COSMOSDB_KEY": None, + "COSMOSDB_LOG_DATABASE_NAME": "statusdb", + "COSMOSDB_LOG_CONTAINER_NAME": "statuscontainer", + "COSMOSDB_TAGS_DATABASE_NAME": "tagsdb", + "COSMOSDB_TAGS_CONTAINER_NAME": "tagscontainer", + "QUERY_TERM_LANGUAGE": "English", + "TARGET_EMBEDDINGS_MODEL": "BAAI/bge-small-en-v1.5", + "ENRICHMENT_APPSERVICE_NAME": "enrichment" +} -COSMOSDB_URL = os.environ.get("COSMOSDB_URL") -COSMODB_KEY = os.environ.get("COSMOSDB_KEY") -COSMOSDB_LOG_DATABASE_NAME = os.environ.get("COSMOSDB_LOG_DATABASE_NAME") or "statusdb" -COSMOSDB_LOG_CONTAINER_NAME = os.environ.get("COSMOSDB_LOG_CONTAINER_NAME") or "statuscontainer" -COSMOSDB_TAGS_DATABASE_NAME = os.environ.get("COSMOSDB_TAGS_DATABASE_NAME") or "tagsdb" -COSMOSDB_TAGS_CONTAINER_NAME = os.environ.get("COSMOSDB_TAGS_CONTAINER_NAME") or "tagscontainer" +for key, value in ENV.items(): + new_value = os.getenv(key) + if new_value is not None: + ENV[key] = new_value + elif value is None: + raise ValueError(f"Environment variable {key} not set") -QUERY_TERM_LANGUAGE = os.environ.get("QUERY_TERM_LANGUAGE") or "English" +str_to_bool = {'true': True, 'false': False} -TARGET_EMBEDDING_MODEL = os.environ.get("TARGET_EMBEDDINGS_MODEL") or "BAAI/bge-small-en-v1.5" -ENRICHMENT_APPSERVICE_NAME = os.environ.get("ENRICHMENT_APPSERVICE_NAME") or "enrichment" +log = logging.getLogger("uvicorn") +log.setLevel('DEBUG') +log.propagate = True # embedding_service_suffix = "xyoek" @@ -81,65 +86,65 @@ # keys for each service # If you encounter a blocking error during a DefaultAzureCredntial resolution, you can exclude the problematic credential by using a parameter (ex. 
exclude_shared_token_cache_credential=True) azure_credential = DefaultAzureCredential() -azure_search_key_credential = AzureKeyCredential(AZURE_SEARCH_SERVICE_KEY) +azure_search_key_credential = AzureKeyCredential(ENV["AZURE_SEARCH_SERVICE_KEY"]) # Used by the OpenAI SDK openai.api_type = "azure" -openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com" +openai.api_base = "https://" + ENV["AZURE_OPENAI_SERVICE"] + ".openai.azure.com/" openai.api_version = "2023-06-01-preview" # Setup StatusLog to allow access to CosmosDB for logging statusLog = StatusLog( - COSMOSDB_URL, COSMODB_KEY, COSMOSDB_LOG_DATABASE_NAME, COSMOSDB_LOG_CONTAINER_NAME + ENV["COSMOSDB_URL"], ENV["COSMOSDB_KEY"], ENV["COSMOSDB_LOG_DATABASE_NAME"], ENV["COSMOSDB_LOG_CONTAINER_NAME"] ) tagsHelper = TagsHelper( - COSMOSDB_URL, COSMODB_KEY, COSMOSDB_TAGS_DATABASE_NAME, COSMOSDB_TAGS_CONTAINER_NAME + ENV["COSMOSDB_URL"], ENV["COSMOSDB_KEY"], ENV["COSMOSDB_TAGS_DATABASE_NAME"], ENV["COSMOSDB_TAGS_CONTAINER_NAME"] ) # Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead # openai.api_type = "azure_ad" # openai_token = azure_credential.get_token("https://cognitiveservices.azure.com/.default") -openai.api_key = AZURE_OPENAI_SERVICE_KEY +openai.api_key = ENV["AZURE_OPENAI_SERVICE_KEY"] # Set up clients for Cognitive Search and Storage search_client = SearchClient( - endpoint=AZURE_SEARCH_SERVICE_ENDPOINT, - index_name=AZURE_SEARCH_INDEX, + endpoint=ENV["AZURE_SEARCH_SERVICE_ENDPOINT"], + index_name=ENV["AZURE_SEARCH_INDEX"], credential=azure_search_key_credential, ) blob_client = BlobServiceClient( - account_url=AZURE_BLOB_STORAGE_ENDPOINT, - credential=AZURE_BLOB_STORAGE_KEY, + account_url=ENV["AZURE_BLOB_STORAGE_ENDPOINT"], + credential=ENV["AZURE_BLOB_STORAGE_KEY"], ) -blob_container = blob_client.get_container_client(AZURE_BLOB_STORAGE_CONTAINER) +blob_container = blob_client.get_container_client(ENV["AZURE_BLOB_STORAGE_CONTAINER"]) model_name = '' model_version = '' -if (IS_GOV_CLOUD_DEPLOYMENT): - model_name = AZURE_OPENAI_CHATGPT_MODEL_NAME - model_version = AZURE_OPENAI_CHATGPT_MODEL_VERSION - embedding_model_name = AZURE_OPENAI_EMBEDDINGS_MODEL_NAME - embedding_model_version = AZURE_OPENAI_EMBEDDINGS_VERSION +if (str_to_bool.get(ENV["IS_GOV_CLOUD_DEPLOYMENT"])): + model_name = ENV["AZURE_OPENAI_CHATGPT_MODEL_NAME"] + model_version = ENV["AZURE_OPENAI_CHATGPT_MODEL_VERSION"] + embedding_model_name = ENV["AZURE_OPENAI_EMBEDDINGS_MODEL_NAME"] + embedding_model_version = ENV["AZURE_OPENAI_EMBEDDINGS_VERSION"] else: # Set up OpenAI management client openai_mgmt_client = CognitiveServicesManagementClient( credential=azure_credential, - subscription_id=AZURE_SUBSCRIPTION_ID) + subscription_id=ENV["AZURE_SUBSCRIPTION_ID"]) deployment = openai_mgmt_client.deployments.get( - resource_group_name=AZURE_OPENAI_RESOURCE_GROUP, - account_name=AZURE_OPENAI_SERVICE, - deployment_name=AZURE_OPENAI_CHATGPT_DEPLOYMENT) + resource_group_name=ENV["AZURE_OPENAI_RESOURCE_GROUP"], + account_name=ENV["AZURE_OPENAI_SERVICE"], + deployment_name=ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"]) model_name = deployment.properties.model.name model_version = deployment.properties.model.version - if USE_AZURE_OPENAI_EMBEDDINGS: + if (str_to_bool.get(ENV["USE_AZURE_OPENAI_EMBEDDINGS"])): embedding_deployment = openai_mgmt_client.deployments.get( - resource_group_name=AZURE_OPENAI_RESOURCE_GROUP, - account_name=AZURE_OPENAI_SERVICE, - deployment_name=EMBEDDING_DEPLOYMENT_NAME) + 
resource_group_name=ENV["AZURE_OPENAI_RESOURCE_GROUP"],
+            account_name=ENV["AZURE_OPENAI_SERVICE"],
+            deployment_name=ENV["EMBEDDING_DEPLOYMENT_NAME"])
         embedding_model_name = embedding_deployment.properties.model.name
         embedding_model_version = embedding_deployment.properties.model.version
@@ -150,64 +155,84 @@
 chat_approaches = {
     "rrr": ChatReadRetrieveReadApproach(
         search_client,
-        AZURE_OPENAI_SERVICE,
-        AZURE_OPENAI_SERVICE_KEY,
-        AZURE_OPENAI_CHATGPT_DEPLOYMENT,
-        KB_FIELDS_SOURCEFILE,
-        KB_FIELDS_CONTENT,
-        KB_FIELDS_PAGENUMBER,
-        KB_FIELDS_CHUNKFILE,
-        AZURE_BLOB_STORAGE_CONTAINER,
+        ENV["AZURE_OPENAI_SERVICE"],
+        ENV["AZURE_OPENAI_SERVICE_KEY"],
+        ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
+        ENV["KB_FIELDS_SOURCEFILE"],
+        ENV["KB_FIELDS_CONTENT"],
+        ENV["KB_FIELDS_PAGENUMBER"],
+        ENV["KB_FIELDS_CHUNKFILE"],
+        ENV["AZURE_BLOB_STORAGE_CONTAINER"],
         blob_client,
-        QUERY_TERM_LANGUAGE,
+        ENV["QUERY_TERM_LANGUAGE"],
         model_name,
         model_version,
-        IS_GOV_CLOUD_DEPLOYMENT,
-        TARGET_EMBEDDING_MODEL,
-        ENRICHMENT_APPSERVICE_NAME
+        str_to_bool.get(ENV["IS_GOV_CLOUD_DEPLOYMENT"]),
+        ENV["TARGET_EMBEDDINGS_MODEL"],
+        ENV["ENRICHMENT_APPSERVICE_NAME"]
     )
 }
-app = Flask(__name__)
+
+# Create API
+app = FastAPI(
+    title="IA Web API",
+    description="A Python API to serve as Backend For the Information Assistant Web App",
+    version="0.1.0",
+    docs_url="/docs",
+)
+
+@app.get("/", include_in_schema=False, response_class=RedirectResponse)
+async def root():
+    return RedirectResponse(url="/index.html")
 
-@app.route("/", defaults={"path": "index.html"})
-@app.route("/<path:path>")
-def static_file(path):
-    """Serve static files from the 'static' directory"""
-    return app.send_static_file(path)
+@app.post("/chat")
+async def chat(request: Request):
+    """Chat with the bot using a given approach
 
-@app.route("/chat", methods=["POST"])
-def chat():
-    """Chat with the bot using a given approach"""
-    approach = request.json["approach"]
+    Args:
+        request (Request): The incoming request object
+
+    Returns:
+        dict: The response containing the chat results
+
+    Raises:
+        HTTPException: 400 if the approach is unknown, 500 if an error occurs during the chat
+    """
+    json_body = await request.json()
+    approach = json_body.get("approach")
     try:
         impl = chat_approaches.get(approach)
         if not impl:
-            return jsonify({"error": "unknown approach"}), 400
-        r = impl.run(request.json["history"], request.json.get("overrides") or {})
+            raise HTTPException(status_code=400, detail="unknown approach")
+        r = await impl.run(json_body.get("history", []), json_body.get("overrides", {}))
 
-        # return jsonify(r) # To fix citation bug,below code is added.aparmar
-        return jsonify(
-            {
+        return {
                 "data_points": r["data_points"],
                 "answer": r["answer"],
                 "thoughts": r["thoughts"],
                 "citation_lookup": r["citation_lookup"],
             }
-        )
+    except HTTPException:
+        raise
     except Exception as ex:
-        logging.exception("Exception in /chat")
-        return jsonify({"error": str(ex)}), 500
+        log.exception(f"Error in /chat: {ex}")
+        raise HTTPException(status_code=500, detail=str(ex))
+
+@app.get("/getblobclienturl")
+async def get_blob_client_url():
+    """Get a URL for a file in Blob Storage with SAS token.
 
-@app.route("/getblobclienturl")
-def get_blob_client_url():
-    """Get a URL for a file in Blob Storage with SAS token"""
+    This function generates a Shared Access Signature (SAS) token for accessing a file in Blob Storage.
+    The generated URL includes the SAS token as a query parameter.
+
+    Returns:
+        dict: A dictionary containing the URL with the SAS token.
+ """ sas_token = generate_account_sas( - AZURE_BLOB_STORAGE_ACCOUNT, - AZURE_BLOB_STORAGE_KEY, + ENV["AZURE_BLOB_STORAGE_ACCOUNT"], + ENV["AZURE_BLOB_STORAGE_KEY"], resource_types=ResourceTypes(object=True, service=True, container=True), permission=AccountSasPermissions( read=True, @@ -221,28 +246,46 @@ def get_blob_client_url(): ), expiry=datetime.utcnow() + timedelta(hours=1), ) - return jsonify({"url": f"{blob_client.url}?{sas_token}"}) - -@app.route("/getalluploadstatus", methods=["POST"]) -def get_all_upload_status(): - """Get the status of all file uploads in the last N hours""" - timeframe = request.json["timeframe"] - state = request.json["state"] + return {"url": f"{blob_client.url}?{sas_token}"} + +@app.post("/getalluploadstatus") +async def get_all_upload_status(request: Request): + """ + Get the status of all file uploads in the last N hours. + + Parameters: + - request: The HTTP request object. + + Returns: + - results: The status of all file uploads in the specified timeframe. + """ + json_body = await request.json() + timeframe = json_body.get("timeframe") + state = json_body.get("state") try: results = statusLog.read_files_status_by_timeframe(timeframe, State[state]) except Exception as ex: - logging.exception("Exception in /getalluploadstatus") - return jsonify({"error": str(ex)}), 500 - return jsonify(results) + log.exception("Exception in /getalluploadstatus") + raise HTTPException(status_code=500, detail=str(ex)) + return results + +@app.post("/logstatus") +async def logstatus(request: Request): + """ + Log the status of a file upload to CosmosDB. -@app.route("/logstatus", methods=["POST"]) -def logstatus(): - """Log the status of a file upload to CosmosDB""" + Parameters: + - request: Request object containing the HTTP request data. + + Returns: + - A dictionary with the status code 200 if successful, or an error message with status code 500 if an exception occurs. 
+    """
     try:
-        path = request.json["path"]
-        status = request.json["status"]
-        status_classification = StatusClassification[request.json["status_classification"].upper()]
-        state = State[request.json["state"].upper()]
+        json_body = await request.json()
+        path = json_body.get("path")
+        status = json_body.get("status")
+        status_classification = StatusClassification[json_body.get("status_classification").upper()]
+        state = State[json_body.get("state").upper()]
 
         statusLog.upsert_document(document_path=path,
                                   status=status,
@@ -252,73 +295,106 @@ def logstatus():
         statusLog.save_document(document_path=path)
 
     except Exception as ex:
-        logging.exception("Exception in /logstatus")
-        return jsonify({"error": str(ex)}), 500
-    return jsonify({"status": 200})
+        log.exception("Exception in /logstatus")
+        raise HTTPException(status_code=500, detail=str(ex))
+    return {"status": 200}
 
 # Return AZURE_OPENAI_CHATGPT_DEPLOYMENT
-@app.route("/getInfoData")
-def get_info_data():
-    """Get the info data for the app"""
-    response = jsonify(
-        {
-            "AZURE_OPENAI_CHATGPT_DEPLOYMENT": f"{AZURE_OPENAI_CHATGPT_DEPLOYMENT}",
-            "AZURE_OPENAI_MODEL_NAME": f"{model_name}",
-            "AZURE_OPENAI_MODEL_VERSION": f"{model_version}",
-            "AZURE_OPENAI_SERVICE": f"{AZURE_OPENAI_SERVICE}",
-            "AZURE_SEARCH_SERVICE": f"{AZURE_SEARCH_SERVICE}",
-            "AZURE_SEARCH_INDEX": f"{AZURE_SEARCH_INDEX}",
-            "TARGET_LANGUAGE": f"{QUERY_TERM_LANGUAGE}",
-            "USE_AZURE_OPENAI_EMBEDDINGS": USE_AZURE_OPENAI_EMBEDDINGS,
-            "EMBEDDINGS_DEPLOYMENT": f"{EMBEDDING_DEPLOYMENT_NAME}",
-            "EMBEDDINGS_MODEL_NAME": f"{embedding_model_name}",
-            "EMBEDDINGS_MODEL_VERSION": f"{embedding_model_version}",
-        })
+@app.get("/getInfoData")
+async def get_info_data():
+    """
+    Get the info data for the app.
+
+    Returns:
+        dict: A dictionary containing various information data for the app.
+        - "AZURE_OPENAI_CHATGPT_DEPLOYMENT": The deployment information for Azure OpenAI ChatGPT.
+        - "AZURE_OPENAI_MODEL_NAME": The name of the Azure OpenAI model.
+        - "AZURE_OPENAI_MODEL_VERSION": The version of the Azure OpenAI model.
+        - "AZURE_OPENAI_SERVICE": The Azure OpenAI service information.
+        - "AZURE_SEARCH_SERVICE": The Azure search service information.
+        - "AZURE_SEARCH_INDEX": The Azure search index information.
+        - "TARGET_LANGUAGE": The target language for query terms.
+        - "USE_AZURE_OPENAI_EMBEDDINGS": Flag indicating whether to use Azure OpenAI embeddings.
+        - "EMBEDDINGS_DEPLOYMENT": The deployment information for embeddings.
+        - "EMBEDDINGS_MODEL_NAME": The name of the embeddings model.
+        - "EMBEDDINGS_MODEL_VERSION": The version of the embeddings model.
+    """
+    response = {
+        "AZURE_OPENAI_CHATGPT_DEPLOYMENT": ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
+        "AZURE_OPENAI_MODEL_NAME": f"{model_name}",
+        "AZURE_OPENAI_MODEL_VERSION": f"{model_version}",
+        "AZURE_OPENAI_SERVICE": ENV["AZURE_OPENAI_SERVICE"],
+        "AZURE_SEARCH_SERVICE": ENV["AZURE_SEARCH_SERVICE"],
+        "AZURE_SEARCH_INDEX": ENV["AZURE_SEARCH_INDEX"],
+        "TARGET_LANGUAGE": ENV["QUERY_TERM_LANGUAGE"],
+        "USE_AZURE_OPENAI_EMBEDDINGS": ENV["USE_AZURE_OPENAI_EMBEDDINGS"],
+        "EMBEDDINGS_DEPLOYMENT": ENV["EMBEDDING_DEPLOYMENT_NAME"],
+        "EMBEDDINGS_MODEL_NAME": f"{embedding_model_name}",
+        "EMBEDDINGS_MODEL_VERSION": f"{embedding_model_version}",
+    }
     return response
 
 # Return AZURE_OPENAI_CHATGPT_DEPLOYMENT
-@app.route("/getWarningBanner")
-def get_warning_banner():
+@app.get("/getWarningBanner")
+async def get_warning_banner():
     """Get the warning banner text"""
-    response = jsonify(
-        {
-            "WARNING_BANNER_TEXT": f"{CHAT_WARNING_BANNER_TEXT}"
-        })
+    response = {
+        "WARNING_BANNER_TEXT": ENV["CHAT_WARNING_BANNER_TEXT"]
+    }
     return response
 
-@app.route("/getcitation", methods=["POST"])
-def get_citation():
-    """Get the citation for a given file"""
-    citation = urllib.parse.unquote(request.json["citation"])
+@app.post("/getcitation")
+async def get_citation(request: Request):
+    """
+    Get the citation for a given file
+
+    Parameters:
+        request (Request): The HTTP request object
+
+    Returns:
+        dict: The citation results in JSON format
+    """
     try:
+        json_body = await request.json()
+        citation = urllib.parse.unquote(json_body.get("citation"))
+
         blob = blob_container.get_blob_client(citation).download_blob()
         decoded_text = blob.readall().decode()
-        results = jsonify(json.loads(decoded_text))
+        results = json.loads(decoded_text)
     except Exception as ex:
-        logging.exception("Exception in /getalluploadstatus")
-        return jsonify({"error": str(ex)}), 500
-    return jsonify(results.json)
+        log.exception("Exception in /getcitation")
+        raise HTTPException(status_code=500, detail=str(ex))
+    return results
 
 # Return APPLICATION_TITLE
-@app.route("/getApplicationTitle")
-def get_application_title():
-    """Get the application title text"""
-    response = jsonify(
-        {
-            "APPLICATION_TITLE": f"{APPLICATION_TITLE}"
-        })
+@app.get("/getApplicationTitle")
+async def get_application_title():
+    """Get the application title text
+
+    Returns:
+        dict: A dictionary containing the application title.
+    """
+    response = {
+        "APPLICATION_TITLE": ENV["APPLICATION_TITLE"]
+    }
     return response
 
-@app.route("/getalltags", methods=["GET"])
-def get_all_tags():
-    """Get the status of all tags in the system"""
+@app.get("/getalltags")
+async def get_all_tags():
+    """
+    Get the status of all tags in the system
+
+    Returns:
+        dict: A dictionary containing the status of all tags
+    """
    try:
         results = tagsHelper.get_all_tags()
     except Exception as ex:
-        logging.exception("Exception in /getalltags")
-        return jsonify({"error": str(ex)}), 500
-    return jsonify(results)
+        log.exception("Exception in /getalltags")
+        raise HTTPException(status_code=500, detail=str(ex))
+    return results
+
+app.mount("/", StaticFiles(directory="static"), name="static")
 
 if __name__ == "__main__":
-    logging.info("IA WebApp Starting Up...")
-    app.run(threaded=True)
+    log.info("IA WebApp Starting Up...")
diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py
index d9137d0a0..0d6f8d768 100644
--- a/app/backend/approaches/approach.py
+++ b/app/backend/approaches/approach.py
@@ -8,7 +8,7 @@ class Approach:
     documents.
""" - def run(self, history: list[dict], overrides: dict) -> any: + async def run(self, history: list[dict], overrides: dict) -> any: """ Run the approach on the query and documents. Not implemented. diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 373821bf8..4fe46cb5b 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -import json import re import logging import urllib.parse @@ -10,9 +9,7 @@ import openai from approaches.approach import Approach -from azure.core.credentials import AzureKeyCredential -from azure.search.documents import SearchClient -from azure.search.documents.indexes import SearchIndexClient +from azure.search.documents import SearchClient from azure.search.documents.models import RawVectorQuery from azure.search.documents.models import QueryType @@ -28,9 +25,7 @@ import tiktoken from core.messagebuilder import MessageBuilder from core.modelhelper import get_token_limit -from core.modelhelper import num_tokens_from_messages import requests -from urllib.parse import quote # Simple retrieve-then-read implementation, using the Cognitive Search and # OpenAI APIs directly. It first retrieves top documents from search, @@ -93,6 +88,8 @@ class ChatReadRetrieveReadApproach(Approach): # # Define a class variable for the base URL # EMBEDDING_SERVICE_BASE_URL = 'https://infoasst-cr-{}.azurewebsites.net' + + def __init__( self, @@ -141,7 +138,12 @@ def __init__( # def run(self, history: list[dict], overrides: dict) -> any: - def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any: + async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any: + + log = logging.getLogger("uvicorn") + log.setLevel('DEBUG') + log.propagate = True + use_semantic_captions = True if overrides.get("semantic_captions") else False top = overrides.get("top") or 3 user_persona = overrides.get("user_persona", "") @@ -165,7 +167,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A self.chatgpt_token_limit - len(user_q) ) - chat_completion = openai.ChatCompletion.create( + chat_completion = await openai.ChatCompletion.acreate( deployment_id=self.chatgpt_deployment, model=self.model_name, messages=messages, @@ -191,7 +193,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A response_data = response.json() embedded_query_vector =response_data.get('data') else: - logging.error(f"Error generating embedding:: {response.status_code}") + log.error(f"Error generating embedding:: {response.status_code}") raise Exception('Error generating embedding:', response.status_code) #vector set up for pure vector search & Hybrid search & Hybrid semantic @@ -220,7 +222,6 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A # r=self.search_client.search(search_text=None, vectors=[vector], filter="search.ismatch('upload/ospolicydocs/China, climate change and the energy transition.pdf', 'file_name')", top=top) # hybrid semantic search using semantic reranker - if (not self.is_gov_cloud_deployment and overrides.get("semantic_ranker")): r = self.search_client.search( generated_query, @@ -264,7 +265,6 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A "page_number": str(doc[self.page_number_field][0]) or "0", } - # create a single string of all 
the results to be used in the prompt results_text = "".join(results) if results_text == "": @@ -316,7 +316,6 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A ) # STEP 3: Generate a contextual and content-specific answer using the search results and chat history. #Added conditional block to use different system messages for different models. - if self.model_name.startswith("gpt-35-turbo"): messages = self.get_messages_from_history( system_message, @@ -337,8 +336,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A #print("System Message Tokens: ", self.num_tokens_from_string(system_message, "cl100k_base")) #print("Few Shot Tokens: ", self.num_tokens_from_string(self.response_prompt_few_shots[0]['content'], "cl100k_base")) #print("Message Tokens: ", self.num_tokens_from_string(message_string, "cl100k_base")) - - chat_completion = openai.ChatCompletion.create( + chat_completion = await openai.ChatCompletion.acreate( deployment_id=self.chatgpt_deployment, model=self.model_name, messages=messages, @@ -368,7 +366,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A #print("Few Shot Tokens: ", self.num_tokens_from_string(self.response_prompt_few_shots[0]['content'], "cl100k_base")) #print("Message Tokens: ", self.num_tokens_from_string(message_string, "cl100k_base")) - chat_completion = openai.ChatCompletion.create( + chat_completion = await openai.ChatCompletion.acreate( deployment_id=self.chatgpt_deployment, model=self.model_name, messages=messages, @@ -376,7 +374,6 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A max_tokens=1024, n=1 ) - # STEP 4: Format the response msg_to_display = '\n\n'.join([str(message) for message in messages]) @@ -458,5 +455,5 @@ def get_source_file_with_sas(self, source_file: str) -> str: ) return source_file + "?" 
+ sas_token
         except Exception as error:
-            logging.error(f"Unable to parse source file name: {str(error)}")
+            print(f"Unable to parse source file name: {str(error)}")
             return ""
\ No newline at end of file
diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt
index 518909a03..485b0c725 100644
--- a/app/backend/requirements.txt
+++ b/app/backend/requirements.txt
@@ -9,4 +9,7 @@ openai==0.27.0
 azure-search-documents==11.4.0b11
 azure-storage-blob==12.16.0
 azure-cosmos == 4.3.1
-tiktoken == 0.4.0
\ No newline at end of file
+tiktoken == 0.4.0
+fastapi == 0.103.2
+fastapi-utils == 0.2.1
+uvicorn == 0.23.2
\ No newline at end of file
diff --git a/app/enrichment/app.py b/app/enrichment/app.py
index 279e35f5f..3fa2024df 100644
--- a/app/enrichment/app.py
+++ b/app/enrichment/app.py
@@ -4,6 +4,8 @@ import json
 import logging
 import os
+import threading
+import time
 import re
 from datetime import datetime
 from typing import List
@@ -276,9 +278,17 @@ def get_tags_and_upload_to_cosmos(blob_service_client, blob_path):
     tagsHelper.upsert_document(blob_path, tags_list)
     return tags_list
 
-
 @app.on_event("startup")
-@repeat_every(seconds=5, logger=log, raise_exceptions=True)
+def startup_event():
+    poll_thread = threading.Thread(target=poll_queue_thread)
+    poll_thread.daemon = True
+    poll_thread.start()
+
+def poll_queue_thread():
+    while True:
+        poll_queue()
+        time.sleep(5)
+
 def poll_queue() -> None:
     """Polls the queue for messages and embeds them"""
@@ -294,6 +304,11 @@ def poll_queue() -> None:
     response = queue_client.receive_messages(max_messages=int(ENV["DEQUEUE_MESSAGE_BATCH_SIZE"]))
     messages = [x for x in response]
 
+    if not messages:
+        log.debug("No messages to process. Waiting for a couple of minutes...")
+        time.sleep(120)  # Sleep for 2 minutes
+        return
+
     target_embeddings_model = re.sub(r'[^a-zA-Z0-9_\-.]', '_', ENV["TARGET_EMBEDDINGS_MODEL"])
 
     # Remove from queue to prevent duplicate processing from any additional instances
diff --git a/app/frontend/src/components/filepicker/file-picker.tsx b/app/frontend/src/components/filepicker/file-picker.tsx
index f8473a9ec..def593b3c 100644
--- a/app/frontend/src/components/filepicker/file-picker.tsx
+++ b/app/frontend/src/components/filepicker/file-picker.tsx
@@ -24,17 +24,17 @@ const FilePicker = ({folderPath, tags}: Props) => {
 
     // handler called when files are selected via the Dropzone component
     const handleOnChange = useCallback((files: any) => {
+        let filesArray = Array.from(files);
         filesArray = filesArray.map((file) => ({
             id: nanoid(),
             file
-        }));
-
+        }));
         setFiles(filesArray as any);
         setProgress(0);
         setUploadStarted(false);
-    }, []);
+    }, []);
diff --git a/docs/deployment/autoscale_sku.md b/docs/deployment/autoscale_sku.md
new file mode 100644
index 000000000..688d42133
--- /dev/null
+++ b/docs/deployment/autoscale_sku.md
@@ -0,0 +1,149 @@
+# Autoscale Settings Documentation
+
+These are the current out-of-the-box autoscale settings.
+You may find settings that better fit your needs; this document explains how to customize them.
+
+## Azure Functions Service Plan Autoscale
+
+### Overview
+
+The Azure Functions Service Plan autoscale settings are defined in the file located at `/infra/core/host/funcserviceplan.bicep`. These settings enable automatic scaling of the Azure Functions Service Plan based on CPU usage metrics.
+
+**File Location:** `/infra/core/host/funcserviceplan.bicep`
+
+#### Scaling Rules
+
+1. **Increase Capacity Rule:**
+   - **Metric:** `CpuPercentage`
+   - **Operator:** `GreaterThan`
+   - **Threshold:** `60%`
+   - **Time Window:** `5 minutes`
+   - **Scaling Action:** Increase capacity by `2` with a cooldown of `5 minutes`.
+
+2. **Decrease Capacity Rule:**
+   - **Metric:** `CpuPercentage`
+   - **Operator:** `LessThan`
+   - **Threshold:** `40%`
+   - **Time Window:** `5 minutes`
+   - **Scaling Action:** Decrease capacity by `2` with a cooldown of `2 minutes`.
+
+## App Service Plan Autoscale for Enrichment App
+
+### Overview
+
+The App Service Plan autoscale settings for the enrichment app are defined in the file located at `/infra/core/host/enrichmentappserviceplan.bicep`. These settings enable automatic scaling of the App Service Plan based on CPU usage metrics.
+
+### Key Parameters
+
+**File Location:** `/infra/core/host/enrichmentappserviceplan.bicep`
+
+#### Scaling Rules
+
+1. **Increase Capacity Rule:**
+   - **Metric:** `CpuPercentage`
+   - **Operator:** `GreaterThan`
+   - **Threshold:** `60%`
+   - **Time Window:** `5 minutes`
+   - **Scaling Action:** Increase capacity by `1` with a cooldown of `5 minutes`.
+
+2. **Decrease Capacity Rule:**
+   - **Metric:** `CpuPercentage`
+   - **Operator:** `LessThan`
+   - **Threshold:** `20%`
+   - **Time Window:** `10 minutes`
+   - **Scaling Action:** Decrease capacity by `1` with a cooldown of `15 minutes`.
+
+### Customization
+
+To customize the App Service Plan autoscale settings, modify the parameters mentioned above in the specified Bicep file, then run the `make infrastructure` command.
+
+# SKU Settings Documentation
+
+### Overview
+
+The SKU settings for all Service Plans are defined in the file located at `/infra/main.bicep`. The SKU (Stock Keeping Unit) represents the pricing tier or plan for your App Service. It defines the performance, features, and capacity of the App Service.
+More information can be found [here](https://azure.microsoft.com/en-us/pricing/details/app-service/windows/#purchase-options).
+
+## Web App Service Plan SKU
+
+**File Location:** `/infra/main.bicep`
+
+#### SKU Settings
+
+- **Name:** `S1`
+- **Capacity:** `3`
+
+## Functions Service Plan SKU
+
+**File Location:** `/infra/main.bicep`
+
+#### SKU Settings
+
+- **Name:** `S2`
+- **Capacity:** `2`
+
+## Enrichment App Service Plan SKU
+
+**File Location:** `/infra/main.bicep`
+
+#### SKU Settings
+
+- **Name:** `P1v3`
+- **Tier:** `PremiumV3`
+- **Size:** `P1v3`
+- **Family:** `Pv3`
+- **Capacity:** `1`
+
+### Enrichment Message Dequeue Parameter
+
+There is a property called `DEQUEUE_MESSAGE_BATCH_SIZE` that can be set in the local.env file; it defaults to **3** in `infra/main.bicep` and `app/enrichment/app.py`. This means the app will process 3 messages from the queue at a time. This has been found to be the most optimal value for the existing configuration, but it can be increased if you also increase the enrichment app service SKU. Note that increasing it beyond what the app service SKU can handle will cause issues. The sketch below shows how this value drives dequeuing.
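+
+For illustration, the following is a minimal sketch mirroring the `receive_messages` call in `app/enrichment/app.py`. The connection string variable is an assumption made for the sketch; the app itself builds its queue client from the storage account settings.
+
+```python
+import os
+
+from azure.storage.queue import QueueClient
+
+# Assumed connection details -- substitute your own storage account values.
+queue_client = QueueClient.from_connection_string(
+    conn_str=os.environ["AZURE_STORAGE_CONNECTION_STRING"],
+    queue_name=os.environ["EMBEDDINGS_QUEUE"],
+)
+
+# DEQUEUE_MESSAGE_BATCH_SIZE caps how many messages one polling pass pulls off the queue.
+batch_size = int(os.environ.get("DEQUEUE_MESSAGE_BATCH_SIZE", "3"))
+for message in queue_client.receive_messages(max_messages=batch_size):
+    print(message.content)  # the enrichment app would embed, then delete, each message
+```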
+
+### Customization
+
+To customize the App Service Plan SKU settings, modify the `sku` parameters in the specified Bicep file and run the `make deploy` or `make infrastructure` command.
+
+This can also be adjusted in the Azure Portal.
+
+**Note:** Adjusting the scale or tier can cause outages until the redeployment occurs.
+
+### Steps to Scale Up:
+
+>1. **Sign in to the Azure Portal:**
+> - Open a web browser and navigate to the [Azure Portal](https://portal.azure.com/).
+> - Log in with your Azure account credentials.
+
+>2. **Navigate to the App Service:**
+> - In the left navigation pane, select "App Services."
+> - Click on the specific App Service you want to scale.
+
+>3. **Access the Scale Up Blade:**
+> - In the App Service menu, find and click on "Scale up (App Service plan)" in the left sidebar.
+
+>4. **Choose a New Pricing Tier:**
+> - On the "Scale Up" blade, you'll see different pricing tiers representing various levels of resources.
+> - Select the desired pricing tier that corresponds to the scale you need.
+
+>5. **Review and Apply Changes:**
+> - Review the information about the selected pricing tier, including its features and costs.
+> - Click the "Apply" or "Save" button to apply the changes.
+
+### Considerations:
+
+- **Cost Implications:**
+  - Be aware of the cost implications associated with higher pricing tiers. Review the Azure Pricing documentation for details on costs.
+
+- **Resource Limits:**
+  - Ensure that the new pricing tier aligns with the resource requirements of your application. Some tiers may have limitations on resources.
+
+- **Performance Impact:**
+  - Scaling up provides additional resources, potentially improving performance. However, it's essential to assess whether your application benefits from the increased resources.
diff --git a/docs/deployment/deployment.md b/docs/deployment/deployment.md
index 1860a7064..0d05dd608 100644
--- a/docs/deployment/deployment.md
+++ b/docs/deployment/deployment.md
@@ -148,6 +148,7 @@ At this point deployment is complete. Please go to the [Using the IA Accelerator
 
 ## Need Help?
 
-To review logs try [Using the Workbook Template](/docs/deployment/worbook_usage.md)
+Check these [troubleshooting methods](/docs/deployment/troubleshooting.md).
+
 If you need assistance with deployment or configuration of this accelerator, please leverage the Discussion forum in this repository, or reach out to your Microsoft Unified Support account manager.
diff --git a/docs/deployment/statusdb_cosmos.md b/docs/deployment/statusdb_cosmos.md
new file mode 100644
index 000000000..c8e9b0d97
--- /dev/null
+++ b/docs/deployment/statusdb_cosmos.md
@@ -0,0 +1,32 @@
+## Navigating to Azure Resource Group and Opening Cosmos Account Resource
+
+>1. Log in to the Azure portal.
+>2. In the left-hand menu, click on "Resource groups".
+>3. Select the desired resource group from the list.
+>4. In the resource group overview, locate and click on the Cosmos account resource.
+
+![The Cosmos account resource in the resource group](/docs/images/cosmos_account.png)
+
+## Accessing Data Explorer
+
+>1. Once you are on the Cosmos account resource page, navigate to the left-hand menu.
+>2. Under the "Settings" section, click on "Data Explorer".
+
+![Data Explorer in the Cosmos account menu](/docs/images/data_explorer.png)
+
+## Expanding the Database
+
+>1. In the Data Explorer, you will see a list of databases associated with the Cosmos account.
+>2. Locate the "statusdb" database and click on it to expand.
+
+## Viewing the Items Table
+
+>1. Within the expanded "statusdb" database, you will find a list of containers (tables).
+>2. Look for the "items" table and click on it.
+
+## Checking File Processing Status and Errors
+
+>1. Once you are on the "items" table page, you will see a list of items (documents) in the table.
+>2. Each item represents a file being processed.
+>3. Look for the "status" field to see the status of each file being processed.
+>4. If there are any associated errors, they will be displayed in the "errors" field.
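+
+If you prefer to check status from a script instead of the portal, below is a minimal sketch using the `azure-cosmos` package, reusing the `COSMOSDB_URL` and `COSMOSDB_KEY` settings the backend already reads. The `SELECT *` query is illustrative; narrow it once you know which fields you need.
+
+```python
+import os
+
+from azure.cosmos import CosmosClient
+
+client = CosmosClient(os.environ["COSMOSDB_URL"], credential=os.environ["COSMOSDB_KEY"])
+container = client.get_database_client("statusdb").get_container_client("items")
+
+# Each item tracks one file; the "status" and "errors" fields described above
+# show its progress and any failures.
+for item in container.query_items(query="SELECT * FROM c", enable_cross_partition_query=True):
+    print(item)
+```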
diff --git a/docs/deployment/troubleshooting.md b/docs/deployment/troubleshooting.md
new file mode 100644
index 000000000..c9110d6ea
--- /dev/null
+++ b/docs/deployment/troubleshooting.md
@@ -0,0 +1,31 @@
+# Troubleshooting
+
+The sections below cover troubleshooting the solution, depending on which area of the process is giving you trouble.
+
+## Infrastructure Deployment
+
+If you are having issues with infrastructure deployment, the errors should be apparent in the `make deploy` output.
+
+You can also navigate to the Subscription in the Azure portal, click the "Deployments" option, and find your deployment with its related details and errors there.
+
+Post the full error and logs, along with the configuration you used, to this GitHub repo's Issues tab.
+
+More info can be found [here](https://learn.microsoft.com/en-us/azure/azure-resource-manager/templates/deployment-history?tabs=azure-portal).
+
+## File Processing
+
+If you encounter issues processing files, look at Cosmos DB: the items table in statusdb holds a step-by-step status for each file.
+Check out this section for more details: [CosmosDB Usage](/docs/deployment/statusdb_cosmos.md).
+
+For more information on how to use Cosmos, look [here](https://learn.microsoft.com/en-us/azure/cosmos-db/data-explorer).
+
+## Log Analytics Workbook
+
+WebApp logs, Function logs, and App Service logs can be found in the Log Analytics Workspace.
+
+This solution includes a workbook with default queries that can be used to explore and troubleshoot further.
+Check out the section [Workbook Usage](/docs/deployment/worbook_usage.md).
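+
+If you would rather pull logs from a script than from the portal, below is a minimal sketch using the `azure-monitor-query` package. The workspace ID placeholder and the `AppTraces` table name are assumptions; substitute the workspace from your deployment and the table your workbook queries.
+
+```python
+from datetime import timedelta
+
+from azure.identity import DefaultAzureCredential
+from azure.monitor.query import LogsQueryClient
+
+client = LogsQueryClient(DefaultAzureCredential())
+
+response = client.query_workspace(
+    workspace_id="<your-log-analytics-workspace-id>",  # placeholder
+    query="AppTraces | take 20",  # assumed table name
+    timespan=timedelta(hours=1),
+)
+for table in response.tables:
+    for row in table.rows:
+        print(row)
+```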
+
+For more information on Log Analytics and Kusto Query Language, look [here](https://learn.microsoft.com/en-us/azure/azure-monitor/logs/queries?tabs=groupby).
\ No newline at end of file
diff --git a/docs/features/optional_features.md b/docs/features/optional_features.md
index b0634a3b1..21fc6c50f 100644
--- a/docs/features/optional_features.md
+++ b/docs/features/optional_features.md
@@ -10,6 +10,7 @@ Please see below sections for coverage of IA Accelerator optional features.
 * [Customer Usage Attribution](/docs/features/features.md#customer-usage-attribution)
 * [Sovereign Region Deployment](/docs/features/features.md#sovereign-region-deployment)
 * [Configure REST API access](#configure-rest-api-access)
+* [Customize Autoscale and App Service SKUs](#customize-autoscale)
 
 ## Configuring your own language ENV file
 
@@ -45,3 +46,8 @@
 If you are wanting to use the API stand-alone or use a custom UI.
 Check out how to [enable OAuth Client Credentials Flow](/docs/deployment/client_credentials_flow.md)
+
+## Customize Autoscale
+
+If you want to learn more about autoscale settings and App Service SKUs,
+check out how to [customize autoscale settings](/docs/deployment/autoscale_sku.md).
diff --git a/docs/images/cosmos_account.png b/docs/images/cosmos_account.png
new file mode 100644
index 000000000..11e3617c6
Binary files /dev/null and b/docs/images/cosmos_account.png differ
diff --git a/docs/images/data_explorer.png b/docs/images/data_explorer.png
new file mode 100644
index 000000000..b677733f3
Binary files /dev/null and b/docs/images/data_explorer.png differ
diff --git a/docs/process_flow.drawio.png b/docs/process_flow.drawio.png
index 08bd7d5ea..b69d6f7d2 100644
Binary files a/docs/process_flow.drawio.png and b/docs/process_flow.drawio.png differ
diff --git a/functions/host.json b/functions/host.json
index 58badc323..b13fe7eea 100644
--- a/functions/host.json
+++ b/functions/host.json
@@ -12,9 +12,14 @@
     "id": "Microsoft.Azure.Functions.ExtensionBundle",
     "version": "[3.*, 4.0.0)"
   },
+  "extensions": {
+    "queues": {
+      "batchSize": 3
+    }
+  },
   "concurrency": {
-    "dynamicConcurrencyEnabled": true,
-    "snapshotPersistenceEnabled": true
+    "dynamicConcurrencyEnabled": false,
+    "snapshotPersistenceEnabled": false
   },
-  "functionTimeout": "01:00:00"
+  "functionTimeout": "02:00:00"
 }
\ No newline at end of file
diff --git a/infra/core/host/enrichmentappserviceplan.bicep b/infra/core/host/enrichmentappserviceplan.bicep
index fcf18b103..c01ebe20c 100644
--- a/infra/core/host/enrichmentappserviceplan.bicep
+++ b/infra/core/host/enrichmentappserviceplan.bicep
@@ -6,8 +6,6 @@ param kind string = ''
 param reserved bool = true
 param sku object
 
-param storageAccountId string
-
 // Create an App Service Plan to group applications under the same payment plan and SKU, specifically for containers
 resource appServicePlan 'Microsoft.Web/serverfarms@2022-03-01' = {
@@ -40,29 +38,45 @@ resource scaleOutRule 'Microsoft.Insights/autoscalesettings@2022-10-01' = {
       {
         name: 'Scale out condition'
         capacity: {
-          maximum: '3'
+          maximum: '5'
           default: '1'
           minimum: '1'
         }
         rules: [
           {
+            metricTrigger: {
+              metricName: 'CpuPercentage'
+              metricResourceUri: appServicePlan.id
+              timeGrain: 'PT1M'
+              statistic: 'Average'
+              timeWindow: 'PT5M'
+              timeAggregation: 'Average'
+              operator: 'GreaterThan'
+              threshold: 60
+            }
             scaleAction: {
               direction: 'Increase'
               type: 'ChangeCount'
               value: '1'
               cooldown: 'PT5M'
             }
+          }
+          {
             metricTrigger: {
-              metricName: 'ApproximateMessageCount'
-              metricNamespace: ''
-              metricResourceUri: storageAccountId
-              operator: 'GreaterThan'
-              statistic: 'Average'
-              threshold: 10
-              timeAggregation: 'Average'
+              metricName: 'CpuPercentage'
+              metricResourceUri: appServicePlan.id
               timeGrain: 'PT1M'
+              statistic: 'Average'
               timeWindow: 'PT10M'
-              dividePerInstance: true
+              timeAggregation: 'Average'
+              operator: 'LessThan'
+              threshold: 20
+            }
+            scaleAction: {
+              direction: 'Decrease'
+              type: 'ChangeCount'
+              value: '1'
+              cooldown: 'PT15M'
            }
          }
        ]
diff --git a/infra/core/host/funcserviceplan.bicep b/infra/core/host/funcserviceplan.bicep
index 72ae143b6..4379b8285 100644
--- a/infra/core/host/funcserviceplan.bicep
+++ b/infra/core/host/funcserviceplan.bicep
@@ -17,5 +17,62 @@ resource funcServicePlan 'Microsoft.Web/serverfarms@2022-03-01' = {
   }
 }
 
+resource autoscaleSettings 'Microsoft.Insights/autoscalesettings@2022-10-01' = {
+  name: '${funcServicePlan.name}-Autoscale'
+  location: location
+  properties: {
+    enabled: true
+    profiles: [
+      {
+        name:
'${funcServicePlan.name}-Autoscale' + capacity: { + default: '2' + minimum: '2' + maximum: '10' + } + rules: [ + { + metricTrigger: { + metricName: 'CpuPercentage' + metricResourceUri: funcServicePlan.id + timeGrain: 'PT1M' + statistic: 'Average' + timeWindow: 'PT5M' + timeAggregation: 'Average' + operator: 'GreaterThan' + threshold: 60 + } + scaleAction: { + direction: 'Increase' + type: 'ChangeCount' + value: '2' + cooldown: 'PT5M' + } + } + { + metricTrigger: { + metricName: 'CpuPercentage' + metricResourceUri: funcServicePlan.id + timeGrain: 'PT1M' + statistic: 'Average' + timeWindow: 'PT5M' + timeAggregation: 'Average' + operator: 'LessThan' + threshold: 40 + } + scaleAction: { + direction: 'Decrease' + type: 'ChangeCount' + value: '2' + cooldown: 'PT2M' + } + } + ] + } + ] + targetResourceUri: funcServicePlan.id + } +} + output id string = funcServicePlan.id output name string = funcServicePlan.name diff --git a/infra/main.bicep b/infra/main.bicep index 9b50a0c72..751d69a1c 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -155,8 +155,8 @@ module funcServicePlan 'core/host/funcserviceplan.bicep' = { location: location tags: tags sku: { - name: 'S3' - capacity: 5 + name: 'S2' + capacity: 2 } kind: 'linux' } @@ -175,11 +175,10 @@ module enrichmentAppServicePlan 'core/host/enrichmentappserviceplan.bicep' = { tier: 'PremiumV3' size: 'P1v3' family: 'Pv3' - capacity: 3 + capacity: 1 } kind: 'linux' reserved: true - storageAccountId: '/subscriptions/${subscriptionId}/resourceGroups/${rg.name}/providers/Microsoft.Storage/storageAccounts/${storage.outputs.name}/services/queue/queues/${embeddingsQueue}' } } @@ -204,7 +203,7 @@ module enrichmentApp 'core/host/enrichmentappservice.bicep' = { AZURE_BLOB_STORAGE_KEY: storage.outputs.key EMBEDDINGS_QUEUE: embeddingsQueue LOG_LEVEL: 'DEBUG' - DEQUEUE_MESSAGE_BATCH_SIZE: 1 + DEQUEUE_MESSAGE_BATCH_SIZE: 3 AZURE_BLOB_STORAGE_ACCOUNT: storage.outputs.name AZURE_BLOB_STORAGE_CONTAINER: containerName AZURE_BLOB_STORAGE_UPLOAD_CONTAINER: uploadContainerName @@ -246,6 +245,7 @@ module backend 'core/host/appservice.bicep' = { runtimeVersion: '3.10' scmDoBuildDuringDeployment: true managedIdentity: true + appCommandLine: 'gunicorn --workers 2 --worker-class uvicorn.workers.UvicornWorker app:app --timeout 600' applicationInsightsName: logging.outputs.applicationInsightsName logAnalyticsWorkspaceName: logging.outputs.logAnalyticsName isGovCloudDeployment: isGovCloudDeployment diff --git a/scripts/inf-create.sh b/scripts/inf-create.sh index 4793b102d..855583f1f 100755 --- a/scripts/inf-create.sh +++ b/scripts/inf-create.sh @@ -87,6 +87,7 @@ if [ -n "${IN_AUTOMATION}" ]; then echo "Please create the Azure AD objects using the script at /scripts/create-ad-objs-for-deployment.sh and set the AD_WEBAPP_CLIENT_ID pipeline variable in Azure DevOps." 
  exit 1
fi
+  aadWebSPId=$ARM_SERVICE_PRINCIPAL_ID
   aadMgmtAppId=$AD_MGMTAPP_CLIENT_ID
   aadMgmtAppSecret=$AD_MGMTAPP_CLIENT_SECRET
   aadMgmtSPId=$AD_MGMT_SERVICE_PRINCIPAL_ID
@@ -196,6 +197,9 @@ echo $parameter_json > $DIR/../infra/main.parameters.json
 
 #make sure bicep is always the latest version
 az bicep upgrade
 
+#Remove the CUA deployment object if it already exists, to work around a Bicep limitation
+az deployment sub delete --name "pid-${CUSTOMER_USAGE_ATTRIBUTION_ID}"
+
 #deploy bicep
 az deployment sub what-if --location $LOCATION --template-file main.bicep --parameters main.parameters.json --name $RG_NAME
 if [ -z $SKIP_PLAN_CHECK ]
@@ -215,4 +219,4 @@ results=$(az deployment sub create --location $LOCATION --template-file main.bic
 #save deployment output
 printInfo "Writing output to infra_output.json"
 pushd "$DIR/.."
-echo $results > infra_output.json
+echo $results > infra_output.json
\ No newline at end of file
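
For reviewers who want to smoke-test the reworked backend locally, below is a minimal sketch of calling the new FastAPI `/chat` endpoint. It assumes the backend is running on port 5000 (as configured in `launch.json`); the question text and override values are illustrative.

```python
import requests

# The body mirrors what /chat reads: "approach" selects an entry in
# chat_approaches ("rrr"), "history" is the chat transcript, and
# "overrides" tunes retrieval (e.g. "top", "semantic_ranker").
payload = {
    "approach": "rrr",
    "history": [{"user": "What do the uploaded documents say about autoscaling?"}],
    "overrides": {"top": 3, "semantic_ranker": True},
}

response = requests.post("http://localhost:5000/chat", json=payload, timeout=600)
response.raise_for_status()
body = response.json()
print(body["answer"])
print(body["citation_lookup"])
```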