Skip to content

Commit

Permalink
Support use of AzureOpenAI proxy by prepdocs (Azure-Samples#1760)
Browse files Browse the repository at this point in the history
* Support custom URL for prepdocs

* update upload

* pwsh

* pwsh

* Move logic to embeddings

* Keep service variable

* Make mypy happy

* Fix tests

* Lower test coverage threshold (--cov-fail-under 87 -> 86)

* Update E2E
  • Loading branch information
pamelafox authored Jun 24, 2024
1 parent e505ab7 commit 28536f6
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
run: black . --check --verbose
- name: Run Python tests
if: runner.os != 'Windows'
run: python3 -m pytest -s -vv --cov --cov-fail-under=87
run: python3 -m pytest -s -vv --cov --cov-fail-under=86
- name: Run E2E tests with Playwright
id: e2e
if: runner.os != 'Windows'
Expand Down
10 changes: 7 additions & 3 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ async def setup_clients():
os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None
)
AZURE_OPENAI_EMB_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None
AZURE_OPENAI_CUSTOM_URL = os.getenv("AZURE_OPENAI_CUSTOM_URL")
AZURE_VISION_ENDPOINT = os.getenv("AZURE_VISION_ENDPOINT", "")
# Used only with non-Azure OpenAI deployments
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
Expand Down Expand Up @@ -499,6 +500,7 @@ async def setup_clients():
openai_host=OPENAI_HOST,
openai_model_name=OPENAI_EMB_MODEL,
openai_service=AZURE_OPENAI_SERVICE,
openai_custom_url=AZURE_OPENAI_CUSTOM_URL,
openai_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
openai_dimensions=OPENAI_EMB_DIMENSIONS,
openai_key=clean_key_if_exists(OPENAI_API_KEY),
Expand Down Expand Up @@ -527,12 +529,14 @@ async def setup_clients():

if OPENAI_HOST.startswith("azure"):
api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-03-01-preview"

if OPENAI_HOST == "azure_custom":
endpoint = os.environ["AZURE_OPENAI_CUSTOM_URL"]
if not AZURE_OPENAI_CUSTOM_URL:
raise ValueError("AZURE_OPENAI_CUSTOM_URL must be set when OPENAI_HOST is azure_custom")
endpoint = AZURE_OPENAI_CUSTOM_URL
else:
if not AZURE_OPENAI_SERVICE:
raise ValueError("AZURE_OPENAI_SERVICE must be set when OPENAI_HOST is azure")
endpoint = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"

if api_key := os.getenv("AZURE_OPENAI_API_KEY"):
openai_client = AsyncAzureOpenAI(api_version=api_version, azure_endpoint=endpoint, api_key=api_key)
else:
Expand Down
11 changes: 10 additions & 1 deletion app/backend/prepdocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def setup_embeddings_service(
openai_host: str,
openai_model_name: str,
openai_service: Union[str, None],
openai_custom_url: Union[str, None],
openai_deployment: Union[str, None],
openai_dimensions: int,
openai_key: Union[str, None],
Expand All @@ -126,6 +127,7 @@ def setup_embeddings_service(
)
return AzureOpenAIEmbeddingService(
open_ai_service=openai_service,
open_ai_custom_url=openai_custom_url,
open_ai_deployment=openai_deployment,
open_ai_model_name=openai_model_name,
open_ai_dimensions=openai_dimensions,
Expand Down Expand Up @@ -309,10 +311,16 @@ async def main(strategy: Strategy, setup_index: bool = True):
parser.add_argument(
"--disablebatchvectors", action="store_true", help="Don't compute embeddings in batch for the sections"
)

parser.add_argument(
"--openaicustomurl",
required=False,
help="Optional. Use this custom OpenAI URL instead of the default OpenAI URL",
)
parser.add_argument(
"--openaikey",
required=False,
help="Optional. Use this Azure OpenAI account key instead of the current user identity to login (use az login to set current user for Azure). This is required only when using non-Azure endpoints.",
help="Optional. Use this OpenAI account key instead of the current Azure user identity to login.",
)
parser.add_argument("--openaiorg", required=False, help="This is required only when using non-Azure endpoints.")
parser.add_argument(
Expand Down Expand Up @@ -419,6 +427,7 @@ async def main(strategy: Strategy, setup_index: bool = True):
openai_host=args.openaihost,
openai_model_name=args.openaimodelname,
openai_service=args.openaiservice,
openai_custom_url=args.openaicustomurl,
openai_deployment=args.openaideployment,
openai_dimensions=args.openaidimensions,
openai_key=clean_key_if_exists(args.openaikey),
Expand Down
9 changes: 8 additions & 1 deletion app/backend/prepdocslib/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,17 @@ def __init__(
open_ai_model_name: str,
open_ai_dimensions: int,
credential: Union[AsyncTokenCredential, AzureKeyCredential],
open_ai_custom_url: Union[str, None] = None,
disable_batch: bool = False,
):
super().__init__(open_ai_model_name, open_ai_dimensions, disable_batch)
self.open_ai_service = open_ai_service
if open_ai_service:
self.open_ai_endpoint = f"https://{open_ai_service}.openai.azure.com"
elif open_ai_custom_url:
self.open_ai_endpoint = open_ai_custom_url
else:
raise ValueError("Either open_ai_service or open_ai_custom_url must be provided")
self.open_ai_deployment = open_ai_deployment
self.credential = credential

Expand All @@ -187,7 +194,7 @@ class AuthArgs(TypedDict, total=False):
raise TypeError("Invalid credential type")

return AsyncAzureOpenAI(
azure_endpoint=f"https://{self.open_ai_service}.openai.azure.com",
azure_endpoint=self.open_ai_endpoint,
azure_deployment=self.open_ai_deployment,
api_version="2023-05-15",
**auth_args,
Expand Down
7 changes: 7 additions & 0 deletions scripts/prepdocs.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ if ($env:USE_FEATURE_INT_VECTORIZATION) {
$integratedVectorizationArg = "--useintvectorization $env:USE_FEATURE_INT_VECTORIZATION"
}

# Pick the API key to use for OpenAI: prefer AZURE_OPENAI_API_KEY when it is
# set (non-empty), otherwise fall back to OPENAI_API_KEY (which may be empty).
# NOTE(review): the argument list built below still passes
# "$env:OPENAI_API_KEY" to --openaikey rather than $openaiApiKey, so this
# selection appears to have no effect -- confirm which value is intended.
if ($env:AZURE_OPENAI_API_KEY) {
$openaiApiKey = $env:AZURE_OPENAI_API_KEY
} else {
$openaiApiKey = $env:OPENAI_API_KEY
}

$cwd = (Get-Location)
$dataArg = "`"$cwd/data/*`""

Expand All @@ -75,6 +81,7 @@ $argumentList = "./app/backend/prepdocs.py $dataArg --verbose " + `
"$searchAnalyzerNameArg " + `
"--openaihost `"$env:OPENAI_HOST`" --openaimodelname `"$env:AZURE_OPENAI_EMB_MODEL_NAME`" $openaiDimensionsArg " + `
"--openaiservice `"$env:AZURE_OPENAI_SERVICE`" --openaideployment `"$env:AZURE_OPENAI_EMB_DEPLOYMENT`" " + `
"--openaicustomurl `"$env:OPENAI_CUSTOM_URL`" " + `
"--openaikey `"$env:OPENAI_API_KEY`" --openaiorg `"$env:OPENAI_ORGANIZATION`" " + `
"--documentintelligenceservice $env:AZURE_DOCUMENTINTELLIGENCE_SERVICE " + `
"$searchImagesArg $visionEndpointArg " + `
Expand Down
8 changes: 7 additions & 1 deletion scripts/prepdocs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ if [ -n "$USE_FEATURE_INT_VECTORIZATION" ]; then
integratedVectorizationArg="--useintvectorization $USE_FEATURE_INT_VECTORIZATION"
fi

# Pick the API key to pass to prepdocs.py: prefer AZURE_OPENAI_API_KEY when it
# is non-empty, otherwise use OPENAI_API_KEY when that is non-empty. When
# neither is set, openAiApiKeyArg is not assigned here.
# NOTE(review): the invocation below expands $openAiApiKeyArg unquoted right
# after --openaikey; with no key set the expansion yields no word at all, so
# --openaikey would be followed directly by --openaiorg. Confirm that case is
# handled as intended.
if [ -n "$AZURE_OPENAI_API_KEY" ]; then
openAiApiKeyArg="$AZURE_OPENAI_API_KEY"
elif [ -n "$OPENAI_API_KEY" ]; then
openAiApiKeyArg="$OPENAI_API_KEY"
fi

./.venv/bin/python ./app/backend/prepdocs.py './data/*' --verbose \
--subscriptionid $AZURE_SUBSCRIPTION_ID \
Expand All @@ -71,7 +76,8 @@ fi
$searchAnalyzerNameArg \
--openaihost "$OPENAI_HOST" --openaimodelname "$AZURE_OPENAI_EMB_MODEL_NAME" $openAiDimensionsArg \
--openaiservice "$AZURE_OPENAI_SERVICE" --openaideployment "$AZURE_OPENAI_EMB_DEPLOYMENT" \
--openaikey "$OPENAI_API_KEY" --openaiorg "$OPENAI_ORGANIZATION" \
--openaicustomurl "$AZURE_OPENAI_CUSTOM_URL" \
--openaikey $openAiApiKeyArg --openaiorg "$OPENAI_ORGANIZATION" \
--documentintelligenceservice "$AZURE_DOCUMENTINTELLIGENCE_SERVICE" \
$searchImagesArg $visionEndpointArg \
$adlsGen2StorageAccountArg $adlsGen2FilesystemArg $adlsGen2FilesystemPathArg \
Expand Down
1 change: 1 addition & 0 deletions tests/e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def run_server(port: int):
"AZURE_SEARCH_SERVICE": "test-search-service",
"AZURE_SPEECH_SERVICE_ID": "test-id",
"AZURE_SPEECH_SERVICE_LOCATION": "eastus",
"AZURE_OPENAI_SERVICE": "test-openai-service",
"AZURE_OPENAI_CHATGPT_MODEL": "gpt-35-turbo",
},
clear=True,
Expand Down
1 change: 1 addition & 0 deletions tests/test_app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def minimal_env(monkeypatch):
monkeypatch.setenv("AZURE_STORAGE_CONTAINER", "test-storage-container")
monkeypatch.setenv("AZURE_SEARCH_INDEX", "test-search-index")
monkeypatch.setenv("AZURE_SEARCH_SERVICE", "test-search-service")
monkeypatch.setenv("AZURE_OPENAI_SERVICE", "test-openai-service")
monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "gpt-35-turbo")
yield

Expand Down

0 comments on commit 28536f6

Please sign in to comment.