Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UN-1920 Fix: Dynamic passing of File storage init #901

Merged
merged 9 commits into from
Dec 19, 2024
13 changes: 13 additions & 0 deletions backend/prompt_studio/prompt_studio_core_v2/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class ToolStudioPromptKeys:
RECORD = "record"
FILE_PATH = "file_path"
ENABLE_HIGHLIGHT = "enable_highlight"
EXECUTION_SOURCE = "execution_source"


class FileViewTypes:
Expand Down Expand Up @@ -132,3 +133,15 @@ class DefaultPrompts:
"Do not include any explanation in the reply. "
"Only include the extracted information in the reply."
)


class ExecutionSource(Enum):
    """Source from which a prompt-service call is invoked.

    The member value is what gets sent to the prompt service under the
    ``execution_source`` payload key. Only the IDE source is defined
    here; further sources can be added as new members.
    """

    IDE = "ide"
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
from prompt_studio.prompt_profile_manager_v2.profile_manager_helper import (
ProfileManagerHelper,
)
from prompt_studio.prompt_studio_core_v2.constants import IndexingStatus, LogLevels
from prompt_studio.prompt_studio_core_v2.constants import (
ExecutionSource,
IndexingStatus,
LogLevels,
)
from prompt_studio.prompt_studio_core_v2.constants import (
ToolStudioPromptKeys as TSPKeys,
)
Expand Down Expand Up @@ -1176,6 +1180,7 @@ def _fetch_single_pass_response(
TSPKeys.FILE_HASH: file_hash,
TSPKeys.FILE_NAME: doc_name,
Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID),
TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value,
}

util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id)
Expand Down
18 changes: 18 additions & 0 deletions prompt-service/src/unstract/prompt_service/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class PromptServiceContants:
FILE_PATH = "file_path"
HIGHLIGHT_DATA = "highlight_data"
CONFIDENCE_DATA = "confidence_data"
EXECUTION_SOURCE = "execution_source"
METRICS = "metrics"


Expand Down Expand Up @@ -101,3 +102,20 @@ class DBTableV2:
PROMPT_STUDIO_REGISTRY = "prompt_studio_registry"
PLATFORM_KEY = "platform_key"
TOKEN_USAGE = "usage"


class FileStorageKeys:
    """String keys that name the file-storage related settings."""

    # Provider and credentials settings.
    FILE_STORAGE_PROVIDER = "FILE_STORAGE_PROVIDER"
    FILE_STORAGE_CREDENTIALS = "FILE_STORAGE_CREDENTIALS"

    # Used as the ``env_name`` when a storage instance is requested
    # from ``EnvHelper.get_storage``.
    PERMANENT_REMOTE_STORAGE = "PERMANENT_REMOTE_STORAGE"
    TEMPORARY_REMOTE_STORAGE = "TEMPORARY_REMOTE_STORAGE"


class FileStorageType(Enum):
    """Kinds of file storage: permanent or temporary."""

    PERMANENT = "permanent"
    TEMPORARY = "temporary"


class ExecutionSource(Enum):
    """Origin of a prompt-service request.

    Member values are compared against the ``execution_source`` string
    supplied by the caller to choose which file storage to use.
    """

    IDE = "ide"
    TOOL = "tool"
68 changes: 61 additions & 7 deletions prompt-service/src/unstract/prompt_service/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
from dotenv import load_dotenv
from flask import Flask, current_app
from unstract.prompt_service.config import db
from unstract.prompt_service.constants import DBTableV2
from unstract.prompt_service.constants import (
DBTableV2,
ExecutionSource,
FeatureFlag,
FileStorageKeys,
)
from unstract.prompt_service.constants import PromptServiceContants as PSKeys
from unstract.prompt_service.db_utils import DBUtils
from unstract.prompt_service.env_manager import EnvLoader
Expand All @@ -16,6 +21,13 @@
from unstract.sdk.exceptions import SdkError
from unstract.sdk.llm import LLM

from unstract.flags.src.unstract.flags.feature_flag import check_feature_flag_status

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
from unstract.sdk.file_storage import FileStorage, FileStorageProvider
from unstract.sdk.file_storage.constants import StorageType
from unstract.sdk.file_storage.env_helper import EnvHelper

load_dotenv()

# Global variable to store plugins
Expand Down Expand Up @@ -278,6 +290,7 @@ def run_completion(
prompt_type: Optional[str] = PSKeys.TEXT,
enable_highlight: bool = False,
file_path: str = "",
execution_source: Optional[str] = None,
) -> str:
logger: Logger = current_app.logger
try:
Expand All @@ -286,9 +299,27 @@ def run_completion(
)
highlight_data = None
if highlight_data_plugin and enable_highlight:
highlight_data = highlight_data_plugin["entrypoint_cls"](
logger=current_app.logger, file_path=file_path
).run
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
fs_instance: FileStorage = FileStorage(FileStorageProvider.LOCAL)
if execution_source == ExecutionSource.IDE.value:
fs_instance = EnvHelper.get_storage(
storage_type=StorageType.PERMANENT,
env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE,
)
if execution_source == ExecutionSource.TOOL.value:
fs_instance = EnvHelper.get_storage(
storage_type=StorageType.TEMPORARY,
env_name=FileStorageKeys.TEMPORARY_REMOTE_STORAGE,
)
highlight_data = highlight_data_plugin["entrypoint_cls"](
logger=current_app.logger,
file_path=file_path,
fs_instance=fs_instance,
).run
else:
highlight_data = highlight_data_plugin["entrypoint_cls"](
logger=current_app.logger, file_path=file_path
).run
completion = llm.complete(
prompt=prompt,
process_text=highlight_data,
Expand Down Expand Up @@ -325,6 +356,7 @@ def extract_table(
structured_output: dict[str, Any],
llm: LLM,
enforce_type: str,
execution_source: str,
) -> dict[str, Any]:
table_settings = output[PSKeys.TABLE_SETTINGS]
table_extractor: dict[str, Any] = plugins.get("table-extractor", {})
Expand All @@ -333,10 +365,32 @@ def extract_table(
"Unable to extract table details. "
"Please contact admin to resolve this issue."
)
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
fs_instance: FileStorage = FileStorage(FileStorageProvider.LOCAL)
if execution_source == ExecutionSource.IDE.value:
fs_instance = EnvHelper.get_storage(
storage_type=StorageType.PERMANENT,
env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE,
)
if execution_source == ExecutionSource.TOOL.value:
fs_instance = EnvHelper.get_storage(
storage_type=StorageType.TEMPORARY,
env_name=FileStorageKeys.TEMPORARY_REMOTE_STORAGE,
)
gaya3-zipstack marked this conversation as resolved.
Show resolved Hide resolved
try:
answer = table_extractor["entrypoint_cls"].extract_large_table(
llm=llm, table_settings=table_settings, enforce_type=enforce_type
)
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
answer = table_extractor["entrypoint_cls"].extract_large_table(
llm=llm,
table_settings=table_settings,
enforce_type=enforce_type,
fs_instance=fs_instance,
)
else:
answer = table_extractor["entrypoint_cls"].extract_large_table(
llm=llm,
table_settings=table_settings,
enforce_type=enforce_type,
)
structured_output[output[PSKeys.NAME]] = answer
# We do not support summary and eval for table.
# Hence returning the result
Expand Down
3 changes: 3 additions & 0 deletions prompt-service/src/unstract/prompt_service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ def prompt_processor() -> Any:
}
metrics: dict = {}
variable_names: list[str] = []
# Identifier for source of invocation
execution_source = payload.get(PSKeys.EXECUTION_SOURCE, "")
harini-venkataraman marked this conversation as resolved.
Show resolved Hide resolved
publish_log(
log_events_id,
{"tool_id": tool_id, "run_id": run_id, "doc_name": doc_name},
Expand Down Expand Up @@ -226,6 +228,7 @@ def prompt_processor() -> Any:
structured_output=structured_output,
llm=llm,
enforce_type=output[PSKeys.TYPE],
execution_source=execution_source,
)
metadata = query_usage_metadata(token=platform_key, metadata=metadata)
response = {
Expand Down
2 changes: 2 additions & 0 deletions tools/structure/src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,7 @@ class SettingsKeys:
CONFIDENCE_DATA = "confidence_data"
EXECUTION_RUN_DATA_FOLDER = "EXECUTION_RUN_DATA_FOLDER"
FILE_PATH = "file_path"
EXECUTION_SOURCE = "execution_source"
TOOL = "tool"
METRICS = "metrics"
INDEXING = "indexing"
1 change: 1 addition & 0 deletions tools/structure/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def run(
SettingsKeys.FILE_HASH: file_hash,
SettingsKeys.FILE_NAME: file_name,
SettingsKeys.FILE_PATH: extracted_input_file,
SettingsKeys.EXECUTION_SOURCE: SettingsKeys.TOOL,
}
# TODO: Need to split extraction and indexing
# to avoid unwanted indexing
Expand Down
Loading