Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add RAG AWEL operator view metadata. #1174

Merged
merged 1 commit into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions dbgpt/core/awel/flow/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def __init__(self, label: str, description: str):
"output_parser": _CategoryDetail("Output Parser", "Parse the output of LLM model"),
"common": _CategoryDetail("Common", "The common operator"),
"agent": _CategoryDetail("Agent", "The agent operator"),
"rag": _CategoryDetail("RAG", "The RAG operator"),
}


Expand All @@ -124,6 +125,7 @@ class OperatorCategory(str, Enum):
OUTPUT_PARSER = "output_parser"
COMMON = "common"
AGENT = "agent"
RAG = "rag"

def label(self) -> str:
"""Get the label of the category."""
Expand Down Expand Up @@ -163,6 +165,7 @@ class OperatorType(str, Enum):
"common": _CategoryDetail("Common", "The common resource"),
"prompt": _CategoryDetail("Prompt", "The prompt resource"),
"agent": _CategoryDetail("Agent", "The agent resource"),
"rag": _CategoryDetail("RAG", "The resource"),
}


Expand All @@ -176,6 +179,7 @@ class ResourceCategory(str, Enum):
COMMON = "common"
PROMPT = "prompt"
AGENT = "agent"
RAG = "rag"

def label(self) -> str:
"""Get the label of the category."""
Expand Down
51 changes: 51 additions & 0 deletions dbgpt/core/awel/trigger/http_trigger.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,3 +1031,54 @@ def __init__(self, key: str = "user_input", **kwargs):
async def map(self, request_body: CommonLLMHttpRequestBody) -> Dict[str, Any]:
"""Map the request body to response body."""
return {self._key: request_body.messages}


class RequestedParsedOperator(MapOperator[CommonLLMHttpRequestBody, str]):
    """Request body parsed operator.

    Converts the request body to a dict with ``request_body.dict()`` and
    returns the value stored under the configured ``key``.
    """

    metadata = ViewMetadata(
        label="Request Body Parsed To String Operator",
        name="request_body_to_str__parsed_operator",
        category=OperatorCategory.COMMON,
        parameters=[
            Parameter.build_from(
                "Key",
                "key",
                str,
                optional=True,
                # NOTE(review): an empty key is rejected by ``map`` (it raises
                # ValueError), and ``__init__`` defaults to "user_input" instead.
                # Confirm which default is intended and align the two.
                default="",
                description="The key of the dict, like 'user_input'",
            )
        ],
        inputs=[
            IOField.build_from(
                "Request Body",
                "request_body",
                CommonLLMHttpRequestBody,
                description="The request body of the API endpoint",
            )
        ],
        outputs=[
            IOField.build_from(
                "User Input String",
                "user_input_str",
                str,
                description="The user input string of the API endpoint",
            )
        ],
        description="User input parsed operator",
    )

    def __init__(self, key: str = "user_input", **kwargs):
        """Initialize a RequestedParsedOperator.

        Args:
            key: The name of the request body field to extract.
            **kwargs: Forwarded unchanged to the parent operator.
        """
        self._key = key
        super().__init__(**kwargs)

    async def map(self, request_body: CommonLLMHttpRequestBody) -> str:
        """Return the value of the configured key from the request body.

        Args:
            request_body: The request body received by the endpoint.

        Returns:
            The value stored under ``key`` in ``request_body.dict()``.

        Raises:
            ValueError: If the key is empty or is not a key of the request body.
        """
        dict_value = request_body.dict()
        # Reject both a missing/empty key and a key the body does not define.
        if not self._key or self._key not in dict_value:
            raise ValueError(
                f"Prefix key {self._key} is not a valid key of the request body"
            )
        return dict_value[self._key]
39 changes: 39 additions & 0 deletions dbgpt/core/interface/operators/llm_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,42 @@ async def transform_stream(self, output_iter: AsyncIterator[ModelOutput]):
decoded_unicode = model_output.text.replace("\ufffd", "")
msg = decoded_unicode.replace("\n", "\\n")
yield f"data:{msg}\n\n"


class StringOutput2ModelOutputOperator(MapOperator[str, ModelOutput]):
    """Map String to ModelOutput."""

    metadata = ViewMetadata(
        label="Map String to ModelOutput",
        name="string_2_model_output_operator",
        category=OperatorCategory.COMMON,
        description="Map String to ModelOutput.",
        parameters=[],
        inputs=[
            IOField.build_from(
                "String",
                "input_value",
                str,
                description="The input value of the operator.",
            ),
        ],
        outputs=[
            IOField.build_from(
                "Model Output",
                "input_value",
                ModelOutput,
                description="The model output built from the input string.",
            ),
        ],
    )

    # The original spelled this ``__int__``, which defines the ``int()``
    # conversion hook rather than the constructor, so it was never run on
    # instantiation.
    def __init__(self, **kwargs):
        """Create a new operator.

        Args:
            **kwargs: Forwarded unchanged to the parent operator.
        """
        super().__init__(**kwargs)

    async def map(self, input_value: str) -> ModelOutput:
        """Wrap the input string in a ModelOutput.

        Args:
            input_value: The text to place in the model output.

        Returns:
            A ModelOutput whose ``text`` is ``input_value``.
        """
        # NOTE(review): error_code=500 is kept from the original; confirm that a
        # non-zero code is intended for a plain string-to-output conversion.
        return ModelOutput(
            text=input_value,
            error_code=500,
        )
72 changes: 69 additions & 3 deletions dbgpt/rag/operators/knowledge.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,92 @@
from typing import Any, List, Optional

from dbgpt.core.awel import MapOperator
from dbgpt.core.awel.flow import (
IOField,
OperatorCategory,
OptionValue,
Parameter,
ViewMetadata,
)
from dbgpt.core.awel.task.base import IN
from dbgpt.rag.knowledge.base import Knowledge, KnowledgeType
from dbgpt.rag.knowledge.factory import KnowledgeFactory


class KnowledgeOperator(MapOperator[Any, Any]):
    """Knowledge Factory Operator.

    Builds a ``Knowledge`` object by calling ``KnowledgeFactory.create`` with a
    datasource and a knowledge type.
    """

    metadata = ViewMetadata(
        label="Knowledge Factory Operator",
        name="knowledge_operator",
        category=OperatorCategory.RAG,
        description="The knowledge operator.",
        inputs=[
            IOField.build_from(
                "knowledge datasource",
                "knowledge datasource",
                dict,
                "knowledge datasource",
            )
        ],
        outputs=[
            IOField.build_from(
                "Knowledge",
                "Knowledge",
                Knowledge,
                description="Knowledge",
            )
        ],
        parameters=[
            Parameter.build_from(
                label="datasource",
                name="datasource",
                type=str,
                optional=True,
                # Matches the ``__init__`` default: a non-empty value here would
                # replace every datasource passed to ``map``.
                default=None,
                description="datasource",
            ),
            Parameter.build_from(
                # ``name`` must be the constructor argument name.
                label="knowledge type",
                name="knowledge_type",
                type=str,
                optional=True,
                options=[
                    OptionValue(
                        label="DOCUMENT",
                        name="DOCUMENT",
                        value=KnowledgeType.DOCUMENT.name,
                    ),
                    OptionValue(label="URL", name="URL", value=KnowledgeType.URL.name),
                    OptionValue(
                        label="TEXT", name="TEXT", value=KnowledgeType.TEXT.name
                    ),
                ],
                default=KnowledgeType.DOCUMENT.name,
                description="knowledge type",
            ),
        ],
        # NOTE(review): this URL points at the openai-python repository, which
        # looks like a placeholder; confirm the intended documentation link.
        documentation_url="https://github.com/openai/openai-python",
    )

    def __init__(
        self,
        datasource: Optional[str] = None,
        knowledge_type: Optional[str] = KnowledgeType.DOCUMENT.name,
        **kwargs
    ):
        """Init the knowledge operator.

        Args:
            datasource: (Optional[str]) A fixed datasource. When set, it is used
                instead of the value received by ``map``.
            knowledge_type: (Optional[str]) The knowledge type, resolved through
                ``KnowledgeType.get_by_value``.
        """
        super().__init__(**kwargs)
        self._datasource = datasource
        self._knowledge_type = KnowledgeType.get_by_value(knowledge_type)

    async def map(self, datasource: IN) -> Knowledge:
        """Create the knowledge for the given datasource.

        Args:
            datasource: The datasource received from upstream; ignored when a
                datasource was configured on the operator.

        Returns:
            The result of ``KnowledgeFactory.create``.
        """
        # A datasource configured at construction takes precedence.
        if self._datasource:
            datasource = self._datasource
        # ``KnowledgeFactory.create`` is run through ``blocking_func_to_async``
        # so it can be awaited.
        return await self.blocking_func_to_async(
            KnowledgeFactory.create, datasource, self._knowledge_type
        )
54 changes: 54 additions & 0 deletions dbgpt/rag/operators/rewrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,67 @@

from dbgpt.core import LLMClient
from dbgpt.core.awel import MapOperator
from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata
from dbgpt.core.awel.task.base import IN
from dbgpt.rag.retriever.rewrite import QueryRewrite


class QueryRewriteOperator(MapOperator[Any, Any]):
"""The Rewrite Operator."""

metadata = ViewMetadata(
label="Query Rewrite Operator",
name="query_rewrite_operator",
category=OperatorCategory.RAG,
description="query rewrite operator.",
inputs=[
IOField.build_from("query_context", "query_context", dict, "query context")
],
outputs=[
IOField.build_from(
"rewritten queries",
"queries",
List[str],
description="rewritten queries",
)
],
parameters=[
Parameter.build_from(
"LLM Client",
"llm_client",
LLMClient,
optional=True,
default=None,
description="The LLM Client.",
),
Parameter.build_from(
label="model name",
name="model_name",
type=str,
optional=True,
default="gpt-3.5-turbo",
description="llm model name",
),
Parameter.build_from(
label="prompt language",
name="language",
type=str,
optional=True,
default="en",
description="prompt language",
),
Parameter.build_from(
label="nums",
name="nums",
type=int,
optional=True,
default=5,
description="rewrite query nums",
),
],
documentation_url="https://github.com/openai/openai-python",
)

def __init__(
self,
llm_client: Optional[LLMClient],
Expand Down
65 changes: 65 additions & 0 deletions dbgpt/rag/operators/summary.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,77 @@
from typing import Any, Optional

from dbgpt.core import LLMClient
from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata
from dbgpt.core.awel.task.base import IN
from dbgpt.rag.knowledge.base import Knowledge
from dbgpt.serve.rag.assembler.summary import SummaryAssembler
from dbgpt.serve.rag.operators.base import AssemblerOperator


class SummaryAssemblerOperator(AssemblerOperator[Any, Any]):
metadata = ViewMetadata(
label="Summary Operator",
name="summary_assembler_operator",
category=OperatorCategory.RAG,
description="The summary assembler operator.",
inputs=[
IOField.build_from(
"Knowledge", "knowledge", Knowledge, "knowledge datasource"
)
],
outputs=[
IOField.build_from(
"document summary",
"summary",
str,
description="document summary",
)
],
parameters=[
Parameter.build_from(
"LLM Client",
"llm_client",
LLMClient,
optional=True,
default=None,
description="The LLM Client.",
),
Parameter.build_from(
label="model name",
name="model_name",
type=str,
optional=True,
default="gpt-3.5-turbo",
description="llm model name",
),
Parameter.build_from(
label="prompt language",
name="language",
type=str,
optional=True,
default="en",
description="prompt language",
),
Parameter.build_from(
label="max_iteration_with_llm",
name="max_iteration_with_llm",
type=int,
optional=True,
default=5,
description="prompt language",
),
Parameter.build_from(
label="concurrency_limit_with_llm",
name="concurrency_limit_with_llm",
type=int,
optional=True,
default=3,
description="The concurrency limit with llm",
),
],
documentation_url="https://github.com/openai/openai-python",
)

def __init__(
self,
llm_client: Optional[LLMClient],
Expand Down
Loading
Loading