Skip to content

Commit

Permalink
feat:add rag awel operator view metadata. (#1174)
Browse files Browse the repository at this point in the history
  • Loading branch information
Aries-ckt authored Feb 21, 2024
1 parent c78bd22 commit 32e1554
Show file tree
Hide file tree
Showing 10 changed files with 527 additions and 6 deletions.
4 changes: 4 additions & 0 deletions dbgpt/core/awel/flow/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def __init__(self, label: str, description: str):
"output_parser": _CategoryDetail("Output Parser", "Parse the output of LLM model"),
"common": _CategoryDetail("Common", "The common operator"),
"agent": _CategoryDetail("Agent", "The agent operator"),
"rag": _CategoryDetail("RAG", "The RAG operator"),
}


Expand All @@ -124,6 +125,7 @@ class OperatorCategory(str, Enum):
OUTPUT_PARSER = "output_parser"
COMMON = "common"
AGENT = "agent"
RAG = "rag"

def label(self) -> str:
"""Get the label of the category."""
Expand Down Expand Up @@ -163,6 +165,7 @@ class OperatorType(str, Enum):
"common": _CategoryDetail("Common", "The common resource"),
"prompt": _CategoryDetail("Prompt", "The prompt resource"),
"agent": _CategoryDetail("Agent", "The agent resource"),
"rag": _CategoryDetail("RAG", "The resource"),
}


Expand All @@ -176,6 +179,7 @@ class ResourceCategory(str, Enum):
COMMON = "common"
PROMPT = "prompt"
AGENT = "agent"
RAG = "rag"

def label(self) -> str:
"""Get the label of the category."""
Expand Down
51 changes: 51 additions & 0 deletions dbgpt/core/awel/trigger/http_trigger.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,3 +1031,54 @@ def __init__(self, key: str = "user_input", **kwargs):
async def map(self, request_body: CommonLLMHttpRequestBody) -> Dict[str, Any]:
    """Wrap the request's messages in a one-entry dict keyed by ``self._key``.

    Args:
        request_body: The incoming request body; only its ``messages``
            attribute is read.

    Returns:
        A dict with a single entry mapping ``self._key`` to the messages.
    """
    output_key = self._key
    messages = request_body.messages
    wrapped = {}
    wrapped[output_key] = messages
    return wrapped


class RequestedParsedOperator(MapOperator[CommonLLMHttpRequestBody, str]):
    """Extract a single field from the HTTP request body.

    The request body is converted to a dict with ``.dict()`` and the value
    stored under the configured key is returned unchanged.
    """

    metadata = ViewMetadata(
        label="Request Body Parsed To String Operator",
        name="request_body_to_str__parsed_operator",
        category=OperatorCategory.COMMON,
        parameters=[
            Parameter.build_from(
                "Key",
                "key",
                str,
                optional=True,
                # NOTE(review): this default ("") differs from the constructor
                # default ("user_input"); an empty key always fails the check
                # in ``map``. Confirm which default is intended.
                default="",
                description="The key of the dict, like 'user_input'",
            )
        ],
        inputs=[
            IOField.build_from(
                "Request Body",
                "request_body",
                CommonLLMHttpRequestBody,
                description="The request body of the API endpoint",
            )
        ],
        outputs=[
            IOField.build_from(
                "User Input String",
                "user_input_str",
                str,
                description="The user input string of the API endpoint",
            )
        ],
        description="User input parsed operator",
    )

    def __init__(self, key: str = "user_input", **kwargs):
        """Initialize a RequestedParsedOperator.

        Args:
            key: Name of the request-body field to extract.
            **kwargs: Forwarded unchanged to the parent operator.
        """
        self._key = key
        super().__init__(**kwargs)

    async def map(self, request_body: CommonLLMHttpRequestBody) -> str:
        """Return the request-body field selected by the configured key.

        Args:
            request_body: The request body of the API endpoint.

        Returns:
            The value stored under the configured key, returned as-is
            (no conversion is applied).

        Raises:
            ValueError: If the key is empty or is not a field of the
                request body.
        """
        dict_value = request_body.dict()
        if not self._key or self._key not in dict_value:
            raise ValueError(
                f"Prefix key {self._key} is not a valid key of the request body"
            )
        return dict_value[self._key]
39 changes: 39 additions & 0 deletions dbgpt/core/interface/operators/llm_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,42 @@ async def transform_stream(self, output_iter: AsyncIterator[ModelOutput]):
decoded_unicode = model_output.text.replace("\ufffd", "")
msg = decoded_unicode.replace("\n", "\\n")
yield f"data:{msg}\n\n"


class StringOutput2ModelOutputOperator(MapOperator[str, ModelOutput]):
    """Map String to ModelOutput."""

    metadata = ViewMetadata(
        label="Map String to ModelOutput",
        name="string_2_model_output_operator",
        category=OperatorCategory.COMMON,
        description="Map String to ModelOutput.",
        parameters=[],
        inputs=[
            IOField.build_from(
                "String",
                "input_value",
                str,
                description="The input value of the operator.",
            ),
        ],
        outputs=[
            IOField.build_from(
                "Model Output",
                # The field name is kept as-is so existing flow definitions
                # that reference it continue to resolve.
                "input_value",
                ModelOutput,
                description="The model output wrapping the input string.",
            ),
        ],
    )

    # The original spelled this ``__int__``, which Python treats as the
    # int()-conversion hook rather than the constructor.
    def __init__(self, **kwargs):
        """Create a new operator.

        Args:
            **kwargs: Forwarded unchanged to the parent operator.
        """
        super().__init__(**kwargs)

    async def map(self, input_value: str) -> ModelOutput:
        """Wrap the input string in a ModelOutput.

        Args:
            input_value: The text to place in the model output.

        Returns:
            A ModelOutput whose ``text`` is ``input_value``.
        """
        # NOTE(review): error_code=500 is preserved from the original; it
        # looks like an error status for what is a plain mapping. Confirm
        # whether a success code was intended.
        return ModelOutput(
            text=input_value,
            error_code=500,
        )
72 changes: 69 additions & 3 deletions dbgpt/rag/operators/knowledge.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,92 @@
from typing import Any, List, Optional

from dbgpt.core.awel import MapOperator
from dbgpt.core.awel.flow import (
IOField,
OperatorCategory,
OptionValue,
Parameter,
ViewMetadata,
)
from dbgpt.core.awel.task.base import IN
from dbgpt.rag.knowledge.base import Knowledge, KnowledgeType
from dbgpt.rag.knowledge.factory import KnowledgeFactory


class KnowledgeOperator(MapOperator[Any, Any]):
    """Knowledge Factory Operator.

    Builds a ``Knowledge`` object from a datasource by delegating to
    ``KnowledgeFactory.create``.
    """

    metadata = ViewMetadata(
        label="Knowledge Factory Operator",
        name="knowledge_operator",
        category=OperatorCategory.RAG,
        description="The knowledge operator.",
        inputs=[
            IOField.build_from(
                "knowledge datasource",
                "knowledge datasource",
                dict,
                "knowledge datasource",
            )
        ],
        outputs=[
            IOField.build_from(
                "Knowledge",
                "Knowledge",
                Knowledge,
                description="Knowledge",
            )
        ],
        parameters=[
            Parameter.build_from(
                label="datasource",
                name="datasource",
                type=str,
                optional=True,
                # Matches the constructor default. A non-empty default here
                # would replace every upstream datasource in ``map``.
                default=None,
                description="datasource",
            ),
            Parameter.build_from(
                # The name must equal the constructor keyword
                # ``knowledge_type``; the original had label and name swapped.
                label="knowledge type",
                name="knowledge_type",
                type=str,
                optional=True,
                options=[
                    OptionValue(
                        label="DOCUMENT",
                        name="DOCUMENT",
                        value=KnowledgeType.DOCUMENT.name,
                    ),
                    OptionValue(label="URL", name="URL", value=KnowledgeType.URL.name),
                    OptionValue(
                        label="TEXT", name="TEXT", value=KnowledgeType.TEXT.name
                    ),
                ],
                default=KnowledgeType.DOCUMENT.name,
                description="knowledge type",
            ),
        ],
        # NOTE(review): this URL points at the openai-python repository and
        # looks copied from another operator; confirm the intended link.
        documentation_url="https://github.com/openai/openai-python",
    )

    def __init__(
        self,
        datasource: Optional[str] = None,
        knowledge_type: Optional[str] = KnowledgeType.DOCUMENT.name,
        **kwargs
    ):
        """Init the knowledge operator.

        Args:
            datasource: (Optional[str]) A fixed datasource. When set (truthy),
                it replaces the value received by ``map``.
            knowledge_type: (Optional[str]) The knowledge type, resolved via
                ``KnowledgeType.get_by_value``.
            **kwargs: Forwarded unchanged to the parent operator.
        """
        super().__init__(**kwargs)
        self._datasource = datasource
        self._knowledge_type = KnowledgeType.get_by_value(knowledge_type)

    async def map(self, datasource: IN) -> Knowledge:
        """Create a Knowledge object for the given datasource.

        Args:
            datasource: The upstream datasource; ignored when a datasource
                was configured on the operator.

        Returns:
            The result of ``KnowledgeFactory.create`` for the effective
            datasource and the configured knowledge type.
        """
        if self._datasource:
            # A configured datasource takes precedence over the upstream input.
            datasource = self._datasource
        return await self.blocking_func_to_async(
            KnowledgeFactory.create, datasource, self._knowledge_type
        )
54 changes: 54 additions & 0 deletions dbgpt/rag/operators/rewrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,67 @@

from dbgpt.core import LLMClient
from dbgpt.core.awel import MapOperator
from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata
from dbgpt.core.awel.task.base import IN
from dbgpt.rag.retriever.rewrite import QueryRewrite


class QueryRewriteOperator(MapOperator[Any, Any]):
"""The Rewrite Operator."""

# Flow-view metadata for the query rewrite operator: one input, one output
# and four optional parameters.
metadata = ViewMetadata(
    label="Query Rewrite Operator",
    name="query_rewrite_operator",
    category=OperatorCategory.RAG,
    description="query rewrite operator.",
    inputs=[
        IOField.build_from("query_context", "query_context", dict, "query context")
    ],
    outputs=[
        IOField.build_from(
            "rewritten queries",
            "queries",
            List[str],
            description="rewritten queries",
        )
    ],
    parameters=[
        Parameter.build_from(
            "LLM Client",
            "llm_client",
            LLMClient,
            optional=True,
            default=None,
            description="The LLM Client.",
        ),
        Parameter.build_from(
            label="model name",
            name="model_name",
            type=str,
            optional=True,
            default="gpt-3.5-turbo",
            description="llm model name",
        ),
        Parameter.build_from(
            label="prompt language",
            name="language",
            type=str,
            optional=True,
            default="en",
            description="prompt language",
        ),
        Parameter.build_from(
            label="nums",
            name="nums",
            type=int,
            optional=True,
            default=5,
            description="rewrite query nums",
        ),
    ],
    # NOTE(review): this URL points at the openai-python repository and looks
    # copied from another operator; confirm the intended link.
    documentation_url="https://github.com/openai/openai-python",
)

def __init__(
self,
llm_client: Optional[LLMClient],
Expand Down
65 changes: 65 additions & 0 deletions dbgpt/rag/operators/summary.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,77 @@
from typing import Any, Optional

from dbgpt.core import LLMClient
from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata
from dbgpt.core.awel.task.base import IN
from dbgpt.rag.knowledge.base import Knowledge
from dbgpt.serve.rag.assembler.summary import SummaryAssembler
from dbgpt.serve.rag.operators.base import AssemblerOperator


class SummaryAssemblerOperator(AssemblerOperator[Any, Any]):
# Flow-view metadata for the summary assembler operator: a Knowledge input,
# a string summary output and five optional parameters.
metadata = ViewMetadata(
    label="Summary Operator",
    name="summary_assembler_operator",
    category=OperatorCategory.RAG,
    description="The summary assembler operator.",
    inputs=[
        IOField.build_from(
            "Knowledge", "knowledge", Knowledge, "knowledge datasource"
        )
    ],
    outputs=[
        IOField.build_from(
            "document summary",
            "summary",
            str,
            description="document summary",
        )
    ],
    parameters=[
        Parameter.build_from(
            "LLM Client",
            "llm_client",
            LLMClient,
            optional=True,
            default=None,
            description="The LLM Client.",
        ),
        Parameter.build_from(
            label="model name",
            name="model_name",
            type=str,
            optional=True,
            default="gpt-3.5-turbo",
            description="llm model name",
        ),
        Parameter.build_from(
            label="prompt language",
            name="language",
            type=str,
            optional=True,
            default="en",
            description="prompt language",
        ),
        Parameter.build_from(
            label="max_iteration_with_llm",
            name="max_iteration_with_llm",
            type=int,
            optional=True,
            default=5,
            # The original description was copy-pasted from the
            # "prompt language" parameter above.
            description="The max iteration with llm",
        ),
        Parameter.build_from(
            label="concurrency_limit_with_llm",
            name="concurrency_limit_with_llm",
            type=int,
            optional=True,
            default=3,
            description="The concurrency limit with llm",
        ),
    ],
    # NOTE(review): this URL points at the openai-python repository and looks
    # copied from another operator; confirm the intended link.
    documentation_url="https://github.com/openai/openai-python",
)

def __init__(
self,
llm_client: Optional[LLMClient],
Expand Down
Loading

0 comments on commit 32e1554

Please sign in to comment.