Replies: 4 comments
-
I see no mention of Bard on our Discord, but that would be a good place to raise the question.
-
@bomsn made it work: #46 (comment)
-
You can do it by building a wrapper API, which you can run locally or host somewhere. I suggest using FastAPI: create a "/chat/completions/" endpoint, then populate the credentials list accordingly. Here is the code I wrote for this:

```python
import datetime
import random
# Data validation
from pydantic import BaseModel
from typing import List, Optional
# FastAPI
from fastapi import APIRouter, Header, HTTPException, Depends
# Google Vertex AI
import vertexai
from vertexai.language_models import ChatModel, CodeChatModel, ChatMessage
from vertexai.preview.language_models import ChatModel as PreviewChatModel
from vertexai.preview.language_models import CodeChatModel as PreviewCodeChatModel
from google.oauth2 import service_account
# Pydantic models
class Message(BaseModel):
    role: str
    content: str

class RequestData(BaseModel):
    model: str
    credentials: dict
    location: Optional[str] = 'us-central1'
    messages: List[Message]
    temperature: Optional[float] = 0
    max_tokens: Optional[int] = 2048

class Choice(BaseModel):
    index: int
    message: Message
    finish_reason: str

class Usage(BaseModel):
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

class ResponseData(BaseModel):
    id: int
    object: str = "chat.completion"
    created: float
    model: str
    choices: List[Choice]
    usage: Usage
# Function to extract API key from the request's Authorization header
async def get_api_key(authorization: str = Header(None)):
    if authorization is None:
        raise HTTPException(status_code=400, detail="Missing Authorization header")
    # Extract the API key from the Authorization header
    api_key = authorization.replace("Bearer ", "")
    return api_key
#######################################################################
########################## API Endpoints ##############################
#######################################################################
chat = APIRouter()
@chat.post("/vertexai/chat/completions/", response_model=ResponseData, summary="Send a chat message to Vertex AI LLM and get an OpenAI-compatible API response.")
async def chat_completions_endpoint(request: RequestData, api_key: str = Depends(get_api_key)):
"""
This function `chat_completions_endpoint` is an API endpoint that accepts POST requests at "/chat/completions/".
It takes in a JSON request body and an API key from the Authorization header. The JSON request body should contain the following fields:
- `model`: The name of the Vertex AI model to use for generating chat completions ( only "codechat-bison" and "chat-bison" are supported ).
- `credentials`: The service account credentials for accessing Vertex AI.
- `location`: The location of the Vertex AI service (default is 'us-central1').
- `messages`: A list of chat messages. Each message is a dictionary with 'role' and 'content' fields.
- `temperature`: The randomness of the generated responses (default is 0).
- `max_tokens`: The maximum number of tokens in the generated response (default is 2048).
The function sends a chat message to Vertex AI using the specified model and returns a JSON response containing the generated chat completion.
The response includes the following fields:
- `id`: A random ID for the chat completion.
- `object`: The type of the object, which is "chat.completion".
- `created`: The timestamp when the chat completion was created.
- `model`: The name of the Vertex AI model used.
- `choices`: A list containing the generated chat completion. Each choice is a dictionary with 'index', 'message', and 'finish_reason' fields.
- `usage`: A dictionary containing the number of tokens used in the prompt, completion, and total.
If the API key is invalid, the function raises an HTTP 403 error. If any other error occurs during request processing, it raises an HTTP 500 error.
"""
    try:
        if api_key == "your_own_secret_api_key":
            # Access data fields directly
            model = request.model
            credentials_object = request.credentials
            location = request.location
            messages = request.messages
            temperature = request.temperature
            max_tokens = request.max_tokens
            # Prepare the data
            system_message = ""
            user_bot_pairs = []
            last_message_role = None  # To keep track of the last message role
            # Extract the system message (context) and the chat history
            for message in messages:
                role = message.role
                content = message.content
                if role == "system":
                    # Modify AutoGen's default prompt to avoid mistakes
                    if "Read the above conversation" in content:
                        if "Read the following conversation" not in system_message:
                            content = content.replace("Read the above conversation", "Read the conversation")
                        else:
                            continue
                    # Add a line break if the system message is not empty
                    if system_message:
                        system_message += "\n"
                    system_message += content
                    last_message_role = "system"
                else:
                    if last_message_role == "user":
                        user_bot_pairs.append(ChatMessage(content=content, author='bot'))
                        last_message_role = "bot"
                    else:
                        user_bot_pairs.append(ChatMessage(content=content, author='user'))
                        last_message_role = "user"
            # Get the last user message. The Google API requires the history plus the new
            # message to form an odd number of strictly alternating turns; otherwise it
            # raises "There should be odd number of messages for correct alternating turn."
            last_user_message = ""
            if len(user_bot_pairs) % 2 == 1:
                # The history is odd, so sending one more message would make the total
                # even. Pop the last history message and send it as the new message instead.
                last_user_message += user_bot_pairs[-1].content
                user_bot_pairs.pop()
            elif messages[-1].role == 'user' or messages[-1].role == 'system':
                last_user_message = messages[-1].content
            else:
                last_user_message = "Provide the best response based on the context and message history."
            ######################################### Credentials ####################################
            credentials = service_account.Credentials.from_service_account_info(credentials_object)
            ################################## Initialize Vertex AI ##################################
            vertexai.init(project=credentials_object["project_id"], location=location, credentials=credentials)
            parameters = {
                "max_output_tokens": max_tokens if max_tokens <= 2048 else 2048,  # clamp to 2048
                "temperature": temperature
            }
            if model in ['codechat-bison', 'codechat-bison@001', 'codechat-bison-32k']:
                chat_model = PreviewCodeChatModel.from_pretrained(model) if model == 'codechat-bison-32k' else CodeChatModel.from_pretrained(model)
                chat_session = chat_model.start_chat(context=system_message, message_history=user_bot_pairs, **parameters)
                response = chat_session.send_message(last_user_message)
            else:
                chat_model = PreviewChatModel.from_pretrained(model) if model == 'chat-bison-32k' else ChatModel.from_pretrained(model)
                chat_session = chat_model.start_chat(context=system_message, message_history=user_bot_pairs)
                response = chat_session.send_message(last_user_message, **parameters)
            # Cleaning: strings are immutable, so keep the cleaned text in a variable
            response_text = response.text
            if response_text.startswith("Assistant: "):
                response_text = response_text.replace("Assistant: ", "", 1)
            response_text = response_text.strip()
            # Create the response using the model; an OpenAI-compatible reply carries
            # the generated message under the "assistant" role
            response_dict = ResponseData(
                id=random.randint(1, 999999999999999999999),
                model=model,
                created=datetime.datetime.now().timestamp(),
                choices=[
                    Choice(
                        index=0,
                        message=Message(role="assistant", content=response_text),
                        finish_reason="stop"
                    )
                ],
                usage=Usage()
            )
            return response_dict
        else:
            raise HTTPException(status_code=403, detail="Invalid API key")
    except Exception as e:
        raise HTTPException(status_code=500, detail="Server Error: " + str(e))
```

Make sure to install the dependencies (FastAPI and the `google-cloud-aiplatform` SDK) and replace `your_own_secret_api_key` (the placeholder in the code) with your own secret key. After that, add something like this to your config file:
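The config snippet itself isn't shown above, but here is a minimal sketch of what an `OAI_CONFIG_LIST` entry pointing at the wrapper could look like; the host/port and the key name (`base_url` vs. `api_base`, depending on your AutoGen version) are assumptions:

```json
[
    {
        "model": "chat-bison",
        "api_key": "your_own_secret_api_key",
        "base_url": "http://localhost:8000/vertexai"
    }
]
```

The `api_key` here only has to match the value hard-coded in the endpoint. Note that `RequestData` also expects a `credentials` dict in the request body, which a stock OpenAI-style client won't send, so you'd either hard-code the service-account info in the wrapper or pass it through however your client allows.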
Note: I've removed some parts from the code for privacy reasons, so make sure to adjust accordingly if needed.
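One piece the code above doesn't show is the app wiring: it defines an `APIRouter` but never creates the FastAPI app. A minimal sketch, assuming everything lives in one module (the file name `main.py` and the port are placeholders, not from the original):

```python
# main.py -- app wiring for the wrapper (names here are assumptions)
from fastapi import FastAPI

app = FastAPI()
app.include_router(chat)  # `chat` is the APIRouter defined in the wrapper code above

# Run locally with:
#   uvicorn main:app --host 0.0.0.0 --port 8000
```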
-
I'll resolve this for now. Please reopen to continue the discussion.
-
How do I set Google Bard AI in the OAI_CONFIG_LIST file? Is it possible?