diff --git a/README.md b/README.md
index f463342..7a3405c 100644
--- a/README.md
+++ b/README.md
@@ -93,3 +93,11 @@ Use G|B|V|C|D|E|R|T keys to rotate to absolute orientations. 'F' to cancel a rotation.
 ```bash
 docker rm $(docker ps -a -q) ; docker images | grep '' | awk '{print $3}' | xargs docker rmi
 ```
+
+
+
+
+TODOs:
+5. Build a key-value store that maps each verbal command to its operation sequence.
+6. Index the key-value store by verbal command in a vector DB.
+7. Add RAG after voice recognition.
\ No newline at end of file
diff --git a/mnlm/client/gpt_control/assistant.py b/mnlm/client/gpt_control/assistant.py
index f5eef90..bad5af9 100644
--- a/mnlm/client/gpt_control/assistant.py
+++ b/mnlm/client/gpt_control/assistant.py
@@ -164,6 +164,7 @@ def start_conversation(
     use_voice_input: bool,
     use_voice_output: bool,
     use_dummy_robot_arm_server: bool,
+    use_rag: bool,
     logger: Logger,
 ) -> None:
     client = OpenAI()
@@ -171,6 +172,7 @@ def start_conversation(
         logger=logger,
         verbose=verbose,
         use_dummy_robot_arm_server=use_dummy_robot_arm_server,
+        use_rag=use_rag,
     )
     assistant = create_assistant(
         client=client, tools=tools, logger=logger, verbose=verbose
     )
@@ -233,6 +235,7 @@ def start_conversation(
     use_voice_input = True  # Set to True to enable voice input. In docker container, it's not possible.
     use_voice_output = True  # Set to True to enable voice output. In docker container, it's not possible.
     use_dummy_robot_arm_server = False  # Set to True to use the simulation mode
+    use_rag = True
     logger = Logger(__name__)
     start_conversation(
         verbose=verbose,
@@ -240,5 +243,6 @@ def start_conversation(
         use_voice_input=use_voice_input,
         use_voice_output=use_voice_output,
         use_dummy_robot_arm_server=use_dummy_robot_arm_server,
+        use_rag=use_rag,
         logger=logger,
     )
+ """ + instructions_data = self._load_json_file(command_bank_file_path) + instructions = [] + operation_sequences = [] + for instruction, operations in instructions_data.items(): + instructions.append(instruction) + operations_blob = { + "instruction": instruction, + "operations": operations["operations"], + } + operation_sequences.append(operations_blob) + + embeddings = self._embed_instructions(instructions) + + # Creating the FAISS index + dimension = embeddings.shape[1] + self.index = faiss.IndexFlatL2(dimension) + self.index.add(embeddings) + + # Save the FAISS index and operation sequences + self._save_index_and_data( + operation_sequences=operation_sequences, + index_destination=index_destination, + data_destination=data_destination, + ) + + def _load_json_file(self, command_bank_file_path: str) -> Dict[str, Any]: + """ + Load the JSON file containing instructions. + + Returns: + dict: Dictionary containing instructions. + """ + self.logger.info(f"Loading JSON file: {command_bank_file_path}") + with open(command_bank_file_path, "r") as file: + return json.load(file) + + def _embed_instructions(self, instructions: List[str]) -> np.ndarray: + """ + Embed instructions using OpenAI's embedding API. + + Args: + instructions (list): List of instructions. + + Returns: + np.ndarray: Array of instruction embeddings. + """ + self.logger.info(f"Embedding instructions...") + embeddings = [] + for instruction in instructions: + # Ensure instruction is a single line + instruction = instruction.replace("\n", " ") + # Create embedding + response = self.client.embeddings.create( + input=[instruction], model="text-embedding-3-small" + ) + embeddings.append(response.data[0].embedding) + return np.array(embeddings, dtype="float32") + + def _save_index_and_data( + self, + operation_sequences: List[Dict[str, Any]], + index_destination: str, + data_destination: str, + ) -> None: + """ + Save the FAISS index and operation sequences to files. + + Args: + index_destination (str): Path to save the FAISS index. + data_destination (str): Path to save the operation sequences. + """ + # Ensure the index and data are created + if self.index is None: + raise ValueError("Index has not been created. Call create_index() first.") + + # Save the FAISS index + if os.path.exists(index_destination): + os.remove(index_destination) + os.makedirs(os.path.dirname(index_destination), exist_ok=True) + faiss.write_index(self.index, index_destination) + + # Save the operation sequences + if os.path.exists(data_destination): + os.remove(data_destination) + os.makedirs(os.path.dirname(data_destination), exist_ok=True) + with open(data_destination, "w") as file: + json.dump(operation_sequences, file, indent=2) + + def load_index_and_data( + self, index_path: str = None, data_path: str = None + ) -> None: + """ + Load the FAISS index and operation sequences from files. + + Args: + index_path (str): Path to the FAISS index file. + data_path (str): Path to the operation sequences file. + """ + if not index_path: + index_path = os.path.join( + os.path.dirname(os.path.dirname(__file__)), + "knowledge/index/instructions.index", + ) + if not data_path: + data_path = os.path.join( + os.path.dirname(os.path.dirname(__file__)), + "knowledge/index/instructions_data.json", + ) + self.index = faiss.read_index(index_path) + with open(data_path, "r") as file: + self.operation_sequences = json.load(file) + + def retrieve_operation_sequences(self, instruction: str, k: int = 1) -> str: + """ + Retrieve the operation sequences for a given query. 
diff --git a/mnlm/client/gpt_control/tools.py b/mnlm/client/gpt_control/tools.py
index aefadcd..c523c1f 100644
--- a/mnlm/client/gpt_control/tools.py
+++ b/mnlm/client/gpt_control/tools.py
@@ -3,6 +3,7 @@
 from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional
 
+from command_indexer import InstructionIndexer  # type: ignore
 from openai import OpenAI
 from robot_arm import (
     OperationSequenceGenerator,
@@ -80,6 +81,7 @@ def __init__(
         logger: Logger,
         gpt_client: Optional[OpenAI] = None,
         simulation: bool = True,
+        use_rag: bool = False,
         verbose: bool = False,
     ):
         super().__init__(name=name, logger=logger, verbose=verbose)
@@ -87,11 +89,17 @@ def __init__(
         api_document_path = os.path.join(
             os.path.dirname(os.path.dirname(__file__)), "knowledge", "robot_arm.md"
         )
-        self.operation_generator = OperationSequenceGenerator(
-            api_document_path=api_document_path,
-            gpt_client=gpt_client,
-            logger=logger,
-        )
+        self.use_rag = use_rag
+        if self.use_rag:
+            self.indexer = InstructionIndexer()
+            self.indexer.load_index_and_data()
+        else:
+            self.operation_generator = OperationSequenceGenerator(
+                api_document_path=api_document_path,
+                gpt_client=gpt_client,
+                logger=logger,
+            )
+
         self.simulation = simulation
 
         self.knowledge = f"""
@@ -125,9 +133,14 @@ def get_signature(self) -> Dict[str, Any]:
     def execute(self, instruction: str) -> str:
         try:
             # Execute operations using the chosen mode (simulation or real)
-            operations_json = self.operation_generator.translate_prompt_to_sequence(
-                prompt=instruction
-            )
+            if self.use_rag:
+                operations_json = self.indexer.retrieve_operation_sequences(
+                    instruction=instruction
+                )
+            else:
+                operations_json = self.operation_generator.translate_prompt_to_sequence(
+                    prompt=instruction
+                )
             if self.verbose:
                 self.logger.info(f"Robot arm command: {operations_json}.")
             self.robot_arm_control.execute_operations(operations_json)
@@ -138,7 +151,10 @@ def execute(self, instruction: str) -> str:
 
 
 def init_tools(
-    logger: Logger, verbose: bool = False, use_dummy_robot_arm_server: bool = False
+    logger: Logger,
+    verbose: bool = False,
+    use_dummy_robot_arm_server: bool = False,
+    use_rag: bool = False,
 ) -> Dict[str, Any]:
     """Initialize the tools for the assistant.
 
@@ -155,6 +171,7 @@ def init_tools(
             name="robot_arm",
             logger=logger,
             verbose=verbose,
+            use_rag=use_rag,
             simulation=use_dummy_robot_arm_server,
         ),
     }
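A sketch of wiring the RAG path end to end (assumptions: `utils.Logger` as used elsewhere in this diff, the dummy robot arm server for safe local testing, and that the dict returned by `init_tools` is keyed by tool name):

```python
from tools import init_tools
from utils import Logger

tools = init_tools(
    logger=Logger(__name__),
    verbose=True,
    use_dummy_robot_arm_server=True,  # simulation mode
    use_rag=True,  # look up operations in the FAISS index instead of prompting GPT
)

# Retrieves the nearest stored operation sequence and executes it.
tools["robot_arm"].execute("nod the head")
```

With `use_rag=True` the tool skips `OperationSequenceGenerator` entirely, so a known command costs one embedding call rather than a full GPT completion.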
diff --git a/mnlm/client/knowledge/command_bank.json b/mnlm/client/knowledge/command_bank.json
new file mode 100644
index 0000000..9084f89
--- /dev/null
+++ b/mnlm/client/knowledge/command_bank.json
@@ -0,0 +1,82 @@
+{
+    "reset": {
+        "operations": [
+            {
+                "operation": "move_all_servos",
+                "parameters": {"angles": [0, 0, 0, 0, 0, 0, 0], "time": 500}
+            }
+        ]
+    },
+    "A simple dance": {
+        "operations": [
+            {
+                "operation": "move_all_servos",
+                "parameters": {"angles": [0, 45, 60, 60, 0, 0, 0], "time": 500}
+            },
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo0", "angle": -60, "time": 500}
+            },
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo0", "angle": 60, "time": 500}
+            },
+            {
+                "operation": "move_all_servos",
+                "parameters": {"angles": [0, 0, 0, 0, 0, 0, 0], "time": 500}
+            }
+        ]
+    },
+    "A complex dance": {
+        "operations": [
+            {
+                "operation": "move_all_servos",
+                "parameters": {"angles": [0, 45, 60, 60, 0, 0, 0], "time": 500}
+            },
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo0", "angle": -60, "time": 500}
+            },
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo0", "angle": 60, "time": 500}
+            },
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo0", "angle": -60, "time": 500}
+            },
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo0", "angle": 60, "time": 500}
+            },
+            {
+                "operation": "move_all_servos",
+                "parameters": {"angles": [0, 0, 0, 0, 0, 0, 0], "time": 500}
+            }
+        ]
+    },
+    "nod the head": {
+        "operations": [
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo3", "angle": 50, "time": 500}
+            },
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo3", "angle": 10, "time": 500}
+            },
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo3", "angle": 50, "time": 500}
+            },
+            {
+                "operation": "move_single_servo",
+                "parameters": {"id": "servo3", "angle": 0, "time": 500}
+            },
+            {
+                "operation": "move_all_servos",
+                "parameters": {"angles": [0, 0, 0, 0, 0, 0, 0], "time": 500}
+            }
+        ]
+    }
+}
\ No newline at end of file
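Each top-level key is a verbal command mapped to its `operations` sequence, so extending the bank is a pure data change. A hypothetical new entry in the same schema (the `"wave"` command and its angles are illustrative, not part of the diff):

```json
"wave": {
    "operations": [
        {
            "operation": "move_single_servo",
            "parameters": {"id": "servo0", "angle": 45, "time": 500}
        },
        {
            "operation": "move_single_servo",
            "parameters": {"id": "servo0", "angle": -45, "time": 500}
        },
        {
            "operation": "move_all_servos",
            "parameters": {"angles": [0, 0, 0, 0, 0, 0, 0], "time": 500}
        }
    ]
}
```

After editing the bank, rerun `python command_indexer.py index` so the new command is embedded and becomes retrievable.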
"parameters": {"id": "servo3", "angle": 10, "time": 500} + }, + { + "operation": "move_single_servo", + "parameters": {"id": "servo3", "angle": 50, "time": 500} + }, + { + "operation": "move_single_servo", + "parameters": {"id": "servo3", "angle": 0, "time": 500} + }, + { + "operation": "move_all_servos", + "parameters": {"angles": [0, 0, 0, 0, 0, 0, 0], "time": 500} + } + ] + } +} \ No newline at end of file diff --git a/mnlm/client/knowledge/index/instructions.index b/mnlm/client/knowledge/index/instructions.index new file mode 100644 index 0000000..ba4653d Binary files /dev/null and b/mnlm/client/knowledge/index/instructions.index differ diff --git a/mnlm/client/knowledge/index/instructions_data.json b/mnlm/client/knowledge/index/instructions_data.json new file mode 100644 index 0000000..12d9073 --- /dev/null +++ b/mnlm/client/knowledge/index/instructions_data.json @@ -0,0 +1,192 @@ +[ + { + "instruction": "reset", + "operations": [ + { + "operation": "move_all_servos", + "parameters": { + "angles": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "time": 500 + } + } + ] + }, + { + "instruction": "A simple dance", + "operations": [ + { + "operation": "move_all_servos", + "parameters": { + "angles": [ + 0, + 45, + 60, + 60, + 0, + 0, + 0 + ], + "time": 500 + } + }, + { + "operation": "move_single_servo", + "parameters": { + "id": "servo0", + "angle": -60, + "time": 500 + } + }, + { + "operation": "move_single_servo", + "parameters": { + "id": "servo0", + "angle": 60, + "time": 500 + } + }, + { + "operation": "move_all_servos", + "parameters": { + "angles": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "time": 500 + } + } + ] + }, + { + "instruction": "A complex dance", + "operations": [ + { + "operation": "move_all_servos", + "parameters": { + "angles": [ + 0, + 45, + 60, + 60, + 0, + 0, + 0 + ], + "time": 500 + } + }, + { + "operation": "move_single_servo", + "parameters": { + "id": "servo0", + "angle": -60, + "time": 500 + } + }, + { + "operation": "move_single_servo", + "parameters": { + "id": "servo0", + "angle": 60, + "time": 500 + } + }, + { + "operation": "move_single_servo", + "parameters": { + "id": "servo0", + "angle": -60, + "time": 500 + } + }, + { + "operation": "move_single_servo", + "parameters": { + "id": "servo0", + "angle": 60, + "time": 500 + } + }, + { + "operation": "move_all_servos", + "parameters": { + "angles": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "time": 500 + } + } + ] + }, + { + "instruction": "node the head", + "operations": [ + { + "operation": "move_single_servo", + "parameters": { + "id": "servo3", + "angle": 50, + "time": 500 + } + }, + { + "operation": "move_single_servo", + "parameters": { + "id": "servo3", + "angle": 10, + "time": 500 + } + }, + { + "operation": "move_single_servo", + "parameters": { + "id": "servo3", + "angle": 50, + "time": 500 + } + }, + { + "operation": "move_single_servo", + "parameters": { + "id": "servo3", + "angle": 0, + "time": 500 + } + }, + { + "operation": "move_all_servos", + "parameters": { + "angles": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "time": 500 + } + } + ] + } +] \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 0befa79..9e531c1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -221,6 +221,40 @@ files = [ {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, ] +[[package]] +name = "faiss-cpu" +version = "1.7.4" +description = "A library for efficient similarity search and clustering of dense vectors." 
diff --git a/poetry.lock b/poetry.lock
index 0befa79..9e531c1 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -221,6 +221,40 @@ files = [
     {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
 ]
 
+[[package]]
+name = "faiss-cpu"
+version = "1.7.4"
+description = "A library for efficient similarity search and clustering of dense vectors."
+optional = false
+python-versions = "*"
+files = [
+    {file = "faiss-cpu-1.7.4.tar.gz", hash = "sha256:265dc31b0c079bf4433303bf6010f73922490adff9188b915e2d3f5e9c82dd0a"},
+    {file = "faiss_cpu-1.7.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50d4ebe7f1869483751c558558504f818980292a9b55be36f9a1ee1009d9a686"},
+    {file = "faiss_cpu-1.7.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7b1db7fae7bd8312aeedd0c41536bcd19a6e297229e1dce526bde3a73ab8c0b5"},
+    {file = "faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17b7fa7194a228a84929d9e6619d0e7dbf00cc0f717e3462253766f5e3d07de8"},
+    {file = "faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dca531952a2e3eac56f479ff22951af4715ee44788a3fe991d208d766d3f95f3"},
+    {file = "faiss_cpu-1.7.4-cp310-cp310-win_amd64.whl", hash = "sha256:7173081d605e74766f950f2e3d6568a6f00c53f32fd9318063e96728c6c62821"},
+    {file = "faiss_cpu-1.7.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d0bbd6f55d7940cc0692f79e32a58c66106c3c950cee2341b05722de9da23ea3"},
+    {file = "faiss_cpu-1.7.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e13c14280376100f143767d0efe47dcb32618f69e62bbd3ea5cd38c2e1755926"},
+    {file = "faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c521cb8462f3b00c0c7dfb11caff492bb67816528b947be28a3b76373952c41d"},
+    {file = "faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afdd9fe1141117fed85961fd36ee627c83fc3b9fd47bafb52d3c849cc2f088b7"},
+    {file = "faiss_cpu-1.7.4-cp311-cp311-win_amd64.whl", hash = "sha256:2ff7f57889ea31d945e3b87275be3cad5d55b6261a4e3f51c7aba304d76b81fb"},
+    {file = "faiss_cpu-1.7.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:eeaf92f27d76249fb53c1adafe617b0f217ab65837acf7b4ec818511caf6e3d8"},
+    {file = "faiss_cpu-1.7.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:102b1bd763e9b0c281ac312590af3eaf1c8b663ccbc1145821fe6a9f92b8eaaf"},
+    {file = "faiss_cpu-1.7.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5512da6707c967310c46ff712b00418b7ae28e93cb609726136e826e9f2f14fa"},
+    {file = "faiss_cpu-1.7.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0c2e5b9d8c28c99f990e87379d5bbcc6c914da91ebb4250166864fd12db5755b"},
+    {file = "faiss_cpu-1.7.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:43f67f325393145d360171cd98786fcea6120ce50397319afd3bb78be409fb8a"},
+    {file = "faiss_cpu-1.7.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6a4e4af194b8fce74c4b770cad67ad1dd1b4673677fc169723e4c50ba5bd97a8"},
+    {file = "faiss_cpu-1.7.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31bfb7b9cffc36897ae02a983e04c09fe3b8c053110a287134751a115334a1df"},
+    {file = "faiss_cpu-1.7.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52d7de96abef2340c0d373c1f5cbc78026a3cebb0f8f3a5920920a00210ead1f"},
+    {file = "faiss_cpu-1.7.4-cp38-cp38-win_amd64.whl", hash = "sha256:699feef85b23c2c729d794e26ca69bebc0bee920d676028c06fd0e0becc15c7e"},
+    {file = "faiss_cpu-1.7.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:559a0133f5ed44422acb09ee1ac0acffd90c6666d1bc0d671c18f6e93ad603e2"},
+    {file = "faiss_cpu-1.7.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1d71539fe3dc0f1bed41ef954ca701678776f231046bf0ca22ccea5cf5bef6"},
+    {file = "faiss_cpu-1.7.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12d45e0157024eb3249842163162983a1ac8b458f1a8b17bbf86f01be4585a99"},
"sha256:12d45e0157024eb3249842163162983a1ac8b458f1a8b17bbf86f01be4585a99"}, + {file = "faiss_cpu-1.7.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f0eab359e066d32c874f51a7d4bf6440edeec068b7fe47e6d803c73605a8b4c"}, + {file = "faiss_cpu-1.7.4-cp39-cp39-win_amd64.whl", hash = "sha256:98459ceeeb735b9df1a5b94572106ffe0a6ce740eb7e4626715dd218657bb4dc"}, +] + [[package]] name = "h11" version = "0.14.0" @@ -984,4 +1018,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "8edb8a8f5b6ecc2cee84f9f8d1b60f7d10ae44eb9d126998f4cdf8c957134e5b" +content-hash = "209287a9a15b75ff958ca681f4e0f7022148338bee064de10230aecd582afecf" diff --git a/pyproject.toml b/pyproject.toml index f3e02fd..beae40b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ colorama = "^0.4.6" types-pyaudio = "^0.2.16.7" types-requests = "^2.31.0.10" types-colorama = "^0.4.15.12" +faiss-cpu = "^1.7.4" [tool.poetry.group.dev.dependencies]