diff --git a/docker/hri/chromadb.yaml b/docker/hri/chromadb.yaml index b23c20e..bd0c727 100644 --- a/docker/hri/chromadb.yaml +++ b/docker/hri/chromadb.yaml @@ -1,5 +1,3 @@ -version: '3.8' - services: chromadb: image: chromadb/chroma:latest diff --git a/docker/hri/docker-compose.yml b/docker/hri/docker-compose.yml new file mode 100644 index 0000000..2466f69 --- /dev/null +++ b/docker/hri/docker-compose.yml @@ -0,0 +1,9 @@ +include: + # Runs speech and nlp nodes + - path: hri-ros.yaml + + # Runs Faster-whisper + - path: stt.yaml + + # Chroma db for embeddings + - path: chromadb.yaml \ No newline at end of file diff --git a/docker/hri/devices.yaml b/docker/hri/hri-ros.yaml similarity index 75% rename from docker/hri/devices.yaml rename to docker/hri/hri-ros.yaml index 58d6851..9789adb 100644 --- a/docker/hri/devices.yaml +++ b/docker/hri/hri-ros.yaml @@ -11,6 +11,7 @@ services: dockerfile: docker/hri/Dockerfile.cpu args: BASE_IMAGE: roborregos/home2:cpu_base + image: roborregos/home2:hri-cpu volumes: - ../../:/workspace/src @@ -27,7 +28,7 @@ services: env_file: - .env tty: true + entrypoint: [ "bash", "-il", "-c"] command: [ - "bash", - # "source /workspace/ws/devel/setup.bash && roslaunch hri recepcionist_laptop.launch", + "colcon build --symlink-install --packages-select task_manager frida_interfaces frida_constants speech nlp embeddings && source ~/.bashrc && ros2 launch speech hri_launch.py" ] diff --git a/frida_constants/frida_constants/hri_constants.py b/frida_constants/frida_constants/hri_constants.py index 920d7f1..b0a11fe 100644 --- a/frida_constants/frida_constants/hri_constants.py +++ b/frida_constants/frida_constants/hri_constants.py @@ -1,8 +1,15 @@ SPEAK_SERVICE = "/speech/speak" -HEAR_SERVICE = "/speech/STT" +STT_SERVICE_NAME = "/speech/STT" KEYWORD_TOPIC = "/speech/kws" COMMAND_INTERPRETER_SERVICE = "/nlp/command_interpreter" -DATA_EXTRACTOR_SERVICE = "/nlp/data_extractor" +EXTRACT_DATA_SERVICE = "/nlp/data_extractor" SENTENCE_BUILDER_SERVICE = "/nlp/sentence_builder" ITEM_CATEGORIZATION_SERVICE = "/nlp/item_categorization" CONVESATION_SERVICE = "/nlp/conversation" +GRAMMAR_SERVICE = "/nlp/grammar" + +ADD_ITEM_SERVICE = "/nlp/embeddings/add_item_service" +REMOVE_ITEM_SERVICE = "/nlp/embeddings/remove_item_service" +UPDATE_ITEM_SERVICE = "/nlp/embeddings/update_item_service" +QUERY_ITEM_SERVICE = "/nlp/embeddings/query_item_service" +BUILD_EMBEDDINGS_SERVICE = "/nlp/embeddings/build_embeddings_service" diff --git a/frida_interfaces/hri/srv/CommandInterpreter.srv b/frida_interfaces/hri/srv/CommandInterpreter.srv index c6e6ed4..d762ea0 100644 --- a/frida_interfaces/hri/srv/CommandInterpreter.srv +++ b/frida_interfaces/hri/srv/CommandInterpreter.srv @@ -1,3 +1,3 @@ -string text_heard +string text --- -frida_interfaces/CommandList commands \ No newline at end of file +frida_interfaces/Command[] commands \ No newline at end of file diff --git a/frida_interfaces/hri/srv/Grammar.srv b/frida_interfaces/hri/srv/Grammar.srv new file mode 100644 index 0000000..ad24d66 --- /dev/null +++ b/frida_interfaces/hri/srv/Grammar.srv @@ -0,0 +1,3 @@ +string text +--- +string corrected_text \ No newline at end of file diff --git a/hri/README.md b/hri/README.md index b6621dc..7dea601 100644 --- a/hri/README.md +++ b/hri/README.md @@ -46,21 +46,13 @@ In addition, the following files are required: docker compose -f cuda.yaml build # or -> docker compose -f cpu.yaml build -# Use devices compose (for audio I/O) +# Build and run HRI containers # pwd -> home2/docker/hri -docker compose -f devices.yaml up 
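+# this compose file includes hri-ros.yaml, stt.yaml and chromadb.yaml (see docker-compose.yml)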
+docker compose up
 
-# Build packages
-## Enter the container
+# Enter the container (this container has the ROS 2 environment)
 docker exec -it home2-hri-cuda-devices bash
 
-# Enable non-root ownership of the workspace
-# pwd -> /workspace
-sudo chown -R $(id -u):$(id -g) .
-
-# pwd -> /workspace
-colcon build --symlink-install --packages-select frida_interfaces speech nlp
-source install/setup.bash
 ```
 
 ## Running the project
 
@@ -70,7 +62,10 @@ Most of the final commands will be executed using the docker compose file.
 However, some testing commands are the following:
 
 ```bash
-# Speech (Remember to start the stt docker before)
+# Launch HRI (includes speech and nlp)
+ros2 launch speech hri_launch.py
+
+# Speech (remember to start the STT docker first; this is done automatically when running the HRI docker compose file)
 ros2 launch speech devices_launch.py
 
 ros2 topic pub /speech/speak_now --once std_msgs/msg/String "data: 'Go to the kitchen and grab cookies'"
@@ -81,6 +76,24 @@ ros2 launch nlp nlp_launch.py
 ros2 topic pub /speech/raw_command std_msgs/msg/String "data: Go to the kitchen and grab cookies" --once
 ```
 
+## Other useful commands
+
+Source the environment (this is done automatically in the `.bashrc`)
+```bash
+source /workspace/install/setup.bash
+```
+
+Build the HRI packages (this is done automatically by the `hri-ros.yaml` docker compose file)
+```bash
+colcon build --symlink-install --packages-select task_manager frida_interfaces frida_constants speech nlp embeddings
+```
+
+Enable file permissions for the current user. This is useful when the user in the container does not match the user on the host.
+```bash
+# pwd -> home2
+sudo chown -R $(id -u):$(id -g) .
+```
+
 ## Speech pipeline
 
 ### AudioCapturer.py
diff --git a/hri/packages/embeddings/CMakeLists.txt b/hri/packages/embeddings/CMakeLists.txt
index 587c883..e1e0064 100644
--- a/hri/packages/embeddings/CMakeLists.txt
+++ b/hri/packages/embeddings/CMakeLists.txt
@@ -19,7 +19,7 @@ install(
   FILES_MATCHING PATTERN "*.py"
 )
 
-install(DIRECTORY launch
+install(DIRECTORY launch config
   DESTINATION share/${PROJECT_NAME})
 
 file(GLOB PYTHON_SCRIPTS scripts/*.py)
diff --git a/hri/packages/embeddings/config/item_categorization.yaml b/hri/packages/embeddings/config/item_categorization.yaml
new file mode 100644
index 0000000..6b0cba2
--- /dev/null
+++ b/hri/packages/embeddings/config/item_categorization.yaml
@@ -0,0 +1,10 @@
+item_categorization:
+  ros__parameters:
+    Embeddings_model: all-MiniLM-L12-v2
+    collections_built: 0
+
+    ADD_ITEM_SERVICE: REPLACE
+    REMOVE_ITEM_SERVICE: REPLACE
+    UPDATE_ITEM_SERVICE: REPLACE
+    QUERY_ITEM_SERVICE: REPLACE
+    BUILD_EMBEDDINGS_SERVICE: REPLACE
\ No newline at end of file
diff --git a/hri/packages/embeddings/launch/chroma_launch.py b/hri/packages/embeddings/launch/chroma_launch.py
index e6504c5..73f562f 100644
--- a/hri/packages/embeddings/launch/chroma_launch.py
+++ b/hri/packages/embeddings/launch/chroma_launch.py
@@ -1,8 +1,22 @@
+import os
+
+from ament_index_python.packages import get_package_share_directory
 from launch import LaunchDescription
 from launch_ros.actions import Node
 
+from frida_constants import ModuleNames, parse_ros_config
+
 
 def generate_launch_description():
+    item_categorization_config = parse_ros_config(
+        os.path.join(
+            get_package_share_directory("embeddings"),
+            "config",
+            "item_categorization.yaml",
+        ),
+        [ModuleNames.HRI.value],
+    )["item_categorization"]["ros__parameters"]
+
     return LaunchDescription(
         [
             Node(
@@ -11,7 +11,7 @@ def generate_launch_description():
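+                # REPLACE values in the YAML are presumably resolved to the frida_constants service names by parse_ros_config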
name="embeddings", output="screen", parameters=[ - {"collections_built": 0} + item_categorization_config ], # Default value (0 means not built) ), ] diff --git a/hri/packages/embeddings/package.xml b/hri/packages/embeddings/package.xml index 8ab32d2..2d7c55b 100644 --- a/hri/packages/embeddings/package.xml +++ b/hri/packages/embeddings/package.xml @@ -12,6 +12,7 @@ rclcpp rclpy frida_interfaces + frida_constants ament_lint_auto ament_lint_common diff --git a/hri/packages/embeddings/scripts/item_categorization.py b/hri/packages/embeddings/scripts/item_categorization.py index fb00484..73552f6 100755 --- a/hri/packages/embeddings/scripts/item_categorization.py +++ b/hri/packages/embeddings/scripts/item_categorization.py @@ -1,49 +1,63 @@ #!/usr/bin/env python3 from pathlib import Path -import pandas as pd + import chromadb -from chromadb.utils import embedding_functions +import pandas as pd import rclpy +from chromadb.utils import embedding_functions from rclpy.node import Node from rclpy.parameter import Parameter + from frida_interfaces.srv import ( AddItem, + BuildEmbeddings, + QueryItem, RemoveItem, UpdateItem, - QueryItem, - BuildEmbeddings, ) class Embeddings(Node): def __init__(self): super().__init__("embeddings") - + self.get_logger().info("Initializing item_categorization.") # Declare parameters for the sentence transformer model and collections built flag self.declare_parameter("Embeddings_model", "all-MiniLM-L12-v2") self.declare_parameter("collections_built", 0) # Default: 0 (not built) + # Parameters for services + self.declare_parameter("ADD_ITEM_SERVICE", "add_item") + self.declare_parameter("REMOVE_ITEM_SERVICE", "remove_item") + self.declare_parameter("UPDATE_ITEM_SERVICE", "update_item") + self.declare_parameter("QUERY_ITEM_SERVICE", "query_item") + self.declare_parameter("BUILD_EMBEDDINGS_SERVICE", "build_embeddings") + + # Resolve parameters model_name_ = ( self.get_parameter("Embeddings_model").get_parameter_value().string_value ) - # Initialize services - self.add_item_service = self.create_service( - AddItem, "add_item", self.add_item_callback + add_item_service = ( + self.get_parameter("ADD_ITEM_SERVICE").get_parameter_value().string_value ) - self.remove_item_service = self.create_service( - RemoveItem, "remove_item", self.remove_item_callback + remove_item_service = ( + self.get_parameter("REMOVE_ITEM_SERVICE").get_parameter_value().string_value ) - self.update_item_service = self.create_service( - UpdateItem, "update_item", self.update_item_callback + update_item_service = ( + self.get_parameter("UPDATE_ITEM_SERVICE").get_parameter_value().string_value ) - self.query_item_service = self.create_service( - QueryItem, "query_item", self.query_item_callback + query_item_service = ( + self.get_parameter("QUERY_ITEM_SERVICE").get_parameter_value().string_value + ) + build_embeddings_service = ( + self.get_parameter("BUILD_EMBEDDINGS_SERVICE") + .get_parameter_value() + .string_value ) # Create the BuildEmbeddings service self.build_embeddings_service = self.create_service( - BuildEmbeddings, "build_embeddings", self.build_embeddings_callback + BuildEmbeddings, build_embeddings_service, self.build_embeddings_callback ) # Initialize ChromaDB client @@ -59,6 +73,21 @@ def __init__(self): # Check if collections are built or need to be built self.check_and_update_collections() + # Initialize services + self.add_item_service = self.create_service( + AddItem, add_item_service, self.add_item_callback + ) + self.remove_item_service = self.create_service( + RemoveItem, 
remove_item_service, self.remove_item_callback + ) + self.update_item_service = self.create_service( + UpdateItem, update_item_service, self.update_item_callback + ) + self.query_item_service = self.create_service( + QueryItem, query_item_service, self.query_item_callback + ) + self.get_logger().info("item_categorization initialized.") + def check_and_update_collections(self): """Check if collections exist and call the method to build them if missing.""" collections = [ @@ -92,7 +121,7 @@ def check_and_update_collections(self): self.get_logger().info( "Collections not found, proceeding to build collections." ) - self.build_embeddings() # Build the collections if not built + self.build_embeddings_callback() # Build the collections if not built def add_item_callback(self, request, response): """Service callback to add items to ChromaDB""" diff --git a/hri/packages/nlp/config/command_interpreter.yaml b/hri/packages/nlp/config/command_interpreter.yaml index 1f32282..912e1e6 100644 --- a/hri/packages/nlp/config/command_interpreter.yaml +++ b/hri/packages/nlp/config/command_interpreter.yaml @@ -4,4 +4,4 @@ command_interpreter: model: "llama3.2" speech_command_topic: "/speech/raw_command" publish_command_topic: "/task_manager/commands" - temperature: 1.0 + temperature: 0.0 diff --git a/hri/packages/nlp/config/extract_data.yaml b/hri/packages/nlp/config/extract_data.yaml index 17924ce..659d1d3 100644 --- a/hri/packages/nlp/config/extract_data.yaml +++ b/hri/packages/nlp/config/extract_data.yaml @@ -1,5 +1,6 @@ -command_interpreter: +extract_data: ros__parameters: base_url: "http://localhost:11434/v1" model: "llama3.2" - EXTRACT_DATA_SERVICE_NAME: "/extract_data" + EXTRACT_DATA_SERVICE: REPLACE + temperature: 0.0 diff --git a/hri/packages/nlp/config/stop_listener.yaml b/hri/packages/nlp/config/llm_utils.yaml similarity index 75% rename from hri/packages/nlp/config/stop_listener.yaml rename to hri/packages/nlp/config/llm_utils.yaml index e952ca1..4337c14 100644 --- a/hri/packages/nlp/config/stop_listener.yaml +++ b/hri/packages/nlp/config/llm_utils.yaml @@ -1,6 +1,8 @@ -command_interpreter: +llm_utils: ros__parameters: base_url: "http://localhost:11434/v1" model: "llama3.2" SPEECH_COMMAND_TOPIC_NAME: "/speech/raw_command" OUT_COMMAND_TOPIC_NAME: "/stop_following" + GRAMMAR_SERVICE: REPLACE + temperature: 0.0 \ No newline at end of file diff --git a/hri/packages/nlp/launch/nlp_launch.py b/hri/packages/nlp/launch/nlp_launch.py index 0aad3c8..24959a3 100755 --- a/hri/packages/nlp/launch/nlp_launch.py +++ b/hri/packages/nlp/launch/nlp_launch.py @@ -1,14 +1,39 @@ +#!/usr/bin/env python3 + import os from ament_index_python.packages import get_package_share_directory from launch import LaunchDescription from launch_ros.actions import Node +from frida_constants import ModuleNames, parse_ros_config + def generate_launch_description(): - command_interpreter_config = os.path.join( - get_package_share_directory("nlp"), "config", "command_interpreter.yaml" - ) + command_interpreter_config = parse_ros_config( + os.path.join( + get_package_share_directory("nlp"), "config", "command_interpreter.yaml" + ), + [ModuleNames.HRI.value], + )["command_interpreter"]["ros__parameters"] + + extract_data_config = parse_ros_config( + os.path.join( + get_package_share_directory("nlp"), + "config", + "extract_data.yaml", + ), + [ModuleNames.HRI.value], + )["extract_data"]["ros__parameters"] + + llm_utils_config = parse_ros_config( + os.path.join( + get_package_share_directory("nlp"), + "config", + "llm_utils.yaml", + ), + 
[ModuleNames.HRI.value], + )["llm_utils"]["ros__parameters"] return LaunchDescription( [ @@ -20,5 +45,21 @@ def generate_launch_description(): emulate_tty=True, parameters=[command_interpreter_config], ), + Node( + package="nlp", + executable="extract_data.py", + name="extract_data", + output="screen", + emulate_tty=True, + parameters=[extract_data_config], + ), + Node( + package="nlp", + executable="llm_utils.py", + name="llm_utils", + output="screen", + emulate_tty=True, + parameters=[llm_utils_config], + ), ] ) diff --git a/hri/packages/nlp/launch/stop_listener.py b/hri/packages/nlp/launch/stop_listener.py deleted file mode 100644 index a41f9cd..0000000 --- a/hri/packages/nlp/launch/stop_listener.py +++ /dev/null @@ -1,22 +0,0 @@ -from launch import LaunchDescription -from launch_ros.actions import Node -import os -from ament_index_python.packages import get_package_share_directory - - -def generate_launch_description(): - config = os.path.join( - get_package_share_directory("nlp"), "config", "stop_listener.yaml" - ) - return LaunchDescription( - [ - Node( - package="nlp", - executable="stop_listener.py", - name="stop_listener", - output="screen", - emulate_tty=True, - parameters=[config], - ), - ] - ) diff --git a/hri/packages/nlp/package.xml b/hri/packages/nlp/package.xml index 32e3e31..476f173 100644 --- a/hri/packages/nlp/package.xml +++ b/hri/packages/nlp/package.xml @@ -13,6 +13,8 @@ rclcpp rclpy frida_interfaces + frida_constants + ament_lint_auto ament_lint_common diff --git a/hri/packages/nlp/scripts/command_interpreter.py b/hri/packages/nlp/scripts/command_interpreter.py index 37b79bb..45feb71 100755 --- a/hri/packages/nlp/scripts/command_interpreter.py +++ b/hri/packages/nlp/scripts/command_interpreter.py @@ -17,6 +17,7 @@ from std_msgs.msg import String from frida_interfaces.msg import Command, CommandList +from frida_interfaces.srv import CommandInterpreter as CommandInterpreterSrv class CommandShape(BaseModel): @@ -41,6 +42,10 @@ def __init__(self): self.declare_parameter("model", "gpt-4o-2024-08-06") self.declare_parameter("speech_command_topic", "/speech/raw_command") self.declare_parameter("publish_command_topic", "/task_manager/commands") + self.declare_parameter( + "COMMAND_INTERPRETER_SERVICE", "/nlp/command_interpreter" + ) + self.declare_parameter("temperature", 0.5) base_url = self.get_parameter("base_url").get_parameter_value().string_value @@ -55,6 +60,11 @@ def __init__(self): .get_parameter_value() .string_value ) + command_interpreter_service = ( + self.get_parameter("COMMAND_INTERPRETER_SERVICE") + .get_parameter_value() + .string_value + ) self.temperature = ( self.get_parameter("temperature").get_parameter_value().double_value @@ -73,6 +83,9 @@ def __init__(self): String, speech_command_topic, self._callback, 10 ) self.publisher = self.create_publisher(CommandList, publish_command_topic, 10) + self.create_service( + CommandInterpreterSrv, command_interpreter_service, self.command_service + ) self.get_logger().info("Initialized Command Interpreter") @@ -83,6 +96,15 @@ def _callback(self, data: String) -> None: def run(self, raw_command: str) -> None: """Method for running the interpretation of the commands""" + commands = self.get_commands(raw_command) + self.publisher.publish(commands) + + def command_service(self, req, res): + commands = self.get_commands(req.text) + res.commands = commands.commands + return res + + def get_commands(self, raw_command: str): response = ( self.client.beta.chat.completions.parse( model=self.model, @@ -96,7 +118,6 @@ def 
run(self, raw_command: str) -> None:
             .choices[0]
             .message.content
         )
-
         try:
             response_data = json.loads(response)
             result = CommandListShape(**response_data)
@@ -105,7 +126,6 @@ def run(self, raw_command: str) -> None:
             return
 
         self.get_logger().debug(f"Commands interpreted: {result.commands}")
-
         command_list = CommandList()
         command_list.commands = [
             Command(
@@ -115,7 +135,7 @@ def run(self, raw_command: str) -> None:
             )
             for command in result.commands
         ]
-        self.publisher.publish(command_list)
+        return command_list
 
 
 def main(args=None):
diff --git a/hri/packages/nlp/scripts/extract_data.py b/hri/packages/nlp/scripts/extract_data.py
index 172c466..b43d76f 100755
--- a/hri/packages/nlp/scripts/extract_data.py
+++ b/hri/packages/nlp/scripts/extract_data.py
@@ -4,14 +4,17 @@
 Python ROS2 node to extract information from text
 """
 
+import json
+import os
+
 # Libraries
 from typing import Optional
+
 import rclpy
-from rclpy.node import Node
-import os
-import openai
+from openai import OpenAI
 from pydantic import BaseModel
-import json
+from rclpy.node import Node
+
 from frida_interfaces.srv import ExtractInfo
 
 EXTRACT_DATA_SERVICE = "/extract_data"
@@ -32,9 +35,10 @@ def __init__(self) -> None:
         """Initialize the ROS2 node"""
         super().__init__("data_extractor")
 
-        self.declare_parameter("base_url", None)
+        self.declare_parameter("base_url", "None")
         self.declare_parameter("model", "gpt-4o-2024-08-06")
-        self.declare_parameter("EXTRACT_DATA_SERVICE_NAME", EXTRACT_DATA_SERVICE)
+        self.declare_parameter("EXTRACT_DATA_SERVICE", EXTRACT_DATA_SERVICE)
+        self.declare_parameter("temperature", 0.5)
 
         base_url = self.get_parameter("base_url").get_parameter_value().string_value
         if base_url == "None":
@@ -42,11 +46,19 @@ def __init__(self) -> None:
         else:
             self.base_url = base_url
 
+        self.temperature = (
+            self.get_parameter("temperature").get_parameter_value().double_value
+        )
+
+        self.client = OpenAI(
+            api_key=os.getenv("OPENAI_API_KEY", "ollama"), base_url=self.base_url
+        )
+
         model = self.get_parameter("model").get_parameter_value().string_value
         self.model = model
 
         EXTRACT_DATA_SERVICE = (
-            self.get_parameter("EXTRACT_DATA_SERVICE_NAME")
+            self.get_parameter("EXTRACT_DATA_SERVICE")
             .get_parameter_value()
             .string_value
         )
@@ -57,7 +69,6 @@ def __init__(self) -> None:
             ExtractInfo, EXTRACT_DATA_SERVICE, self.extract_info_requested
         )
 
-        openai.api_key = os.getenv("OPENAI_API_KEY")
         self.get_logger().info("Data extractor node started")
 
     def extract_info_requested(
@@ -69,9 +80,9 @@ def extract_info_requested(
         instruction = "You will be presented with some text and data to extract. Please provide the requested information or leave empty if it isn't available."
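+        # the node's OpenAI client is called with beta.chat.completions.parse, so the reply comes back as a structured completion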
         response_content = (
-            openai.beta.chat.completions.parse(
+            self.client.beta.chat.completions.parse(
                 model=self.model,
-                base_url=self.base_url,
+                temperature=self.temperature,
                 messages=[
                     {"role": "system", "content": instruction},
                     {
diff --git a/hri/packages/nlp/scripts/stop_listener.py b/hri/packages/nlp/scripts/llm_utils.py
old mode 100644
new mode 100755
similarity index 56%
rename from hri/packages/nlp/scripts/stop_listener.py
rename to hri/packages/nlp/scripts/llm_utils.py
index 921a844..dd485d2
--- a/hri/packages/nlp/scripts/stop_listener.py
+++ b/hri/packages/nlp/scripts/llm_utils.py
@@ -1,11 +1,19 @@
-from pydantic import BaseModel
-import openai
+#!/usr/bin/env python3
+
+"""Miscellaneous functions that interact with an LLM."""
+
+import json
 import os
-import rclpy
-import rclpy.impl.rcutils_logger
-from std_msgs.msg import String, Bool
 from typing import Optional
-import json
+
+import rclpy
+from openai import OpenAI
+from pydantic import BaseModel
+from rclpy.executors import ExternalShutdownException
+from rclpy.node import Node
+from std_msgs.msg import Bool, String
+
+from frida_interfaces.srv import Grammar
 
 SPEECH_COMMAND_TOPIC = "/speech/raw_command"
 OUT_COMMAND_TOPIC = "/stop_following"
@@ -17,20 +25,23 @@ class ResponseFormat(BaseModel):
     is_stop: bool
 
 
-class StopListenerNode(rclpy.node.Node):
-    logger: rclpy.impl.rcutils_logger.RcutilsLogger
+class LLMUtils(Node):
     model: Optional[str]
     base_url: Optional[str]
 
     def __init__(self) -> None:
         global SPEECH_COMMAND_TOPIC, OUT_COMMAND_TOPIC
-        super().__init__("stop_listener")
+        super().__init__("llm_utils")
+        self.logger = self.get_logger()
+        self.logger.info("Initializing llm_utils node")
 
-        self.declare_parameter("base_url", None)
+        self.declare_parameter("base_url", "None")
         self.declare_parameter("model", "gpt-4o-2024-08-06")
         self.declare_parameter("SPEECH_COMMAND_TOPIC_NAME", SPEECH_COMMAND_TOPIC)
         self.declare_parameter("OUT_COMMAND_TOPIC_NAME", OUT_COMMAND_TOPIC)
+        self.declare_parameter("GRAMMAR_SERVICE", "/nlp/grammar")
+        self.declare_parameter("temperature", 0.5)
 
         base_url = self.get_parameter("base_url").get_parameter_value().string_value
         if base_url == "None":
@@ -40,6 +51,12 @@ def __init__(self) -> None:
 
         model = self.get_parameter("model").get_parameter_value().string_value
         self.model = model
+        self.client = OpenAI(
+            api_key=os.getenv("OPENAI_API_KEY", "ollama"), base_url=self.base_url
+        )
+        self.temperature = (
+            self.get_parameter("temperature").get_parameter_value().double_value
+        )
 
         SPEECH_COMMAND_TOPIC = (
             self.get_parameter("SPEECH_COMMAND_TOPIC_NAME")
@@ -53,17 +70,18 @@ def __init__(self) -> None:
             .string_value
         )
 
-        self.logger = self.get_logger()
-        self.logger.info("Starting stop listener node")
+        grammar_service = (
+            self.get_parameter("GRAMMAR_SERVICE").get_parameter_value().string_value
+        )
 
-        openai.api_key = os.getenv("OPENAI_API_KEY")
-        self.logger.info("Stop listener node started")
+        self.create_service(Grammar, grammar_service, self.grammar_service)
 
         # publisher
         self.publisher = self.create_publisher(Bool, OUT_COMMAND_TOPIC, 10)
         self.subscription = self.create_subscription(
             String, SPEECH_COMMAND_TOPIC, self.callback, 10
         )
+        self.logger.info("Initialized llm_utils node")
 
     def callback(self, data: String) -> None:
         if data.data == "" or len(data.data) == 0:
@@ -78,9 +96,9 @@ def callback(self, data: String) -> None:
             self.publisher.publish(msg)
             return
 
-        response = openai.beta.chat.completions.parse(
+        response = self.client.beta.chat.completions.parse(
             model=self.model,
-            base_url=self.base_url,
+            temperature=self.temperature,
             messages=[
                 {
                     "role": "system",
@@ -104,9 +122,39 @@ def callback(self, data: String) -> None:
             self.publisher.publish(msg)
             return
 
+    def grammar_service(self, req, res):
+        response = (
+            self.client.beta.chat.completions.parse(
+                model=self.model,
+                temperature=self.temperature,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You will be presented with some text. Your task is to fix the grammar so that the text is correct. Output ONLY the corrected text, don't include any additional explanations.",
+                    },
+                    {"role": "user", "content": req.text},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
+
+        self.logger.debug(f"Grammar response: {response}")
+
+        res.corrected_text = response
+
+        return res
+
 
 def main(args=None):
     rclpy.init(args=args)
-    node = StopListenerNode()
-    rclpy.spin(node)
-    rclpy.shutdown()
+    try:
+        rclpy.spin(LLMUtils())
+    except (ExternalShutdownException, KeyboardInterrupt):
+        pass
+    finally:
+        rclpy.try_shutdown()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hri/packages/speech/assets/.gitignore b/hri/packages/speech/assets/.gitignore
new file mode 100644
index 0000000..cb4b9ce
--- /dev/null
+++ b/hri/packages/speech/assets/.gitignore
@@ -0,0 +1,3 @@
+
+# Folder to include all the downloaded and asset files
+downloads/
diff --git a/hri/packages/speech/assets/downloads/.gitkeep b/hri/packages/speech/assets/downloads/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/hri/packages/speech/config/hear.yaml b/hri/packages/speech/config/hear.yaml
index 1708a6f..879e231 100644
--- a/hri/packages/speech/config/hear.yaml
+++ b/hri/packages/speech/config/hear.yaml
@@ -2,5 +2,5 @@ hear:
   ros__parameters:
     STT_SERVER_IP: "127.0.0.1:50051"
     START_SERVICE: True
-    STT_SERVICE_NAME: "stt_service"
+    STT_SERVICE_NAME: REPLACE
     detection_publish_topic: "/keyword_detected"
\ No newline at end of file
diff --git a/hri/packages/speech/launch/devices_launch.py b/hri/packages/speech/launch/devices_launch.py
index 5d72979..7b49b14 100755
--- a/hri/packages/speech/launch/devices_launch.py
+++ b/hri/packages/speech/launch/devices_launch.py
@@ -6,24 +6,28 @@
 
 from frida_constants import ModuleNames, parse_ros_config
 
+USE_RESPEAKER = False
+USE_OWW = False
+
 
 def generate_launch_description():
     mic_config = os.path.join(
         get_package_share_directory("speech"), "config", "microphone.yaml"
     )
-    hear_config = os.path.join(
-        get_package_share_directory("speech"), "config", "hear.yaml"
-    )
+    hear_config = parse_ros_config(
+        os.path.join(get_package_share_directory("speech"), "config", "hear.yaml"),
+        [ModuleNames.HRI.value],
+    )["hear"]["ros__parameters"]
 
     speaker_config = parse_ros_config(
         os.path.join(get_package_share_directory("speech"), "config", "speaker.yaml"),
         [ModuleNames.HRI.value],
     )["say"]["ros__parameters"]
 
-    # respeaker_config = os.path.join(
-    #     get_package_share_directory("speech"), "config", "respeaker.yaml"
-    # )
+    respeaker_config = os.path.join(
+        get_package_share_directory("speech"), "config", "respeaker.yaml"
+    )
 
     kws_config = os.path.join(
         get_package_share_directory("speech"), "config", "kws.yaml"
@@ -32,24 +36,65 @@ def generate_launch_description():
         get_package_share_directory("speech"), "config", "useful_audio.yaml"
     )
 
-    return LaunchDescription(
-        [
+    nodes = [
+        Node(
+            package="speech",
+            executable="audio_capturer.py",
+            name="audio_capturer",
+            output="screen",
+            emulate_tty=True,
+            parameters=[mic_config],
+        ),
+        Node(
+            package="speech",
+            executable="hear.py",
+            name="hear",
+            output="screen",
+            emulate_tty=True,
+            parameters=[hear_config],
+        ),
+        Node(
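+            # say.py: text-to-speech node; it also provides the Speak service used by hri_tasks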
+ package="speech", + executable="say.py", + name="say", + output="screen", + emulate_tty=True, + parameters=[speaker_config], + ), + Node( + package="speech", + executable="useful_audio.py", + name="useful_audio", + output="screen", + emulate_tty=True, + parameters=[useful_audio_config], + ), + ] + + if USE_RESPEAKER: + nodes.append( Node( package="speech", - executable="audio_capturer.py", - name="audio_capturer", + executable="respeaker.py", + name="respeaker", output="screen", emulate_tty=True, - parameters=[mic_config], - ), + parameters=[respeaker_config], + ) + ) + + if USE_OWW: + nodes.append( Node( package="speech", - executable="hear.py", - name="hear", + executable="kws_oww.py", + name="kws_oww", output="screen", emulate_tty=True, - parameters=[hear_config], - ), + ) + ) + else: + nodes.append( Node( package="speech", executable="kws.py", @@ -57,30 +102,7 @@ def generate_launch_description(): output="screen", emulate_tty=True, parameters=[kws_config], - ), - # Node( - # package="speech", - # executable="respeaker.py", - # name="respeaker", - # output="screen", - # emulate_tty=True, - # parameters=[respeaker_config], - # ), - Node( - package="speech", - executable="say.py", - name="say", - output="screen", - emulate_tty=True, - parameters=[speaker_config], - ), - Node( - package="speech", - executable="useful_audio.py", - name="useful_audio", - output="screen", - emulate_tty=True, - parameters=[useful_audio_config], - ), - ] - ) + ) + ) + + return LaunchDescription(nodes) diff --git a/hri/packages/speech/launch/hri_launch.py b/hri/packages/speech/launch/hri_launch.py new file mode 100755 index 0000000..82a90f5 --- /dev/null +++ b/hri/packages/speech/launch/hri_launch.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +import os + +from ament_index_python.packages import get_package_share_directory +from launch import LaunchDescription +from launch.actions import IncludeLaunchDescription +from launch.launch_description_sources import PythonLaunchDescriptionSource + + +def generate_launch_description(): + # Find the package containing the included launch files + speech_launch_path = os.path.join( + get_package_share_directory("speech"), "launch", "devices_launch.py" + ) + nlp_launch_path = os.path.join( + get_package_share_directory("nlp"), "launch", "nlp_launch.py" + ) + embeddings_launch_path = os.path.join( + get_package_share_directory("embeddings"), "launch", "chroma_launch.py" + ) + + return LaunchDescription( + [ + IncludeLaunchDescription(PythonLaunchDescriptionSource(speech_launch_path)), + IncludeLaunchDescription(PythonLaunchDescriptionSource(nlp_launch_path)), + IncludeLaunchDescription( + PythonLaunchDescriptionSource(embeddings_launch_path) + ), + ] + ) diff --git a/hri/packages/speech/package.xml b/hri/packages/speech/package.xml index 7f37ab4..b30c26a 100644 --- a/hri/packages/speech/package.xml +++ b/hri/packages/speech/package.xml @@ -13,6 +13,7 @@ rclcpp rclpy frida_interfaces + frida_constants ament_lint_auto ament_lint_common diff --git a/hri/packages/speech/scripts/hear.py b/hri/packages/speech/scripts/hear.py index 6056e43..4f2d71e 100755 --- a/hri/packages/speech/scripts/hear.py +++ b/hri/packages/speech/scripts/hear.py @@ -1,17 +1,18 @@ #!/usr/bin/env python3 +import os +import sys + +import grpc import rclpy -from rclpy.node import Node -from rclpy.executors import ExternalShutdownException, MultiThreadedExecutor from rclpy.callback_groups import MutuallyExclusiveCallbackGroup -import grpc -from frida_interfaces.msg import AudioData -from frida_interfaces.srv 
import STT -from std_msgs.msg import Bool, String +from rclpy.executors import ExternalShutdownException, MultiThreadedExecutor +from rclpy.node import Node from speech.speech_api_utils import SpeechApiUtils +from std_msgs.msg import Bool, String -import sys -import os +from frida_interfaces.msg import AudioData +from frida_interfaces.srv import STT # Add the directory containing the protos to the Python path sys.path.append(os.path.join(os.path.dirname(__file__), "stt")) diff --git a/hri/packages/speech/scripts/say.py b/hri/packages/speech/scripts/say.py index ef1f031..c86be9f 100755 --- a/hri/packages/speech/scripts/say.py +++ b/hri/packages/speech/scripts/say.py @@ -10,13 +10,21 @@ from rclpy.node import Node from speech.speech_api_utils import SpeechApiUtils from speech.wav_utils import WavUtils -from std_msgs.msg import Bool, String +from std_msgs.msg import Bool from frida_constants.hri_constants import SPEAK_SERVICE from frida_interfaces.srv import Speak CURRENT_FILE_PATH = os.path.abspath(__file__) -VOICE_DIRECTORY = os.path.join(os.path.dirname(CURRENT_FILE_PATH), "offline_voice") + +FILE_DIR = CURRENT_FILE_PATH[: CURRENT_FILE_PATH.index("install")] +ASSETS_DIR = os.path.join( + FILE_DIR, "src", "hri", "packages", "speech", "assets", "downloads" +) + +VOICE_DIRECTORY = os.path.join(ASSETS_DIR, "offline_voice") + +os.makedirs(VOICE_DIRECTORY, exist_ok=True) class Say(Node): @@ -28,7 +36,6 @@ def __init__(self): self.declare_parameter("speaking_topic", "/saying") self.declare_parameter("SPEAK_SERVICE", SPEAK_SERVICE) - self.declare_parameter("speak_topic", "/speech/speak_now") self.declare_parameter("model", "en_US-amy-medium") self.declare_parameter("offline", True) @@ -57,9 +64,7 @@ def __init__(self): speak_service = ( self.get_parameter("SPEAK_SERVICE").get_parameter_value().string_value ) - speak_topic = ( - self.get_parameter("speak_topic").get_parameter_value().string_value - ) + speaking_topic = ( self.get_parameter("speaking_topic").get_parameter_value().string_value ) @@ -71,20 +76,20 @@ def __init__(self): self.connected = SpeechApiUtils.is_connected() self.create_service(Speak, speak_service, self.speak_service) - self.create_subscription(String, speak_topic, self.speak_topic, 10) self.publisher_ = self.create_publisher(Bool, speaking_topic, 10) self.get_logger().info("Say node initialized.") - def speak_service(self, req): - """When say is called as a service. Caller awaits for the response.""" + def speak_service(self, req, res): self.get_logger().debug("[Service] I will say: " + req.text) - return self.say(req.text) - - def speak_topic(self, msg): - """When say is called as a topic. 
Caller doesn't wait for response.""" - self.get_logger().debug("[Topic] I will say: " + msg.data) - self.say(msg.data) + if req.text: + self.say(req.text) + res.success = True + else: + res.success = False + self.get_logger().info("[Service] Nothing to say.") + + return res def say(self, text): self.publisher_.publish(Bool(data=True)) diff --git a/hri/requirements/embeddings.txt b/hri/requirements/embeddings.txt deleted file mode 100644 index e8ca769..0000000 --- a/hri/requirements/embeddings.txt +++ /dev/null @@ -1,5 +0,0 @@ - -# Embeddings dependencies -chromadb==0.6.3 -pandas==2.2.3 - diff --git a/hri/requirements/nlp.txt b/hri/requirements/nlp.txt index e272cbd..4f343c2 100644 --- a/hri/requirements/nlp.txt +++ b/hri/requirements/nlp.txt @@ -17,3 +17,7 @@ threadpoolctl==3.4.0 tokenizers==0.15.2 transformers==4.39.3 #triton==2.2.0 + +# Embeddings dependencies +chromadb==0.6.3 +pandas==2.2.3 \ No newline at end of file diff --git a/hri/requirements/speech.txt b/hri/requirements/speech.txt index 58393bc..1a72695 100644 --- a/hri/requirements/speech.txt +++ b/hri/requirements/speech.txt @@ -14,6 +14,9 @@ pygame pvporcupine==3.0.2 pvrecorder==1.2.2 +# KWS with openwakeword +openwakeword + # Respeaker pyusb==1.2.1 pixel-ring==0.1.0 diff --git a/task_manager/scripts/config/hri/debug.py b/task_manager/scripts/config/hri/debug.py index 92f1b0c..4a6defd 100644 --- a/task_manager/scripts/config/hri/debug.py +++ b/task_manager/scripts/config/hri/debug.py @@ -2,7 +2,7 @@ from utils.config import SubtaskConfig -from frida_constants.hri_constants import COMMAND_INTERPRETER_SERVICE, HEAR_SERVICE +from frida_constants.hri_constants import COMMAND_INTERPRETER_SERVICE, STT_SERVICE_NAME def mock_extract_data(query, complete_text): @@ -20,22 +20,22 @@ def mock_extract_data(query, complete_text): "enabled": False, "type": "service", }, - {"topic_name": HEAR_SERVICE, "enabled": False, "type": "service"}, + {"topic_name": STT_SERVICE_NAME, "enabled": False, "type": "service"}, ], "mock_config": [ { "function_name": "extract_data", - "enabled": True, + "enabled": False, "mock_data": mock_extract_data, }, { "function_name": "say", - "enabled": True, + "enabled": False, "mock_data": "Succeeded!", }, { "function_name": "hear", - "enabled": True, + "enabled": False, "mock_data": "Hi Frida, can you bring me a glass of water?", }, ], diff --git a/task_manager/scripts/subtask_managers/hri_tasks.py b/task_manager/scripts/subtask_managers/hri_tasks.py index bf3a2b4..8b4ec44 100755 --- a/task_manager/scripts/subtask_managers/hri_tasks.py +++ b/task_manager/scripts/subtask_managers/hri_tasks.py @@ -8,15 +8,26 @@ import rclpy from rclpy.node import Node -from subtask_meta import SubtaskMeta +from subtask_managers.subtask_meta import SubtaskMeta from frida_constants.hri_constants import ( + ADD_ITEM_SERVICE, COMMAND_INTERPRETER_SERVICE, - DATA_EXTRACTOR_SERVICE, - HEAR_SERVICE, + EXTRACT_DATA_SERVICE, + GRAMMAR_SERVICE, + QUERY_ITEM_SERVICE, SPEAK_SERVICE, + STT_SERVICE_NAME, +) +from frida_interfaces.srv import ( + STT, + AddItem, + CommandInterpreter, + ExtractInfo, + Grammar, + QueryItem, + Speak, ) -from frida_interfaces.srv import STT, CommandInterpreter, ExtractInfo, Speak TIMEOUT = 5.0 @@ -26,46 +37,115 @@ class HRITasks(metaclass=SubtaskMeta): STATE = {"TERMINAL_ERROR": -1, "EXECUTION_ERROR": 0, "EXECUTION_SUCCESS": 1} - def __init__(self, task_manager, config) -> None: + # TODO: perform service checks using config.topic_config + def __init__(self, task_manager, config=None) -> None: self.node = task_manager - 
self.speak_service = self.node.create_client(Speak, SPEAK_SERVICE)
-        self.hear_service = self.node.create_client(STT, HEAR_SERVICE)
-        self.extract_data_service = self.node.create_client(
-            ExtractInfo, DATA_EXTRACTOR_SERVICE
+        self.speak_client = self.node.create_client(Speak, SPEAK_SERVICE)
+        self.hear_client = self.node.create_client(STT, STT_SERVICE_NAME)
+        self.extract_data_client = self.node.create_client(
+            ExtractInfo, EXTRACT_DATA_SERVICE
         )
-        self.command_interpreter_service = self.node.create_client(
+        self.command_interpreter_client = self.node.create_client(
             CommandInterpreter, COMMAND_INTERPRETER_SERVICE
         )
+        self.grammar_client = self.node.create_client(Grammar, GRAMMAR_SERVICE)
+
+        self.query_item_client = self.node.create_client(QueryItem, QUERY_ITEM_SERVICE)
+        self.add_item_client = self.node.create_client(AddItem, ADD_ITEM_SERVICE)
 
-    def say(self, text: str, now: bool = False) -> None:
+    def say(self, text: str, wait: bool = False) -> int:
         """Method to publish directly text to the speech node"""
         self.node.get_logger().info(f"Sending to saying service: {text}")
+        request = Speak.Request(text=text)
+
+        future = self.speak_client.call_async(request)
+
+        if wait:
+            rclpy.spin_until_future_complete(self.node, future)
+            return (
+                HRITasks.STATE["EXECUTION_SUCCESS"]
+                if future.result().success
+                else HRITasks.STATE["EXECUTION_ERROR"]
+            )
+        return HRITasks.STATE["EXECUTION_SUCCESS"]
+
+    def extract_data(self, query, complete_text) -> str:
+        """
+        Extracts data from the given query and complete text.
+
+        Args:
+            query (str): specifies what to extract from complete_text.
+            complete_text (str): The complete text from which data is to be extracted.
+
+        Returns:
+            str: The extracted data as a string. If no data is found, an empty string is returned.
+        """
+        self.node.get_logger().info(
+            f"Sending to extract data service: query={query}, text={complete_text}"
+        )
 
-        self.speak_service(text)
+        request = ExtractInfo.Request(data=query, full_text=complete_text)
+        future = self.extract_data_client.call_async(request)
+        rclpy.spin_until_future_complete(self.node, future)
+        return future.result().result
 
-    def extract_date(self, query, complete_text) -> str:
-        pass
+    def hear(self) -> str:
+        request = STT.Request()
 
-    def hear(self, timeout: float) -> str:
-        pass
+        future = self.hear_client.call_async(request)
 
-    def interpret_keyword(self, keyword: str, timeout: float) -> str:
-        pass
+        rclpy.spin_until_future_complete(self.node, future)
 
-    def refactor_sentence(self, sentence: str) -> str:
-        pass
+        return future.result().text_heard
 
-    def find_closest(self, query: str, options: Union[list[str], str]) -> str:
+    # TODO
+    def interpret_keyword(self, keyword: Union[list[str], str], timeout: float) -> str:
+        """
+        Interprets the given keyword(s) within a specified timeout period.
+        Args:
+            keyword (Union[list[str], str]): The keyword or list of keywords to interpret.
+            timeout (float): The maximum time allowed for interpretation in seconds.
+        Returns:
+            str: The interpreted result as a string, or an empty string if no result is found within the timeout period.
+        """
         pass
 
+    def refactor_text(self, text: str) -> str:
+        request = Grammar.Request(text=text)
+        future = self.grammar_client.call_async(request)
+        rclpy.spin_until_future_complete(self.node, future)
+        return future.result().corrected_text
+
+    def find_closest(self, query: str, collection: str, top_k: int = 1) -> list[str]:
+        """
+        Finds the closest matching item in a specified collection based on the given query.
+
+        Args:
+            query (str): The search query to find the closest match for.
+            collection (str): The name of the collection to search within.
+            top_k (int, optional): The number of top matches to return. Defaults to 1.
+
+        Returns:
+            list[str]: The closest matching item(s) from the collection.
+        """
+        request = QueryItem.Request(query=query, collection=collection, topk=top_k)
+        future = self.query_item_client.call_async(request)
+        rclpy.spin_until_future_complete(self.node, future)
+
+        return future.result().results
+
+    # TODO
     def ask(self, question: str) -> str:
-        """Method to publish directly text to the speech node"""
         pass
 
-    def command_interpreter(self, text: str) -> str:
-        pass
+    def command_interpreter(self, text: str) -> list:
+        request = CommandInterpreter.Request(text=text)
+        future = self.command_interpreter_client.call_async(request)
+        rclpy.spin_until_future_complete(self.node, future)
+
+        return future.result().commands
 
 
 if __name__ == "__main__":
diff --git a/task_manager/scripts/test_manager.py b/task_manager/scripts/test_manager.py
index 8dc6830..cfdee37 100755
--- a/task_manager/scripts/test_manager.py
+++ b/task_manager/scripts/test_manager.py
@@ -22,15 +22,44 @@ def __init__(self):
 
     def run(self):
         """testing vision tasks"""
-        user_request = self.subtask_manager["hri"].hear()
-        say_res = self.subtask_manager["hri"].say("Hi, my name is frida")
+        # self.subtask_manager["hri"].say(
+        #     "Hi, my name is frida. What is your favorite drink?", wait=True
+        # )
+        # self.get_logger().info("Hearing from the user...")
 
-        print("user_request:", user_request)
-        print("say_res:", say_res)
+        # # This line does run
+        # user_request = self.subtask_manager["hri"].hear()
 
-        drink = self.subtask_manager["hri"].extract_data("Drink", user_request)
+        # self.get_logger().info(f"Heard: {user_request}")
 
-        self.get_logger().info(f"Extracted data: {drink}")
+        # drink = self.subtask_manager["hri"].extract_data("Drink", user_request)
+
+        # self.get_logger().info(f"Extracted data: {drink}")
+
+        # commands = self.subtask_manager["hri"].command_interpreter(user_request)
+
+        # self.get_logger().info(f"Interpreted commands: {commands}")
+
+        # command_strs = [
+        #     f"I will do action:{command.action}, ({command.complement}), ({command.characteristic})"
+        #     for command in commands
+        # ]
+        # command_str = " and ".join(command_strs)
+
+        # fixed_text = self.subtask_manager["hri"].refactor_text(command_str)
+        # self.subtask_manager["hri"].say(fixed_text)
+
+        self.subtask_manager["hri"].say("I'm Frida, can you tell me where to go?")
+        location_hint = self.subtask_manager["hri"].hear()
+
+        # Previous line doesn't return
+        self.get_logger().info(f"location_hint: {location_hint}")
+
+        closest_found = self.subtask_manager["hri"].find_closest(
+            location_hint, "locations"
+        )
+
+        self.subtask_manager["hri"].say(f"Got it, I will go to {closest_found}!")
 
 
 def main(args=None):