From a7bba52887e9df338b750b981315801742d371a2 Mon Sep 17 00:00:00 2001
From: mikramarc <michal@shadowrobot.com>
Date: Wed, 26 May 2021 09:49:07 +0000
Subject: [PATCH 1/4] fixing

---
 sr_speech_control/README.md                   | 48 ++++------
 .../config/similar_words_dict.yaml            |  3 +
 .../src/sr_speech_control/speech_control.py   | 87 +++++++++++++------
 3 files changed, 79 insertions(+), 59 deletions(-)
 create mode 100644 sr_speech_control/config/similar_words_dict.yaml
diff --git a/sr_speech_control/README.md b/sr_speech_control/README.md
index 008ec04f..3b716dfd 100644
--- a/sr_speech_control/README.md
+++ b/sr_speech_control/README.md
@@ -1,7 +1,7 @@
 # sr_speech_control
 
 A node for controlling various systems using speech.
-Commands are being sent to a topic `sr_speech_control` in a form of std_msgs
+Commands are being sent to a chosen topic in a form of std_msgs
 String and can be intercepted by other nodes in order to execute actions.
 
 ## Usage
@@ -12,38 +12,22 @@ rosrun sr_speech_control speech_control.py
 ```
 
 The node will permanenly listen to the microphone input, will use Google speech
-recognition to translate audio to text, check if text starts with a trigger
-word `shadow` and if so will publish text after the trigger word to topic
-`sr_speech_control`.
+recognition to translate audio to text, check if the text starts with a chosen trigger word and, if so, publish the recognised command word that follows the trigger word to a topic (`sr_speech_control` by default).
 
-## Known problems
+The `SpeechControl` class accepts the following parameters that can be provided in order to modify its behaviour:
+- `trigger_word` - sets the word that must precede a command for it to be sent
+- `command_words` - list of commands that are allowed to be sent
+- `similar_words_dict_path` - path to a YAML file containing a dictionary of words that are easily mistaken for the trigger word or one of the command words
+- `non_speaking_duration` - seconds of non-speaking audio to keep on both sides of the recording
+- `pause_threshold` - seconds of non-speaking audio before a phrase is considered complete
 
-Testing revealed that microphone devices available within Docker container are
-significantly worse than on the host machine. That can be easily demonstrated
-by testing Google Chrome browser speech recognition on the host and inside
-Docker container. Known workaround for that is to use pulseaudio server on host
-machine for sound capture. For that install `paprefs` program on the host:
-```
-sudo apt-get install paprefs
-```
-and run it. In "Network Server" tab, and check the "Enable network access to
-local sound devices" checkbox and other two sub-checkboxes in order not to
-require authentications. You might need to reboot host machine for this setting
-to be used.
-Run `pax11publish` utility program to find out pulseaudio server port (most
-likely 4713).
-On the container run:
-```
-export "PULSE_SERVER=tcp:<host IP address>:<host pulseaudio port>"
-```
-For example if your host IP address is `192.168.1.2`, then run:
-```
-export "PULSE_SERVER=tcp:192.168.1.2:4713"
-```
-Alternatively it is possible to map unix sockets instead of tcp but it requires
-adding new parameters when launching Docker container.
+An example usage can be seen in the `speech_control.py` file:
+```python
+    trigger_word = "shadow"
+    command_words = ["grasp", "release", "disable", "enable", "engage"]
+    similar_words_dict_path = rospkg.RosPack().get_path('sr_speech_control') + '/config/similar_words_dict.yaml'
+
+    sc = SpeechControl(trigger_word, command_words, similar_words_dict_path=similar_words_dict_path)
+    sc.run()
 
-To use node with pulseaudio microphone specify `prefer_microphone` parameter:
-```
-rosrun sr_speech_control speech_control.py _prefer_microphone:=pulse
 ```
diff --git a/sr_speech_control/config/similar_words_dict.yaml  b/sr_speech_control/config/similar_words_dict.yaml 
new file mode 100644
index 00000000..140b1ee1
--- /dev/null
+++ b/sr_speech_control/config/similar_words_dict.yaml 	
@@ -0,0 +1,3 @@
+shallow: shadow
+shiloh: shadow
+app: up
diff --git a/sr_speech_control/src/sr_speech_control/speech_control.py b/sr_speech_control/src/sr_speech_control/speech_control.py
index ae6570c6..a952ab04 100755
--- a/sr_speech_control/src/sr_speech_control/speech_control.py
+++ b/sr_speech_control/src/sr_speech_control/speech_control.py
@@ -15,33 +15,50 @@
 # with this program. If not, see <http://www.gnu.org/licenses/>.
 
 import rospy
+import rospkg
+import time
 import speech_recognition as sr
 from difflib import get_close_matches
 from std_msgs.msg import String
+import yaml
 
 
 class SpeechControl(object):
-    def __init__(self):
-        self.microphone = sr.Microphone()
-        prefer_microphone = rospy.get_param('~prefer_microphone')
-        if prefer_microphone:
-            for i, microphone_name in enumerate(sr.Microphone.list_microphone_names()):
-                if prefer_microphone in microphone_name:
-                    self.microphone = sr.Microphone(device_index=i)
-                    rospy.loginfo("Using preferred microphone: {}".format(microphone_name))
-                    break
-        self.trigger_word = rospy.get_param('~trigger_word', 'shadow')
+    def __init__(self, trigger_word, command_words, command_topic='sr_speech_control',
+                 similar_words_dict_path=None, non_speaking_duration=0.2, pause_threshold=0.2):
+        self.trigger_word = trigger_word
+        self.command_words = command_words
         self.recognizer = sr.Recognizer()
-        self._set_param_if_provided(self.recognizer, 'non_speaking_duration')
-        self._set_param_if_provided(self.recognizer, 'pause_threshold')
-        self.command_words = rospy.get_param('~command_words', [])
-        topic = rospy.get_param('~topic', 'sr_speech_control')
-        self.command_publisher = rospy.Publisher(topic, String, queue_size=1)
+        self.command_publisher = rospy.Publisher(command_topic, String, queue_size=1)
+        self.command_to_be_executed = None
+        self.similar_words_dict = {}
+
+        if similar_words_dict_path:
+            self.parse_similar_words_dict(similar_words_dict_path)
+        self._init_recognizer(non_speaking_duration, pause_threshold)
         self._stop_listening = self.recognizer.listen_in_background(self.microphone, self._recognizer_callback)
 
-    def _set_param_if_provided(self, object_to_set, param_name):
-        if rospy.has_param('~' + param_name):
-            setattr(object_to_set, param_name, rospy.get_param('~' + param_name))
+
+    def parse_similar_words_dict(self, path_name):
+        with open(path_name, 'r') as stream:
+            self.similar_words_dict = yaml.safe_load(stream)
+
+    def _init_recognizer(self, non_speaking_duration, pause_threshold):
+        for idx, mic in enumerate(sr.Microphone.list_microphone_names()):
+            rospy.loginfo('{}: {}'.format(idx, mic))
+
+        while True:
+            try:
+                idx = input("Choose one of the microphones from the list above. Type the index and press [RETURN]\n")
+                self.microphone = sr.Microphone(device_index=int(idx))
+                with self.microphone as source:
+                    self.recognizer.adjust_for_ambient_noise(source)
+                    break
+            except OSError:
+                rospy.logwarn("Wrong microphone. Try again.")
+
+        self.recognizer.non_speaking_duration = non_speaking_duration
+        self.recognizer.pause_threshold = pause_threshold
 
     def _recognizer_callback(self, recognizer, audio):
         try:
@@ -53,21 +70,37 @@ def _recognizer_callback(self, recognizer, audio):
             return
 
         result = [str(x).lower() for x in result.split(' ')]
-        if len(result) > 1:
-            if self._filter_word(result[0], [self.trigger_word]) == self.trigger_word:
-                self.command_publisher.publish(' '.join([self._filter_word(x, self.command_words) for x in result[1:]]))
+
+        if self._filter_word(result[0], self.trigger_word) == self.trigger_word:
+            command = self._filter_word(''.join(result[1:]), self.command_words)
+            if command in self.command_words:
+                self.command_to_be_executed = command
 
     def _filter_word(self, word, dictionary, offset=0.5):
+        if word in self.similar_words_dict:
+            word = self.similar_words_dict[word]
+
         result = get_close_matches(word, dictionary, 1, offset)
         if not result:
             return word
         return result[0]
 
+    def run(self):
+        rospy.loginfo("Started speech control. Trigger word: {}".format(self.trigger_word))
+        while not rospy.is_shutdown():
+                if self.command_to_be_executed:
+                    rospy.loginfo("Executing: {}.".format(self.command_to_be_executed))
+                    self.command_publisher.publish(self.command_to_be_executed)
+                    self.command_to_be_executed = None
+        self._stop_listening(wait_for_stop=False)
+
+
 if __name__ == "__main__":
-    rospy.init_node('sr_speech_control', anonymous=True)
+    rospy.init_node('example_speech_control', anonymous=True)
+
+    trigger_word = "shadow"
+    command_words = ["grasp", "release", "disable", "enable", "engage"]
+    similar_words_dict_path = rospkg.RosPack().get_path('sr_speech_control') + '/config/similar_words_dict.yaml'
 
-    sc = SpeechControl()
-    rospy.loginfo("Started speech control. Trigger word: {}, command words: {}".format(
-        sc.trigger_word, sc.command_words))
-    rospy.spin()
-    sc._stop_listening(wait_for_stop=False)
+    sc = SpeechControl(trigger_word, command_words, similar_words_dict_path=similar_words_dict_path)
+    sc.run()

From e4d2d2f379812cde5cd720cb887c05bf0dbaf074 Mon Sep 17 00:00:00 2001
From: mikramarc <michal@shadowrobot.com>
Date: Wed, 26 May 2021 09:51:40 +0000
Subject: [PATCH 2/4] fixing typo

---
 .../config/{similar_words_dict.yaml  => similar_words_dict.yaml}  | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename sr_speech_control/config/{similar_words_dict.yaml  => similar_words_dict.yaml} (100%)

diff --git a/sr_speech_control/config/similar_words_dict.yaml  b/sr_speech_control/config/similar_words_dict.yaml
similarity index 100%
rename from sr_speech_control/config/similar_words_dict.yaml 
rename to sr_speech_control/config/similar_words_dict.yaml

From f9e080387e4663e7d11dda99eb2a30e2e4e36a43 Mon Sep 17 00:00:00 2001
From: mikramarc <michal@shadowrobot.com>
Date: Wed, 26 May 2021 11:30:15 +0100
Subject: [PATCH 3/4] Update speech_control.py

---
 .../src/sr_speech_control/speech_control.py    | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/sr_speech_control/src/sr_speech_control/speech_control.py b/sr_speech_control/src/sr_speech_control/speech_control.py
index a952ab04..1401090b 100755
--- a/sr_speech_control/src/sr_speech_control/speech_control.py
+++ b/sr_speech_control/src/sr_speech_control/speech_control.py
@@ -38,7 +38,6 @@ def __init__(self, trigger_word, command_words, command_topic='sr_speech_control
         self._init_recognizer(non_speaking_duration, pause_threshold)
         self._stop_listening = self.recognizer.listen_in_background(self.microphone, self._recognizer_callback)
 
-
     def parse_similar_words_dict(self, path_name):
         with open(path_name, 'r') as stream:
             self.similar_words_dict = yaml.safe_load(stream)
@@ -47,10 +46,13 @@ def _init_recognizer(self, non_speaking_duration, pause_threshold):
         for idx, mic in enumerate(sr.Microphone.list_microphone_names()):
             rospy.loginfo('{}: {}'.format(idx, mic))
 
-        while True:
+        while not rospy.is_shutdown():
             try:
-                idx = input("Choose one of the microphones from the list above. Type the index and press [RETURN]\n")
-                self.microphone = sr.Microphone(device_index=int(idx))
+                idx = raw_input("Choose one of the microphones from the list above. Type the index and press [RETURN]\n")
+                if not idx:
+                    self.microphone = sr.Microphone()
+                else:
+                    self.microphone = sr.Microphone(device_index=int(idx))
                 with self.microphone as source:
                     self.recognizer.adjust_for_ambient_noise(source)
                     break
@@ -88,10 +90,10 @@ def _filter_word(self, word, dictionary, offset=0.5):
     def run(self):
         rospy.loginfo("Started speech control. Trigger word: {}".format(self.trigger_word))
         while not rospy.is_shutdown():
-                if self.command_to_be_executed:
-                    rospy.loginfo("Executing: {}.".format(self.command_to_be_executed))
-                    self.command_publisher.publish(self.command_to_be_executed)
-                    self.command_to_be_executed = None
+            if self.command_to_be_executed:
+                rospy.loginfo("Executing: {}.".format(self.command_to_be_executed))
+                self.command_publisher.publish(self.command_to_be_executed)
+                self.command_to_be_executed = None
         self._stop_listening(wait_for_stop=False)
 
 

From 3d7e19bb5e77f111686b5b1b70325f33c5eb1de7 Mon Sep 17 00:00:00 2001
From: mikramarc <michal@shadowrobot.com>
Date: Wed, 26 May 2021 12:07:36 +0000
Subject: [PATCH 4/4] lint

---
 sr_speech_control/src/sr_speech_control/speech_control.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sr_speech_control/src/sr_speech_control/speech_control.py b/sr_speech_control/src/sr_speech_control/speech_control.py
index 1401090b..b5282f64 100755
--- a/sr_speech_control/src/sr_speech_control/speech_control.py
+++ b/sr_speech_control/src/sr_speech_control/speech_control.py
@@ -48,7 +48,7 @@ def _init_recognizer(self, non_speaking_duration, pause_threshold):
 
         while not rospy.is_shutdown():
             try:
-                idx = raw_input("Choose one of the microphones from the list above. Type the index and press [RETURN]\n")
+                idx = raw_input("Pick one of the microphones from the list above. Type the index and execute.\n")
                 if not idx:
                     self.microphone = sr.Microphone()
                 else: