Merge pull request #1110 from pointcontrols/audio_capture_windows

Audio capture support on Windows
pupil-labs · Mar 13, 2018 · 7d323da · 7d323da
2 parents 65a5ec4 + 9ae78d1
commit 7d323da
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 14 deletions.
diff --git a/pupil_src/launchables/world.py b/pupil_src/launchables/world.py
@@ -228,6 +228,7 @@ def on_resize(window, w, h):
             camera_render_size = w-int(icon_bar_width*g_pool.gui.scale), h
             g_pool.gui.update_window(*window_size)
             g_pool.gui.collect_menus()
+
             for p in g_pool.plugins:
                 p.on_window_resize(window, *camera_render_size)
 

diff --git a/pupil_src/shared_modules/audio/__init__.py b/pupil_src/shared_modules/audio/__init__.py
@@ -22,6 +22,7 @@
 logger = logging.getLogger(__name__)
 
 
+
 audio_modes = ('voice and sound', 'sound only','voice only','silent')
 default_audio_mode = audio_modes[0]
 audio_mode = default_audio_mode
@@ -136,6 +137,36 @@ def say(message):
         if 'voice' in audio_mode:
             sp.Popen(["say", message, "-v" "Victoria"])
 
+elif os_name == "Windows":
+    def beep():
+        """Write the ASCII bell character to stdout when sound is enabled.
+
+        Does nothing unless the module-level ``audio_mode`` contains 'sound'.
+        """
+        if 'sound' not in audio_mode:
+            return
+        print('\a')
+
+    def tink():
+        """Write the ASCII bell character to stdout when sound is enabled.
+
+        On this platform branch the body is the same as ``beep``: a single
+        bell character, gated on 'sound' being part of ``audio_mode``.
+        """
+        if 'sound' not in audio_mode:
+            return
+        print('\a')
+
+    def say(message):
+        """Print a bell character followed by ``message`` when voice is enabled.
+
+        Does nothing unless the module-level ``audio_mode`` contains 'voice'.
+        """
+        if 'voice' not in audio_mode:
+            return
+        # Two separate newline-terminated writes: the bell first, then the text.
+        for chunk in ('\a', message):
+            print(chunk)
+
+
+    class Audio_Input_Dict(dict):
+        """Mapping of DirectSound audio input devices, keyed by device name.
+
+        Every DirectSound device that reports at least one input channel is
+        stored as ``name -> name``. A final ``'No Audio' -> None`` entry is
+        always present, including when pyaudio is not installed or the
+        device query fails.
+        """
+
+        def __init__(self):
+            super().__init__()
+            try:
+                # Imported lazily so that pyaudio stays an optional dependency.
+                import pyaudio as pa
+            except ImportError:
+                logger.info('Please install pyaudio for audio capture on Windows')
+            else:
+                self._add_direct_sound_inputs(pa)
+            self['No Audio'] = None
+
+        def _add_direct_sound_inputs(self, pa):
+            """Add one entry per DirectSound device that has input channels.
+
+            ``pa`` is the imported ``pyaudio`` module.
+            """
+            port_audio = pa.PyAudio()
+            try:
+                ds_info = port_audio.get_host_api_info_by_type(pa.paDirectSound)
+                for dev_idx in range(ds_info['deviceCount']):
+                    dev_info = port_audio.get_device_info_by_host_api_device_index(
+                        ds_info['index'], dev_idx)
+                    if dev_info['maxInputChannels'] > 0:
+                        self[dev_info['name']] = dev_info['name']
+            except IOError as err:
+                # pyaudio raises IOError for an unavailable host API or an
+                # invalid device index; keep the dict usable with 'No Audio'.
+                logger.warning('Could not list DirectSound audio inputs: {}'.format(err))
+            finally:
+                # Always release the PortAudio session, even if the scan failed.
+                port_audio.terminate()
+
 else:
     def beep():
         if 'sound' in audio_mode:

diff --git a/pupil_src/shared_modules/audio_capture.py b/pupil_src/shared_modules/audio_capture.py
@@ -152,6 +152,8 @@ def capture_thread(self, audio_src, running, recording):
                 in_container = av.open('none:{}'.format(audio_src), format="avfoundation")
             elif platform.system() == "Linux":
                 in_container = av.open('hw:{}'.format(audio_src), format="alsa")
+            elif platform.system() == "Windows":
+                in_container = av.open('audio={}'.format(audio_src), format="dshow", options={'audio_buffer_size':'23'})
             else:
                 raise av.AVError('Platform does not support audio capture.')
         except av.AVError as err:
@@ -172,7 +174,6 @@ def capture_thread(self, audio_src, running, recording):
         out_container = None
         out_stream = None
         timestamps = None
-        out_frame_num = 0
         in_frame_size = 0
 
         stream_epoch = in_stream.start_time * in_stream.time_base
@@ -196,22 +197,28 @@ def close_recording():
             # Bind nonlocal variables, https://www.python.org/dev/peps/pep-3104/
             nonlocal out_container, out_stream, in_stream, in_frame_size, timestamps, out_frame_num
             if out_container is not None:
-                packet = out_stream.encode(audio_frame)
-                while packet is not None:
-                    out_frame_num += 1
-                    out_container.mux(packet)
-                    packet = out_stream.encode(audio_frame)
+                timestamps.append(timestamp)
+                out_packets = [out_stream.encode(audio_frame)]
+                while out_packets[-1]:
+                    out_packets.append(out_stream.encode(None))
+                for out_packet in out_packets:
+                    if out_packet is not None:
+                        out_container.mux(out_packet)
                 out_container.close()
 
+                out_frame_num = out_stream.frames
                 in_frame_rate = in_stream.rate
                 # in_stream.frame_size does not return the correct value.
                 out_frame_size = out_stream.frame_size
                 out_frame_rate = out_stream.rate
 
-                old_ts_idx = np.arange(0, len(timestamps) * in_frame_size, in_frame_size) / in_frame_rate
                 new_ts_idx = np.arange(0, out_frame_num * out_frame_size, out_frame_size) / out_frame_rate
-                interpolate = interp1d(old_ts_idx, timestamps, bounds_error=False, fill_value='extrapolate')
-                new_ts = interpolate(new_ts_idx)
+                if in_frame_rate != out_frame_rate:
+                    old_ts_idx = np.arange(0, len(timestamps) * in_frame_size, in_frame_size) / in_frame_rate
+                    interpolate = interp1d(old_ts_idx, timestamps, bounds_error=False, fill_value='extrapolate')
+                    new_ts = interpolate(new_ts_idx)
+                else:
+                    new_ts = timestamps[0] + new_ts_idx
 
                 ts_loc = os.path.join(self.rec_dir, 'audio_timestamps.npy')
                 np.save(ts_loc, new_ts)
@@ -226,20 +233,22 @@ def close_recording():
             # finally add pupil timebase offset to adjust for settable timebase.
             for audio_frame in packet.decode():
                 timestamp = audio_frame.pts * in_stream.time_base + clock_differences - self.g_pool.timebase.value
+
                 if recording.is_set():
                     if out_container is None:
                         rec_file = os.path.join(self.rec_dir, 'audio.mp4')
                         out_container = av.open(rec_file, 'w')
-                        out_stream = out_container.add_stream('aac')
+                        out_stream = out_container.add_stream('aac', rate=in_stream.rate)
                         out_frame_num = 0
                         in_frame_size = audio_frame.samples  # set here to make sure full packet size is used
                         timestamps = []
 
                     timestamps.append(timestamp)
-                    packet = out_stream.encode(audio_frame)
-                    if packet is not None:
-                        out_frame_num += 1
-                        out_container.mux(packet)
+                    out_packets = [out_stream.encode(audio_frame)]
+                    for out_packet in out_packets:
+                        if out_packet is not None:
+                            out_container.mux(out_packet)
+
                 elif out_container is not None:
                     # recording stopped
                     close_recording()