livekit
diff --git a/examples/basic_room.py b/examples/basic_room.py
Lines changed: 2 additions & 10 deletions
diff --git a/examples/e2ee.py b/examples/e2ee.py
Lines changed: 1 addition & 1 deletion
diff --git a/examples/face_landmark/face_landmark.py b/examples/face_landmark/face_landmark.py
Lines changed: 39 additions & 54 deletions
diff --git a/examples/publish_hue.py b/examples/publish_hue.py
Lines changed: 2 additions & 2 deletions
diff --git a/examples/whisper/whisper.py b/examples/whisper/whisper.py
Lines changed: 50 additions & 50 deletions
@@ -59,20 +59,12 @@ def on_track_subscribed(track: livekit.Track,
         if track.kind == livekit.TrackKind.KIND_VIDEO:
             nonlocal video_stream
             video_stream = livekit.VideoStream(track)
-
-            @video_stream.on("frame_received")
-            def on_video_frame(frame: livekit.VideoFrame):
-                # received a video frame from the track
-                pass
+            # video_stream is an async iterator that yields VideoFrame; consume it with "async for"
         elif track.kind == livekit.TrackKind.KIND_AUDIO:
             print("Subscribed to an Audio Track")
             nonlocal audio_stream
             audio_stream = livekit.AudioStream(track)
-
-            @audio_stream.on('frame_received')
-            def on_audio_frame(frame: livekit.AudioFrame):
-                # received an audio frame from the track
-                pass
+            # audio_stream is an async iterator that yields AudioFrame; consume it with "async for"
 
     @room.listens_to("track_unsubscribed")
     def on_track_unsubscribed(track: livekit.Track,
 
@@ -35,7 +35,7 @@ async def publish_frames(source: livekit.VideoSource):
 
         source.capture_frame(frame)
 
-        hue += framerate/3  # 3s for a full cycle
+        hue += framerate / 3  # 3s for a full cycle
         if hue >= 1.0:
             hue = 0.0
 
 
@@ -13,8 +13,7 @@
 URL = 'ws://localhost:7880'
 TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY'
 
-frame_queue = Queue()
-argb_frame = None
+tasks = set()
 
 # You can download a face landmark model file from https://developers.google.com/mediapipe/solutions/vision/face_landmarker#models
 model_file = 'face_landmarker.task'
@@ -36,8 +35,7 @@ def draw_landmarks_on_image(rgb_image, detection_result):
     face_landmarks_list = detection_result.face_landmarks
 
     # Loop through the detected faces to visualize.
-    for idx in range(len(face_landmarks_list)):
-        face_landmarks = face_landmarks_list[idx]
+    for face_landmarks in face_landmarks_list:
 
         # Draw the face landmarks.
         face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
@@ -68,72 +66,59 @@ def draw_landmarks_on_image(rgb_image, detection_result):
             .get_default_face_mesh_iris_connections_style())
 
 
-async def room() -> None:
-    room = livekit.Room()
-    await room.connect(URL, TOKEN)
-    print("connected to room: " + room.name)
-
-    video_stream = None
-
-    @room.on("track_subscribed")
-    def on_track_subscribed(track: livekit.Track,
-                            publication: livekit.RemoteTrackPublication,
-                            participant: livekit.RemoteParticipant):
-        if track.kind == livekit.TrackKind.KIND_VIDEO:
-            nonlocal video_stream
-            video_stream = livekit.VideoStream(track)
-
-            @video_stream.on("frame_received")
-            def on_video_frame(frame: livekit.VideoFrame):
-                frame_queue.put(frame)
-
-    await room.run()
-
-
-def display_frames() -> None:
+async def frame_loop(video_stream: livekit.VideoStream) -> None:
+    landmarker = FaceLandmarker.create_from_options(options)
+    argb_frame = None
     cv2.namedWindow('livekit_video', cv2.WINDOW_AUTOSIZE)
     cv2.startWindowThread()
+    async for frame in video_stream:
+        buffer = frame.buffer
 
-    global argb_frame
-
-    with FaceLandmarker.create_from_options(options) as landmarker:
-        while True:
-            frame = frame_queue.get()
-            buffer = frame.buffer
+        if argb_frame is None or argb_frame.width != buffer.width or argb_frame.height != buffer.height:
+            argb_frame = livekit.ArgbFrame(
+                livekit.VideoFormatType.FORMAT_ABGR, buffer.width, buffer.height)
 
-            if argb_frame is None or argb_frame.width != buffer.width or argb_frame.height != buffer.height:
-                argb_frame = livekit.ArgbFrame(
-                    livekit.VideoFormatType.FORMAT_ABGR, buffer.width, buffer.height)
+        buffer.to_argb(argb_frame)
 
-            buffer.to_argb(argb_frame)
+        arr = np.ctypeslib.as_array(argb_frame.data)
+        arr = arr.reshape((argb_frame.height, argb_frame.width, 4))
+        arr = cv2.cvtColor(arr, cv2.COLOR_RGBA2RGB)
 
-            arr = np.ctypeslib.as_array(argb_frame.data)
-            arr = arr.reshape((argb_frame.height, argb_frame.width, 4))
-            arr = cv2.cvtColor(arr, cv2.COLOR_RGBA2RGB)
+        mp_image = mp.Image(
+            image_format=mp.ImageFormat.SRGB, data=arr)
 
-            mp_image = mp.Image(
-                image_format=mp.ImageFormat.SRGB, data=arr)
+        detection_result = landmarker.detect_for_video(
+            mp_image, frame.timestamp_us)
 
-            detection_result = landmarker.detect_for_video(
-                mp_image, frame.timestamp)
+        draw_landmarks_on_image(arr, detection_result)
 
-            draw_landmarks_on_image(arr, detection_result)
+        arr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
 
-            arr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
-
-            cv2.imshow('livekit_video', arr)
-            if cv2.waitKey(1) & 0xFF == ord('q'):
-                break
+        cv2.imshow('livekit_video', arr)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
 
+    landmarker.close()
     cv2.destroyAllWindows()
 
 
 async def main() -> None:
-    loop = asyncio.get_event_loop()
-    future = loop.run_in_executor(None, asyncio.run, room())
+    room = livekit.Room()
+    await room.connect(URL, TOKEN)
+    print("connected to room: " + room.name)
+
+    video_stream = None
 
-    display_frames()
-    await future
+    @room.on("track_subscribed")
+    def on_track_subscribed(track: livekit.Track, *_):
+        if track.kind == livekit.TrackKind.KIND_VIDEO:
+            nonlocal video_stream
+            video_stream = livekit.VideoStream(track)
+            task = asyncio.create_task(frame_loop(video_stream))
+            tasks.add(task)
+            task.add_done_callback(tasks.remove)
+
+    await room.run()
 
 if __name__ == "__main__":
     asyncio.run(main())
@@ -25,7 +25,7 @@ async def publish_frames(source: livekit.VideoSource):
             0, livekit.VideoRotation.VIDEO_ROTATION_0, argb_frame.to_i420())
 
         rgb = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
-        rgb = [(x * 255) for x in rgb] # type: ignore
+        rgb = [(x * 255) for x in rgb]  # type: ignore
 
         argb_color = np.array(rgb + [255], dtype=np.uint8)
         arr.flat[::4] = argb_color[0]
@@ -35,7 +35,7 @@ async def publish_frames(source: livekit.VideoSource):
 
         source.capture_frame(frame)
 
-        hue += framerate/3  # 3s for a full cycle
+        hue += framerate / 3  # 3s for a full cycle
         if hue >= 1.0:
             hue = 0.0
 
 
@@ -98,56 +98,56 @@ class WhisperFullParams(ctypes.Structure):
 whisper.whisper_full_get_segment_text.restype = ctypes.c_char_p
 ctx = whisper.whisper_init_from_file(fname_model.encode('utf-8'))
 
-data_30_secs = np.zeros(SAMPLES_30_SECS, dtype=np.float32)
-written_samples = 0  # nb. of samples written to data_30_secs for the cur. inference
 
-
-def on_audio_frame(frame: livekit.AudioFrame):
-    global data_30_secs, written_samples
-
-    # whisper requires 16kHz mono, so resample the data
-    # also convert the samples from int16 to float32
-    frame = frame.remix_and_resample(
-        WHISPER_SAMPLE_RATE, 1)
-
-    data = np.array(frame.data, dtype=np.float32) / 32768.0
-
-    # write the data inside data_30_secs at written_samples
-    data_start = SAMPLES_KEEP + written_samples
-    data_30_secs[data_start:data_start+len(data)] = data
-    written_samples += len(data)
-
-    if written_samples >= SAMPLES_STEP:
-        params = whisper.whisper_full_default_params(
-            WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY)
-        params.print_realtime = False
-        params.print_progress = False
-
-        ctx_ptr = ctypes.c_void_p(ctx)
-        data_ptr = data_30_secs.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
-        res = whisper.whisper_full(ctx_ptr,
-                                   params,
-                                   data_ptr,
-                                   written_samples + SAMPLES_KEEP)
-
-        if res != 0:
-            logging.error("error while running inference: %s", res)
-            return
-
-        n_segments = whisper.whisper_full_n_segments(ctx_ptr)
-        for i in range(n_segments):
-            t0 = whisper.whisper_full_get_segment_t0(ctx_ptr, i)
-            t1 = whisper.whisper_full_get_segment_t1(ctx_ptr, i)
-            txt = whisper.whisper_full_get_segment_text(ctx_ptr, i)
-
-            logging.info(
-                f"{t0/1000.0:.3f} - {t1/1000.0:.3f} : {txt.decode('utf-8')}")
-
-        # write old data to the beginning of the buffer (SAMPLES_KEEP)
-        data_30_secs[:SAMPLES_KEEP] = data_30_secs[data_start +
-                                                   written_samples - SAMPLES_KEEP:
-                                                   data_start + written_samples]
-        written_samples = 0
+async def whisper_task(stream: livekit.AudioStream):
+    """Transcribe the frames of ``stream`` with whisper and log the segments.
+
+    Runs until ``stream`` ends or an inference call fails.
+    """
+    data_30_secs = np.zeros(SAMPLES_30_SECS, dtype=np.float32)
+    written_samples = 0  # number of samples written for the current inference
+
+    async for frame in stream:
+        # whisper requires 16kHz mono, so resample the data
+        # also convert the samples from int16 to float32
+        frame = frame.remix_and_resample(WHISPER_SAMPLE_RATE, 1)
+
+        data = np.array(frame.data, dtype=np.float32) / 32768.0
+
+        # append the data after the kept prefix and the samples written so far
+        data_start = SAMPLES_KEEP + written_samples
+        data_30_secs[data_start:data_start + len(data)] = data
+        written_samples += len(data)
+
+        if written_samples >= SAMPLES_STEP:
+            params = whisper.whisper_full_default_params(
+                WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY)
+            params.print_realtime = False
+            params.print_progress = False
+
+            ctx_ptr = ctypes.c_void_p(ctx)
+            data_ptr = data_30_secs.ctypes.data_as(
+                ctypes.POINTER(ctypes.c_float))
+            n_samples = SAMPLES_KEEP + written_samples
+            res = whisper.whisper_full(ctx_ptr, params, data_ptr, n_samples)
+
+            if res != 0:
+                logging.error("error while running inference: %s", res)
+                return  # ends transcription of this stream
+
+            n_segments = whisper.whisper_full_n_segments(ctx_ptr)
+            for i in range(n_segments):
+                t0 = whisper.whisper_full_get_segment_t0(ctx_ptr, i)
+                t1 = whisper.whisper_full_get_segment_t1(ctx_ptr, i)
+                txt = whisper.whisper_full_get_segment_text(ctx_ptr, i)
+
+                logging.info(
+                    f"{t0/1000.0:.3f} - {t1/1000.0:.3f} : {txt.decode('utf-8')}")
+
+            # carry the last SAMPLES_KEEP samples over to the start of the
+            # buffer; the valid audio ends at n_samples
+            data_30_secs[:SAMPLES_KEEP] = data_30_secs[written_samples:n_samples]
+            written_samples = 0
 
 
 async def main():
@@ -172,7 +172,7 @@ def on_track_subscribed(track: livekit.Track,
         logging.info("starting listening to: %s", participant.identity)
         nonlocal audio_stream
         audio_stream = livekit.AudioStream(track)
-        audio_stream.add_listener('frame_received', on_audio_frame)
+        asyncio.create_task(whisper_task(audio_stream))
 
     try:
         logging.info("connecting to %s", URL)