diff --git a/cookbook/agents/48_video_caption_agent.py b/cookbook/agents/48_video_caption_agent.py
index 31ede7d4c..bab39bca3 100644
--- a/cookbook/agents/48_video_caption_agent.py
+++ b/cookbook/agents/48_video_caption_agent.py
@@ -1,3 +1,7 @@
+"""Please install dependencies using:
+pip install openai moviepy ffmpeg
+"""
+
 from phi.agent import Agent
 from phi.model.openai import OpenAIChat
 from phi.tools.moviepy_video_tools import MoviePyVideoTools
diff --git a/phi/tools/moviepy_video_tools.py b/phi/tools/moviepy_video_tools.py
index 1c420036c..7e86a7bf6 100644
--- a/phi/tools/moviepy_video_tools.py
+++ b/phi/tools/moviepy_video_tools.py
@@ -1,6 +1,6 @@
+from typing import List, Dict, Optional
 from phi.tools import Toolkit
 from phi.utils.log import logger
-from typing import List, Dict, Optional
 
 try:
     from moviepy import VideoFileClip, TextClip, CompositeVideoClip, ColorClip  # type: ignore
@@ -27,7 +27,14 @@ def __init__(
         self.register(self.embed_captions)
 
     def split_text_into_lines(self, words: List[Dict]) -> List[Dict]:
-        """Split words into lines based on duration and length constraints"""
+        """Split transcribed words into lines based on duration and length constraints
+
+        Args:
+            words: List of dictionaries containing word data with 'word', 'start', and 'end' keys
+
+        Returns:
+            List[Dict]: List of subtitle lines, each containing word, start time, end time, and text contents
+        """
         MAX_CHARS = 30
         MAX_DURATION = 2.5
         MAX_GAP = 1.5
@@ -79,7 +86,20 @@ def create_caption_clips(
         stroke_color="black",
         stroke_width=1.5,
     ) -> List[TextClip]:
-        """Create word-level caption clips with highlighting"""
+        """Create word-level caption clips with highlighting effects
+
+        Args:
+            text_json: Dictionary containing text and timing information
+            frame_size: Tuple of (width, height) for the video frame
+            font: Font family to use for captions
+            color: Base text color
+            highlight_color: Color for highlighted words
+            stroke_color: Color for text outline
+            stroke_width: Width of text outline
+
+        Returns:
+            List[TextClip]: List of MoviePy TextClip objects for each word and highlight
+        """
         word_clips = []
         x_pos = 0
         y_pos = 0
@@ -157,7 +177,14 @@ def create_caption_clips(
         return word_clips
 
     def parse_srt(self, srt_content: str) -> List[Dict]:
-        """Parse SRT format and extract word timing"""
+        """Convert SRT formatted content into word-level timing data
+
+        Args:
+            srt_content: String containing SRT formatted subtitles
+
+        Returns:
+            List[Dict]: List of words with their timing information
+        """
         words = []
         lines = srt_content.strip().split("\n\n")
 
@@ -216,14 +243,14 @@ def extract_audio(self, video_path: str, output_path: str) -> str:
             return f"Failed to extract audio: {str(e)}"
 
     def create_srt(self, transcription: str, output_path: str) -> str:
-        """Convert transcription to SRT format
+        """Save transcription text to SRT formatted file
 
         Args:
-            transcription: Text transcription
+            transcription: Text transcription in SRT format
             output_path: Path where the SRT file will be saved
 
         Returns:
-            str: Path to the created SRT file
+            str: Path to the created SRT file, or error message if failed
         """
         try:
             # Since we're getting SRT format from Whisper API now,
@@ -245,7 +272,20 @@ def embed_captions(
         stroke_color: str = "black",
         stroke_width: int = 1,
     ) -> str:
-        """Embed scrolling captions with word-level highlighting into video"""
+        """Create a new video with embedded scrolling captions and word-level highlighting
+
+        Args:
+            video_path: Path to the input video file
+            srt_path: Path to the SRT caption file
+            output_path: Path for the output video (optional)
+            font_size: Size of caption text
+            font_color: Color of caption text
+            stroke_color: Color of text outline
+            stroke_width: Width of text outline
+
+        Returns:
+            str: Path to the captioned video file, or error message if failed
+        """
         try:
             # If no output path provided, create one based on input video
             if output_path is None:
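For reviewers who want to exercise the new docstrings against real calls, the following is a minimal usage sketch assembled only from the signatures visible in this diff. The file paths and the sample SRT string are placeholders, the no-argument MoviePyVideoTools() constructor is assumed, and the Whisper transcription step used by the cookbook agent is stubbed out rather than shown.

from phi.tools.moviepy_video_tools import MoviePyVideoTools

tools = MoviePyVideoTools()

# 1. Pull the audio track out of the source video (requires ffmpeg on PATH).
audio_path = tools.extract_audio("video.mp4", "audio.wav")

# 2. Transcribe the audio elsewhere (e.g. Whisper with response_format="srt");
#    create_srt expects the transcription to already be SRT-formatted, per its
#    updated docstring. The snippet below is a placeholder transcription.
srt_transcription = """1
00:00:00,000 --> 00:00:01,000
Hello world
"""
srt_path = tools.create_srt(srt_transcription, "captions.srt")

# 3. Burn word-highlighted captions into a new video; with no output_path,
#    embed_captions derives one from the input video per the docstring.
result = tools.embed_captions("video.mp4", "captions.srt")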