convert video to shorts

phidatahq · Dec 30, 2024 · 817597d · 817597d
1 parent a4cf57f
commit 817597d
Showing 1 changed file with 137 additions and 0 deletions.
diff --git a/cookbook/agents/47_video_to_shorts.py b/cookbook/agents/47_video_to_shorts.py
@@ -0,0 +1,137 @@
+"""
+1. Install dependencies using: `pip install phidata opencv-python google-generativeai sqlalchemy pydantic`
+2. Install ffmpeg: `brew install ffmpeg`
+3. Run the script using: `python cookbook/agents/47_video_to_shorts.py`
+"""
+from phi.agent import Agent
+from phi.model.google import Gemini
+from google.generativeai import upload_file, get_file
+import time
+import subprocess
+from pathlib import Path
+from phi.utils.log import logger
+
+# 1. Configure the source video, the output location and the agent
+video_path = "cookbook/agents/videoplayback (1).mp4"
+output_dir = "output/shorts/weird-food"
+
+agent = Agent(
+    model=Gemini(id="gemini-2.0-flash-exp"),
+    name="Video2Shorts",
+    description="Process videos and generate engaging shorts.",
+    structured_outputs=True,
+    markdown=True,
+    debug_mode=True,
+)
+
+# 2. Upload the video and wait until the service has finished processing it
+video_file = upload_file(video_path)
+while video_file.state.name == "PROCESSING":
+    # The local handle is a snapshot, so it is re-fetched on every poll
+    time.sleep(2)
+    video_file = get_file(video_file.name)
+
+# The loop above also exits when processing failed; stop here instead of
+# handing an unusable file to the model.
+if video_file.state.name == "FAILED":
+    raise ValueError(f"Video processing failed for {video_path}: {video_file.state.name}")
+
+# 3. Multimodal Query for Video Analysis
+# The prompt asks for a table with the columns
+# "Start Time | End Time | Description | Importance Score" and MM:SS timestamps.
+# extract_segments() below parses the reply with a regex that matches rows of
+# exactly that shape, so keep the prompt and the regex in sync.
+query = """
+
+You are an expert in video content creation, specializing in crafting engaging short-form content for platforms like YouTube Shorts and Instagram Reels. Your task is to analyze the provided video and identify segments that maximize viewer engagement.
+
+For each video, you'll:
+
+1. Identify key moments that will capture viewers' attention, focusing on:
+   - High-energy sequences
+   - Emotional peaks
+   - Surprising or unexpected moments
+   - Strong visual and audio elements
+   - Clear narrative segments with compelling storytelling
+
+2. Extract segments that work best for short-form content, considering:
+   - Optimal length (strictly 15–60 seconds)
+   - Natural start and end points that ensure smooth transitions
+   - Engaging pacing that maintains viewer attention
+   - Audio-visual harmony for an immersive experience
+   - Vertical format compatibility and adjustments if necessary
+
+3. Provide a detailed analysis of each segment, including:
+   - Precise timestamps (Start Time | End Time in MM:SS format)
+   - A clear description of why the segment would be engaging
+   - Suggestions on how to enhance the segment for short-form content
+   - An importance score (1-10) based on engagement potential
+
+Instructions:
+1. Analyze the provided video directly—do NOT reference or analyze any external sources or YouTube videos.
+2. Identify engaging moments that meet the specified criteria for short-form content.
+3. Provide your analysis in a **table format** with these columns:
+   - Start Time | End Time | Description | Importance Score
+4. Ensure all timestamps use MM:SS format and importance scores range from 1-10.
+5. Focus only on segments between 15 and 60 seconds long.
+6. Base your analysis solely on the provided video content.
+7. Deliver actionable insights to improve the identified segments for short-form optimization.
+
+Your goal is to identify moments that are visually compelling, emotionally engaging, and perfectly optimized for short-form platforms.
+"""
+
+# 4. Generate Video Analysis
+# The prompt is sent together with the uploaded video file.
+response = agent.run(query, videos=[video_file])
+
+# 5. Create output directory
+# output_dir is rebound from a str to a Path here; extract_segments() relies on
+# that when it joins the output file name onto it with "/".
+output_dir = Path(output_dir)
+output_dir.mkdir(parents=True, exist_ok=True)
+
+# 6. Extract and cut video segments
+def _timestamp_to_seconds(timestamp):
+    """Convert an ``MM:SS`` timestamp string into a number of seconds."""
+    minutes, seconds = timestamp.split(':')
+    return 60 * int(minutes) + int(seconds)
+
+
+def _build_ffmpeg_command(source, start_seconds, duration, target):
+    """Build the ffmpeg argument list that cuts one vertical 1080x1920 clip."""
+    return [
+        'ffmpeg',
+        '-ss', str(start_seconds),
+        '-i', str(source),
+        '-t', str(duration),
+        # Scale until the frame covers 1080x1920 while keeping the aspect ratio,
+        # then centre-crop the overflow. A bare scale=1080:1920 would stretch any
+        # source that is not already 9:16.
+        '-vf', 'scale=1080:1920:force_original_aspect_ratio=increase,crop=1080:1920,setsar=1:1',
+        '-c:v', 'libx264',
+        '-c:a', 'aac',
+        '-y',
+        str(target),
+    ]
+
+
+def extract_segments(response_text, min_score=8, min_duration=15, max_duration=60):
+    """Parse the model's segment table and cut the qualifying clips with ffmpeg.
+
+    Rows are matched in the form ``| MM:SS | MM:SS | description | score |``.
+    Every row that passes the score and duration filters is cut from the
+    module-level ``video_path`` into ``output_dir / "short_<n>.mp4"``.
+
+    Args:
+        response_text: Model output containing the table; it is passed
+            through ``str()`` before matching.
+        min_score: Lowest score that is still processed. The default of 8
+            keeps the previous ``score > 7`` behaviour.
+        min_duration: Shortest accepted clip length in seconds (inclusive).
+        max_duration: Longest accepted clip length in seconds (inclusive).
+
+    Returns:
+        A list of dicts with the keys ``'path'``, ``'description'`` and
+        ``'score'``, one per clip that ffmpeg produced successfully.
+
+    Note:
+        Only ``subprocess.CalledProcessError`` is handled (the segment is
+        reported and skipped). Any other error from ``subprocess.run``, such
+        as a missing ffmpeg executable, propagates to the caller.
+    """
+    import re
+
+    row_pattern = re.compile(r'\|\s*(\d+:\d+)\s*\|\s*(\d+:\d+)\s*\|\s*(.*?)\s*\|\s*(\d+)\s*\|')
+    segments = []
+
+    for match in row_pattern.finditer(str(response_text)):
+        start_time, end_time, description, raw_score = match.groups()
+        score = int(raw_score)
+        start_seconds = _timestamp_to_seconds(start_time)
+        duration = _timestamp_to_seconds(end_time) - start_seconds
+
+        # Skip low-scoring rows and rows outside the allowed clip length
+        if score < min_score or not min_duration <= duration <= max_duration:
+            continue
+
+        # Numbering follows the clips produced so far, so a failed cut does
+        # not leave a gap in the file names.
+        output_path = output_dir / f"short_{len(segments) + 1}.mp4"
+        command = _build_ffmpeg_command(video_path, start_seconds, duration, output_path)
+
+        try:
+            subprocess.run(command, check=True)
+        except subprocess.CalledProcessError:
+            print(f"Failed to process segment: {start_time} - {end_time}")
+        else:
+            segments.append({
+                'path': output_path,
+                'description': description,
+                'score': score,
+            })
+
+    return segments
+
+# Log the raw model reply at debug level before it is parsed
+logger.debug(f"{response.content}")
+
+# 7. Process segments
+shorts = extract_segments(response.content)
+
+# 8. Print results
+print("\n--- Generated Shorts ---")
+for short in shorts:
+    # One call per clip; the trailing "\n" keeps the blank line between entries
+    print(
+        f"Short at {short['path']}",
+        f"Description: {short['description']}",
+        f"Engagement Score: {short['score']}/10\n",
+        sep="\n",
+    )
+