tc-mb · tc-mb · Oct 9, 2024 · Sep 17, 2024 · Sep 17, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -69,6 +69,7 @@ option(LLAMA_BUILD_SERVER   "llama: build server example" ${LLAMA_STANDALONE})
 
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
+option(LLAMA_FFMPEG "llama: use ffmpeg to load video files" OFF) # when ON, the ffmpeg libraries are located through pkg-config
 
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)

diff --git a/Makefile b/Makefile
@@ -968,6 +968,11 @@ override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
 override LDFLAGS  := $(LDFLAGS) -lcurl
 endif
 
+ifdef LLAMA_FFMPEG
+override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_FFMPEG $(shell pkg-config --cflags libavformat libavcodec libavutil libswscale)
+override LDFLAGS  := $(LDFLAGS) $(shell pkg-config --libs libavformat libavcodec libavutil libswscale)
+endif # LLAMA_FFMPEG: libswscale is resolved by pkg-config as well, so its -I/-L paths are honored on non-default prefixes
+
 #
 # Print build information
 #
@@ -1465,16 +1470,13 @@ llama-llava-cli: examples/llava/llava-cli.cpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
 
-FFMPEG_CFLAGS := $(shell pkg-config --cflags libavformat libavcodec libavutil)
-FFMPEG_LIBS := $(shell pkg-config --libs libavformat libavcodec libavutil) -lswscale
-
 llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
 	examples/llava/llava.cpp \
 	examples/llava/llava.h \
 	examples/llava/clip.cpp \
 	examples/llava/clip.h \
 	$(OBJ_ALL)
-	$(CXX) $(CXXFLAGS) $(FFMPEG_CFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) $(FFMPEG_LIBS) -Wno-cast-qual
+	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
 
 ifeq ($(UNAME_S),Darwin)
 swift: examples/batched.swift

diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
@@ -83,6 +83,19 @@ if (LLAMA_CURL)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
 endif ()
 
+# Use ffmpeg to load video files
+if (LLAMA_FFMPEG)
+    find_package(PkgConfig REQUIRED)
+    # libswscale is required as well: the video frame extractor includes <libswscale/swscale.h>
+    pkg_check_modules(FFMPEG REQUIRED libavformat libavcodec libavutil libswscale)
+    # PUBLIC usage requirements: directory-scoped add_definitions()/include_directories() would
+    # not reach targets defined in other directories, whereas these follow whoever links ${TARGET}
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_FFMPEG)
+    target_include_directories(${TARGET} PUBLIC ${FFMPEG_INCLUDE_DIRS})
+    # appended to the extra libraries that are linked into ${TARGET} below
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${FFMPEG_LIBRARIES})
+endif ()
+
 target_include_directories(${TARGET} PUBLIC .)
 target_compile_features   (${TARGET} PUBLIC cxx_std_11)
 target_link_libraries     (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
@@ -9,12 +9,14 @@
 #include <cstdlib>
 #include <vector>
 
+#if defined(LLAMA_USE_FFMPEG)
 extern "C" {
     #include <libavcodec/avcodec.h>
     #include <libavformat/avformat.h>
     #include <libavutil/imgutils.h>
     #include <libswscale/swscale.h>
 }
+#endif // LLAMA_USE_FFMPEG
 
 struct llava_context {
     struct clip_ctx * ctx_clip = NULL;
@@ -28,6 +30,8 @@ struct clip_image_u8 {
     std::vector<uint8_t> buf;
 };
 
+#if defined(LLAMA_USE_FFMPEG)
+
 static std::vector<clip_image_u8 *> extract_frames(const std::string& video_path, const int frame_num) {
     AVFormatContext* format_ctx = nullptr;
     if (avformat_open_input(&format_ctx, video_path.c_str(), nullptr, nullptr) < 0) {
@@ -156,6 +160,15 @@ static std::vector<clip_image_u8 *> extract_frames(const std::string& video_path
     return frames;
 }
 
+#else
+// Stub used when LLAMA_USE_FFMPEG is not defined: logs that video input is unsupported and returns no frames.
+static std::vector<clip_image_u8 *> extract_frames(const std::string& /*video_path*/, const int /*frame_num*/) {
+    LOG_TEE("%s: llama.cpp built without ffmpeg, processing video files is not supported. Please recompile with LLAMA_FFMPEG=1 to add video support.\n", __func__);
+    return {};
+}
+
+#endif // LLAMA_USE_FFMPEG
+
 static void show_additional_info(int /*argc*/, char ** argv) {
     LOG_TEE("\n example usage: %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> [--video <path/to/an/video.mp4>] [--image <path/to/an/image.jpg>] [--image <path/to/another/image.jpg>] [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
     LOG_TEE("  note: a lower temperature value like 0.1 is recommended for better quality.\n");