Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ffmpeg compiler flag for video understanding #32

Merged
merged 2 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})

# 3rd party libs
# Both integrations below are opt-in: each option defaults to OFF and must be
# enabled explicitly at configure time (e.g. -DLLAMA_FFMPEG=ON).
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
option(LLAMA_FFMPEG "llama: use ffmpeg to load video files" OFF)

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
Expand Down
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,11 @@ override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
override LDFLAGS := $(LDFLAGS) -lcurl
endif

# Optional ffmpeg support for loading video files (enable with LLAMA_FFMPEG=1).
# Defines LLAMA_USE_FFMPEG for the C++ sources and appends the ffmpeg compile
# and link flags. libswscale is resolved through pkg-config together with the
# other ffmpeg libraries (instead of a hard-coded -lswscale) so that its
# include and library search paths are picked up for non-default prefixes.
ifdef LLAMA_FFMPEG
override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_FFMPEG $(shell pkg-config --cflags libavformat libavcodec libavutil libswscale)
override LDFLAGS := $(LDFLAGS) $(shell pkg-config --libs libavformat libavcodec libavutil libswscale)
endif

#
# Print build information
#
Expand Down Expand Up @@ -1465,16 +1470,13 @@ llama-llava-cli: examples/llava/llava-cli.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

FFMPEG_CFLAGS := $(shell pkg-config --cflags libavformat libavcodec libavutil)
FFMPEG_LIBS := $(shell pkg-config --libs libavformat libavcodec libavutil) -lswscale

llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
examples/llava/llava.cpp \
examples/llava/llava.h \
examples/llava/clip.cpp \
examples/llava/clip.h \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) $(FFMPEG_CFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) $(FFMPEG_LIBS) -Wno-cast-qual
$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

ifeq ($(UNAME_S),Darwin)
swift: examples/batched.swift
Expand Down
13 changes: 13 additions & 0 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ if (LLAMA_CURL)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
endif ()

# Use ffmpeg to load video files
#
# The ffmpeg modules are located through pkg-config. libswscale is requested
# alongside libavformat/libavcodec/libavutil because the video loading code
# includes <libswscale/swscale.h>.
#
# The LLAMA_USE_FFMPEG definition and the ffmpeg include directories are
# attached to ${TARGET} as PUBLIC usage requirements: directory-scoped
# add_definitions()/include_directories() only affect targets created in this
# directory and below, so they would not reach executables in other
# directories that link against ${TARGET}.
if (LLAMA_FFMPEG)
    find_package(PkgConfig REQUIRED)
    pkg_check_modules(FFMPEG REQUIRED
        libavformat
        libavcodec
        libavutil
        libswscale
    )
    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_FFMPEG)
    target_include_directories(${TARGET} PUBLIC ${FFMPEG_INCLUDE_DIRS})
    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${FFMPEG_LIBRARIES})
endif ()

target_include_directories(${TARGET} PUBLIC .)
target_compile_features (${TARGET} PUBLIC cxx_std_11)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
13 changes: 13 additions & 0 deletions examples/llava/minicpmv-cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
#include <cstdlib>
#include <vector>

#if defined(LLAMA_USE_FFMPEG)
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
#endif // LLAMA_USE_FFMPEG

struct llava_context {
struct clip_ctx * ctx_clip = NULL;
Expand All @@ -28,6 +30,8 @@ struct clip_image_u8 {
std::vector<uint8_t> buf;
};

#if defined(LLAMA_USE_FFMPEG)

static std::vector<clip_image_u8 *> extract_frames(const std::string& video_path, const int frame_num) {
AVFormatContext* format_ctx = nullptr;
if (avformat_open_input(&format_ctx, video_path.c_str(), nullptr, nullptr) < 0) {
Expand Down Expand Up @@ -156,6 +160,15 @@ static std::vector<clip_image_u8 *> extract_frames(const std::string& video_path
return frames;
}

#else

// Fallback used when the program is compiled without LLAMA_USE_FFMPEG.
// Logs that video files are not supported in this build and returns an empty
// frame list. Both parameters are intentionally unused; their names are
// commented out so the stub does not trigger unused-parameter warnings.
static std::vector<clip_image_u8 *> extract_frames(const std::string& /*video_path*/, const int /*frame_num*/) {
    LOG_TEE("%s: llama.cpp built without ffmpeg, processing video files is not supported. Please recompile with LLAMA_FFMPEG=1 to add video support.\n", __func__);
    return {};
}

#endif // LLAMA_USE_FFMPEG

static void show_additional_info(int /*argc*/, char ** argv) {
LOG_TEE("\n example usage: %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> [--video <path/to/an/video.mp4>] [--image <path/to/an/image.jpg>] [--image <path/to/another/image.jpg>] [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
LOG_TEE(" note: a lower temperature value like 0.1 is recommended for better quality.\n");
Expand Down