diff --git a/comps/llms/text-generation/ollama/README.md b/comps/llms/text-generation/ollama/README.md
new file mode 100644
index 000000000..c6289e7d2
--- /dev/null
+++ b/comps/llms/text-generation/ollama/README.md
@@ -0,0 +1,3 @@
+# Introduction
+
+[Ollama](https://github.com/ollama/ollama) allows you to run open-source large language models, such as Llama 3, locally. Ollama bundles model weights, configuration, and data into a single package defined by a Modelfile. It is a lightweight, extensible framework for building and running language models on a local machine, providing a simple API for creating, running, and managing models, as well as a library of pre-built models that can be used in a variety of applications. This makes it a good choice for deploying large language models locally on an AI PC.
diff --git a/comps/llms/text-generation/ollama/cpp/xeon/Dockerfile b/comps/llms/text-generation/ollama/cpp/xeon/Dockerfile
new file mode 100644
index 000000000..09ca086d7
--- /dev/null
+++ b/comps/llms/text-generation/ollama/cpp/xeon/Dockerfile
@@ -0,0 +1,13 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM ubuntu:latest
+RUN apt-get update && apt-get install -y wget git cmake build-essential
+RUN wget https://go.dev/dl/go1.23.2.linux-amd64.tar.gz && rm -rf /usr/local/go && tar -C /usr/local -xzf go1.23.2.linux-amd64.tar.gz && rm go1.23.2.linux-amd64.tar.gz
+ENV PATH="$PATH:/usr/local/go/bin"
+WORKDIR /ollama
+RUN git clone https://github.com/ollama/ollama.git .
+RUN OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on -DGGML_AVX512=on -DGGML_AVX512_VNNI=on -DGGML_AVX512_VBMI=on" go generate ./... && go build .
+ENV PATH="$PATH:/ollama"
+COPY entrypoint.sh .
+ENTRYPOINT ["bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/comps/llms/text-generation/ollama/cpp/xeon/README.md b/comps/llms/text-generation/ollama/cpp/xeon/README.md
new file mode 100644
index 000000000..6ca68fa93
--- /dev/null
+++ b/comps/llms/text-generation/ollama/cpp/xeon/README.md
@@ -0,0 +1,35 @@
+# Introduction
+
+This Ollama server is compiled from the [official Ollama repository](https://github.com/ollama/ollama) with additional flags suited to Intel Xeon CPUs. The following compilation flags are enabled:
+
+- `-DGGML_AVX=on`
+- `-DGGML_AVX2=on`
+- `-DGGML_F16C=on`
+- `-DGGML_FMA=on`
+- `-DGGML_AVX512=on`
+- `-DGGML_AVX512_VNNI=on`
+- `-DGGML_AVX512_VBMI=on`
+
+## Build Docker Image
+
+```bash
+cd comps/llms/text-generation/ollama/cpp/xeon
+docker build -t opea/llm-ollama-cpp-xeon:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+## Usage
+
+1. Start the microservice:
+
+```bash
+docker run --network host opea/llm-ollama-cpp-xeon:latest
+```
+
+2. Pull a model first (e.g. `ollama pull phi3` inside the container), then send an `application/json` request to Ollama's API endpoint:
+
+```bash
+curl --noproxy "*" http://localhost:11434/api/generate -d '{
+  "model": "phi3",
+  "prompt": "Why is the sky blue?"
+}'
+```
diff --git a/comps/llms/text-generation/ollama/cpp/xeon/entrypoint.sh b/comps/llms/text-generation/ollama/cpp/xeon/entrypoint.sh
new file mode 100644
index 000000000..8bd02dbfc
--- /dev/null
+++ b/comps/llms/text-generation/ollama/cpp/xeon/entrypoint.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+exec ollama serve
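The root README above notes that Ollama bundles weights and configuration through a Modelfile. A minimal sketch of that workflow, assuming the server from this image is already running; the model name `phi3`, the derived name `my-phi3`, and the parameter values are illustrative, not part of this PR:

```bash
# Sketch only: build a derived model from a Modelfile.
# "phi3" and the settings below are illustrative assumptions.
cat > Modelfile <<'EOF'
FROM phi3
PARAMETER temperature 0.7
SYSTEM You are a concise assistant.
EOF

ollama create my-phi3 -f Modelfile   # register the derived model
ollama run my-phi3 "Summarize what a Modelfile does."
```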
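Because the Dockerfile above enables AVX-512 code paths, it is worth confirming that the host Xeon actually exposes those instructions before building. A quick check using standard Linux tooling (nothing specific to this PR):

```bash
# List the AVX-512 feature bits the host CPU advertises.
# If this prints nothing, drop the GGML_AVX512* defines from the build.
grep -o 'avx512[a-z_]*' /proc/cpuinfo | sort -u
```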
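The `docker run` command in the Xeon README starts a fresh container each time, so pulled models disappear when the container is removed. One way to persist them, assuming Ollama's default model store of `/root/.ollama` inside this root-running image:

```bash
# Mount a named volume over Ollama's default model directory so that
# pulled models survive container restarts. The volume name is arbitrary.
docker volume create ollama-models
docker run --network host -v ollama-models:/root/.ollama opea/llm-ollama-cpp-xeon:latest
```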
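Putting the usage steps together, a small end-to-end smoke test against the running service might look like the following sketch. The model name is again illustrative, and older Ollama servers expect the field `name` rather than `model` in the pull request body:

```bash
#!/usr/bin/env bash
# Sketch of an end-to-end check: pull a model, then request a completion.
set -euo pipefail

ENDPOINT=http://localhost:11434   # default Ollama port used by this image

# Pull the model (streams progress JSON until the download completes).
curl --noproxy "*" "$ENDPOINT/api/pull" -d '{"model": "phi3"}'

# Non-streaming generation: the full answer arrives as one JSON object.
curl --noproxy "*" "$ENDPOINT/api/generate" -d '{
  "model": "phi3",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```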