Skip to content

Commit

Permalink
feat(backends): Drop bert.cpp
Browse files Browse the repository at this point in the history
use llama.cpp 3.2 as a drop-in replacement for bert.cpp

Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler committed Nov 26, 2024
1 parent e8128a3 commit a2d95ae
Show file tree
Hide file tree
Showing 13 changed files with 36 additions and 180 deletions.
30 changes: 1 addition & 29 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4

# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
Expand Down Expand Up @@ -198,7 +194,6 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
endif

ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
Expand Down Expand Up @@ -228,19 +223,6 @@ endif

all: help

## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch

sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a

## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
Expand Down Expand Up @@ -320,12 +302,11 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp

replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
Expand All @@ -334,7 +315,6 @@ replace:
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
Expand All @@ -349,7 +329,6 @@ rebuild: ## Rebuilds the project
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
Expand Down Expand Up @@ -707,13 +686,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc

backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif

backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
Expand Down
4 changes: 2 additions & 2 deletions aio/cpu/embeddings.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: text-embedding-ada-002
backend: bert-embeddings
embeddings: true
parameters:
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf

usage: |
You can test this model with curl like this:
Expand Down
34 changes: 0 additions & 34 deletions backend/go/llm/bert/bert.go

This file was deleted.

21 changes: 0 additions & 21 deletions backend/go/llm/bert/main.go

This file was deleted.

8 changes: 5 additions & 3 deletions core/gallery/models_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"gopkg.in/yaml.v3"
)

const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/579d21ddc282faa49132119e5fc8cda7/raw/b58eafbf8f242e887c677c541c83c16cf7cc5ee8/bert-embeddings.yaml`

var _ = Describe("Model test", func() {

Context("Downloading", func() {
Expand Down Expand Up @@ -47,7 +49,7 @@ var _ = Describe("Model test", func() {

gallery := []GalleryModel{{
Name: "bert",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
}}
out, err := yaml.Marshal(gallery)
Expect(err).ToNot(HaveOccurred())
Expand All @@ -66,7 +68,7 @@ var _ = Describe("Model test", func() {
Expect(err).ToNot(HaveOccurred())
Expect(len(models)).To(Equal(1))
Expect(models[0].Name).To(Equal("bert"))
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
Expect(models[0].Installed).To(BeFalse())

err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
Expand All @@ -78,7 +80,7 @@ var _ = Describe("Model test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))

models, err = AvailableGalleryModels(galleries, tempdir)
Expect(err).ToNot(HaveOccurred())
Expand Down
16 changes: 9 additions & 7 deletions core/http/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ func postInvalidRequest(url string) (error, int) {
return nil, resp.StatusCode
}

const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/579d21ddc282faa49132119e5fc8cda7/raw/b58eafbf8f242e887c677c541c83c16cf7cc5ee8/bert-embeddings.yaml`

//go:embed backend-assets/*
var backendAssets embed.FS

Expand Down Expand Up @@ -279,13 +281,13 @@ var _ = Describe("API test", func() {
g := []gallery.GalleryModel{
{
Name: "bert",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
},
{
Name: "bert2",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
},
}
out, err := yaml.Marshal(g)
Expand Down Expand Up @@ -383,7 +385,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
Expect(content["foo"]).To(Equal("bar"))

models, err = getModels("http://127.0.0.1:9090/models/available")
Expand All @@ -402,7 +404,7 @@ var _ = Describe("API test", func() {
It("overrides models", func() {

response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Name: "bert",
Overrides: map[string]interface{}{
"backend": "llama",
Expand Down Expand Up @@ -451,7 +453,7 @@ var _ = Describe("API test", func() {
})
It("apply models without overrides", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Name: "bert",
Overrides: map[string]interface{}{},
})
Expand All @@ -471,7 +473,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
})

It("runs openllama(llama-ggml backend)", Label("llama"), func() {
Expand Down
46 changes: 11 additions & 35 deletions docs/content/docs/features/embeddings.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,39 +27,6 @@ embeddings: true
# .. other parameters
```

## Bert embeddings

To use `bert.cpp` models you can use the `bert` embedding backend.

An example model config file:

```yaml
name: text-embedding-ada-002
parameters:
model: bert
backend: bert-embeddings
embeddings: true
# .. other parameters
```

The `bert` backend uses [bert.cpp](https://github.com/skeskinen/bert.cpp) and uses `ggml` models.

For instance you can download the `ggml` quantized version of `all-MiniLM-L6-v2` from https://huggingface.co/skeskinen/ggml:

```bash
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
```

To test locally (LocalAI server running on `localhost`),
you can use `curl` (and `jq` at the end to prettify):

```bash
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}' | jq "."
```

## Huggingface embeddings

To use `sentence-transformers` and models in `huggingface` you can use the `sentencetransformers` embedding backend.
Expand Down Expand Up @@ -87,17 +54,26 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g

## Llama.cpp embeddings

Embeddings with `llama.cpp` are supported with the `llama` backend.
Embeddings with `llama.cpp` are supported with the `llama-cpp` backend, it needs to be enabled with `embeddings` set to `true`.

```yaml
name: my-awesome-model
backend: llama
backend: llama-cpp
embeddings: true
parameters:
model: ggml-file.bin
# ...
```

Then you can use the API to generate embeddings:

```bash
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "My text",
"model": "my-awesome-model"
}' | jq "."
```

## 💡 Examples

- Example that uses LLamaIndex and LocalAI as embedding: [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
2 changes: 1 addition & 1 deletion docs/content/docs/features/model-gallery.md
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{

```bash
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
"url": "github:mudler/LocalAI/gallery/bert-embeddings.yaml",
"id": "bert-embeddings",
"name": "text-embedding-ada-002"
}'
```
Expand Down
23 changes: 0 additions & 23 deletions embedded/models/bert-cpp.yaml

This file was deleted.

12 changes: 0 additions & 12 deletions gallery/bert-embeddings.yaml

This file was deleted.

12 changes: 5 additions & 7 deletions gallery/index.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@
urls:
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF
overrides:
embeddings: true
parameters:
model: llama-3.2-1b-instruct-q4_k_m.gguf
files:
Expand Down Expand Up @@ -8732,16 +8733,13 @@
- filename: "ggml-model-whisper-tiny.en-q8_0.bin"
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin"
sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
## Bert embeddings
- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
## Bert embeddings (llama3.2 drop-in)
- !!merge <<: *llama32
name: "bert-embeddings"
license: "Apache 2.0"
urls:
- https://huggingface.co/skeskinen/ggml
description: |
llama3.2 embeddings model. Using as drop-in replacement for bert-embeddings
tags:
- embeddings
description: |
Bert model that can be used for embeddings
## Stable Diffusion
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
license: "BSD-3"
Expand Down
Loading

0 comments on commit a2d95ae

Please sign in to comment.