Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(backends): Drop bert.cpp #4272

Merged
merged 2 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 1 addition & 29 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4

# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
Expand Down Expand Up @@ -198,7 +194,6 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
endif

ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
Expand Down Expand Up @@ -228,19 +223,6 @@ endif

all: help

## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch

sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a

## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
Expand Down Expand Up @@ -320,12 +302,11 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp

replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
Expand All @@ -334,7 +315,6 @@ replace:
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
Expand All @@ -349,7 +329,6 @@ rebuild: ## Rebuilds the project
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
Expand Down Expand Up @@ -707,13 +686,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc

backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif

backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
Expand Down
4 changes: 2 additions & 2 deletions aio/cpu/embeddings.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: text-embedding-ada-002
backend: bert-embeddings
embeddings: true
parameters:
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf

usage: |
You can test this model with curl like this:
Expand Down
34 changes: 0 additions & 34 deletions backend/go/llm/bert/bert.go

This file was deleted.

21 changes: 0 additions & 21 deletions backend/go/llm/bert/main.go

This file was deleted.

8 changes: 5 additions & 3 deletions core/gallery/models_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"gopkg.in/yaml.v3"
)

const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

var _ = Describe("Model test", func() {

Context("Downloading", func() {
Expand Down Expand Up @@ -47,7 +49,7 @@ var _ = Describe("Model test", func() {

gallery := []GalleryModel{{
Name: "bert",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
}}
out, err := yaml.Marshal(gallery)
Expect(err).ToNot(HaveOccurred())
Expand All @@ -66,7 +68,7 @@ var _ = Describe("Model test", func() {
Expect(err).ToNot(HaveOccurred())
Expect(len(models)).To(Equal(1))
Expect(models[0].Name).To(Equal("bert"))
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
Expect(models[0].Installed).To(BeFalse())

err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
Expand All @@ -78,7 +80,7 @@ var _ = Describe("Model test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))

models, err = AvailableGalleryModels(galleries, tempdir)
Expect(err).ToNot(HaveOccurred())
Expand Down
24 changes: 13 additions & 11 deletions core/http/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ func postInvalidRequest(url string) (error, int) {
return nil, resp.StatusCode
}

const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

//go:embed backend-assets/*
var backendAssets embed.FS

Expand Down Expand Up @@ -279,13 +281,13 @@ var _ = Describe("API test", func() {
g := []gallery.GalleryModel{
{
Name: "bert",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
},
{
Name: "bert2",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
},
}
out, err := yaml.Marshal(g)
Expand Down Expand Up @@ -383,7 +385,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
Expect(content["foo"]).To(Equal("bar"))

models, err = getModels("http://127.0.0.1:9090/models/available")
Expand All @@ -402,7 +404,7 @@ var _ = Describe("API test", func() {
It("overrides models", func() {

response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Name: "bert",
Overrides: map[string]interface{}{
"backend": "llama",
Expand Down Expand Up @@ -451,7 +453,7 @@ var _ = Describe("API test", func() {
})
It("apply models without overrides", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Name: "bert",
Overrides: map[string]interface{}{},
})
Expand All @@ -471,7 +473,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
})

It("runs openllama(llama-ggml backend)", Label("llama"), func() {
Expand Down Expand Up @@ -806,7 +808,7 @@ var _ = Describe("API test", func() {
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8?
})
It("can generate completions via ggml", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
Expand Down Expand Up @@ -866,8 +868,8 @@ var _ = Describe("API test", func() {
},
)
Expect(err).ToNot(HaveOccurred(), err)
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048))

sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
Expand Down Expand Up @@ -951,7 +953,7 @@ var _ = Describe("API test", func() {
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five"), ContainSubstring("5")))

stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
Expand Down
46 changes: 11 additions & 35 deletions docs/content/docs/features/embeddings.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,39 +27,6 @@ embeddings: true
# .. other parameters
```

## Bert embeddings

To use `bert.cpp` models you can use the `bert` embedding backend.

An example model config file:

```yaml
name: text-embedding-ada-002
parameters:
model: bert
backend: bert-embeddings
embeddings: true
# .. other parameters
```

The `bert` backend uses [bert.cpp](https://github.com/skeskinen/bert.cpp) and uses `ggml` models.

For instance you can download the `ggml` quantized version of `all-MiniLM-L6-v2` from https://huggingface.co/skeskinen/ggml:

```bash
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
```

To test locally (LocalAI server running on `localhost`),
you can use `curl` (and `jq` at the end to prettify):

```bash
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}' | jq "."
```

## Huggingface embeddings

To use `sentence-transformers` and models in `huggingface` you can use the `sentencetransformers` embedding backend.
Expand Down Expand Up @@ -87,17 +54,26 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g

## Llama.cpp embeddings

Embeddings with `llama.cpp` are supported with the `llama` backend.
Embeddings with `llama.cpp` are supported with the `llama-cpp` backend, it needs to be enabled with `embeddings` set to `true`.

```yaml
name: my-awesome-model
backend: llama
backend: llama-cpp
embeddings: true
parameters:
model: ggml-file.bin
# ...
```

Then you can use the API to generate embeddings:

```bash
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "My text",
"model": "my-awesome-model"
}' | jq "."
```

## 💡 Examples

- Example that uses LLamaIndex and LocalAI as embedding: [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
2 changes: 1 addition & 1 deletion docs/content/docs/features/model-gallery.md
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{

```bash
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
"url": "github:mudler/LocalAI/gallery/bert-embeddings.yaml",
"id": "bert-embeddings",
"name": "text-embedding-ada-002"
}'
```
Expand Down
23 changes: 0 additions & 23 deletions embedded/models/bert-cpp.yaml

This file was deleted.

12 changes: 0 additions & 12 deletions gallery/bert-embeddings.yaml

This file was deleted.

Loading
Loading