Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: WIP: Adjust GPU Layers #3737

Draft
wants to merge 27 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
790700b
Add GGUF Parser
siddimore Oct 6, 2024
a234da5
chore: :arrow_up: Update ggerganov/llama.cpp to `a39ab216aa624308fda7…
localai-bot Oct 3, 2024
a3a03a4
chore: :arrow_up: Update ggerganov/whisper.cpp to `ede1718f6d45aa3f7a…
localai-bot Oct 3, 2024
0dc66a6
chore(federated): display a message when nodes are not available (#3721)
mudler Oct 3, 2024
717978e
Update CONTRIBUTING.md (#3723)
jjasghar Oct 3, 2024
78e29f3
models(gallery): add salamandra-7b-instruct (#3726)
mudler Oct 3, 2024
af1eb1d
chore: :arrow_up: Update ggerganov/llama.cpp to `d5ed2b929d85bbd7dbee…
localai-bot Oct 4, 2024
16dfee9
chore: :arrow_up: Update ggerganov/whisper.cpp to `ccc2547210e09e3a17…
localai-bot Oct 4, 2024
0ec4dc6
feat(multimodal): allow to template placeholders (#3728)
mudler Oct 4, 2024
bb130ff
Update README.md
mudler Oct 4, 2024
5b19cee
feat(vllm): add support for image-to-text and video-to-text (#3729)
mudler Oct 4, 2024
2d11bfc
chore: :arrow_up: Update ggerganov/llama.cpp to `71967c2a6d30da9f6158…
localai-bot Oct 4, 2024
fc74bf1
chore: :arrow_up: Update ggerganov/whisper.cpp to `2944cb72d952823780…
localai-bot Oct 4, 2024
4dffc45
feat(shutdown): allow force shutdown of backends (#3733)
mudler Oct 5, 2024
1c0300b
fix(base-grpc): close channel in base grpc server (#3734)
mudler Oct 5, 2024
64ade06
chore: :arrow_up: Update ggerganov/whisper.cpp to `6a94163b913d8e974e…
localai-bot Oct 6, 2024
a8f095a
chore: :arrow_up: Update ggerganov/llama.cpp to `8c475b97b8ba7d678d4c…
localai-bot Oct 6, 2024
1ad80c9
fix pr comment
siddimore Oct 6, 2024
6dee2a6
Add tests
siddimore Oct 7, 2024
6d1199d
Save Model Memory Usage
siddimore Oct 7, 2024
eaee726
fix merge conflict
siddimore Oct 7, 2024
0a306c8
Merge branch 'master' into adjust_default_gpu_layers
mudler Oct 7, 2024
e2fb38f
Merge branch 'master' into adjust_default_gpu_layers
mudler Oct 7, 2024
18bcce2
Add code to query NVIDIA device
siddimore Oct 8, 2024
37f2d65
Merge branch 'master' into adjust_default_gpu_layers
siddimore Oct 8, 2024
7380f80
Add AdjustGPULayers flag
siddimore Oct 9, 2024
cd1dc5d
rename file
siddimore Oct 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,13 @@ require (
github.com/google/s2a-go v0.1.7 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.4 // indirect
github.com/gpustack/gguf-parser-go v0.11.1 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/labstack/echo/v4 v4.12.0 // indirect
github.com/labstack/gommon v0.4.2 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pion/datachannel v1.5.8 // indirect
github.com/pion/dtls/v2 v2.2.12 // indirect
Expand All @@ -102,6 +106,7 @@ require (
github.com/pion/transport/v2 v2.2.10 // indirect
github.com/pion/turn/v2 v2.1.6 // indirect
github.com/pion/webrtc/v3 v3.3.0 // indirect
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/shirou/gopsutil/v4 v4.24.7 // indirect
github.com/urfave/cli/v2 v2.27.4 // indirect
Expand Down Expand Up @@ -272,7 +277,7 @@ require (
github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
github.com/smallnest/ringbuffer v0.0.0-20240802023544-f37d4ed3648b // indirect
github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/cast v1.5.0 // indirect
Expand Down Expand Up @@ -320,3 +325,19 @@ require (
howett.net/plist v1.0.0 // indirect
lukechampine.com/blake3 v1.3.0 // indirect
)

replace github.com/donomii/go-rwkv.cpp => /Users/siddharthmore/localAI/LocalAI/sources/go-rwkv.cpp
siddimore marked this conversation as resolved.
Show resolved Hide resolved

replace github.com/ggerganov/whisper.cpp => /Users/siddharthmore/localAI/LocalAI/sources/whisper.cpp

replace github.com/ggerganov/whisper.cpp/bindings/go => /Users/siddharthmore/localAI/LocalAI/sources/whisper.cpp/bindings/go

replace github.com/go-skynet/go-bert.cpp => /Users/siddharthmore/localAI/LocalAI/sources/go-bert.cpp

replace github.com/M0Rf30/go-tiny-dream => /Users/siddharthmore/localAI/LocalAI/sources/go-tiny-dream

replace github.com/mudler/go-piper => /Users/siddharthmore/localAI/LocalAI/sources/go-piper

replace github.com/mudler/go-stable-diffusion => /Users/siddharthmore/localAI/LocalAI/sources/go-stable-diffusion

replace github.com/go-skynet/go-llama.cpp => /Users/siddharthmore/localAI/LocalAI/sources/go-llama.cpp
13 changes: 13 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ github.com/google/go-containerregistry v0.19.2 h1:TannFKE1QSajsP6hPWb5oJNgKe1IKj
github.com/google/go-containerregistry v0.19.2/go.mod h1:YCMFNQeeXeLF+dnhhWkqDItx/JSkH01j1Kis4PsjzFI=
github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ=
github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8=
github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
Expand Down Expand Up @@ -295,6 +296,8 @@ github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gpustack/gguf-parser-go v0.11.1 h1:6kEt4a+O2AeG3yS1KvyTvqZD8ut73z6ddY+8VAhae/M=
github.com/gpustack/gguf-parser-go v0.11.1/go.mod h1:FWtM5FF/6JKF3tIuW4gbuq1yk7N3v3CIfWypnxo1KpY=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
Expand Down Expand Up @@ -355,6 +358,8 @@ github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwA
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
Expand Down Expand Up @@ -483,8 +488,12 @@ github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5
github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
Expand Down Expand Up @@ -659,6 +668,8 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8=
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA=
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
Expand Down Expand Up @@ -713,6 +724,8 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b h1:e9eeuSYSLmUKxy7ALzKcxo7ggTceQaVcBhjDIcewa9c=
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/smallnest/ringbuffer v0.0.0-20240802023544-f37d4ed3648b h1:p5SKp3b2tRcYoVve/bZxTl9EcUq9RpODRf8CBIfI1Bg=
github.com/smallnest/ringbuffer v0.0.0-20240802023544-f37d4ed3648b/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs=
github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8=
Expand Down
233 changes: 233 additions & 0 deletions pkg/model/gguf_parser_wrapper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
package model

import (
"context"
"fmt"
"net/url"
"os"
"strings"

ggufparser "github.com/gpustack/gguf-parser-go"
)

// Wrapper types mirroring the data extracted from a GGUF file by the parser.

// ModelEstimate is the top-level result for a parsed GGUF model. It groups
// the memory estimate with the architecture, metadata and tokenizer
// descriptions of the model, each serialized under its own JSON key.
type ModelEstimate struct {
	Estimate     ModelEstimateItems `json:"estimate"`
	Architecture Architecture       `json:"architecture"`
	Metadata     Metadata           `json:"metadata"`
	Tokenizer    Tokenizer          `json:"tokenizer"`
}

// ModelEstimateItems carries the memory figures of an estimate (Items)
// together with the model type/architecture and the run parameters
// (context size, flash attention, mmap, batch sizes, ...) reported by the
// estimate those figures belong to.
type ModelEstimateItems struct {
	Items             []ModelMemory `json:"items"`
	Type              string        `json:"type"`
	Architecture      string        `json:"architecture"`
	ContextSize       int           `json:"contextSize"`
	FlashAttention    bool          `json:"flashAttention"`
	NoMMap            bool          `json:"noMMap"`
	EmbeddingOnly     bool          `json:"embeddingOnly"`
	Distributable     bool          `json:"distributable"`
	LogicalBatchSize  int32         `json:"logicalBatchSize"`
	PhysicalBatchSize int32         `json:"physicalBatchSize"`
}

// ModelMemory is a single memory estimate: how many layers are offloaded,
// whether the model is fully offloaded, and the resulting RAM figure plus
// one VRAM figure per entry in VRAMs.
type ModelMemory struct {
	OffloadLayers uint64         `json:"offloadLayers"`
	FullOffloaded bool           `json:"fullOffloaded"`
	RAM           EstimateRAM    `json:"ram"`
	VRAMs         []EstimateVRAM `json:"vrams"`
}

// EstimateRAM holds the estimated RAM usage in its UMA and non-UMA variants.
// NOTE(review): values are presumably byte counts — confirm against the parser.
type EstimateRAM struct {
	UMA    uint64 `json:"uma"`
	NonUMA uint64 `json:"nonuma"`
}

// EstimateVRAM holds the estimated VRAM usage in its UMA and non-UMA variants.
// NOTE(review): values are presumably byte counts — confirm against the parser.
type EstimateVRAM struct {
	UMA    uint64 `json:"uma"`
	NonUMA uint64 `json:"nonuma"`
}

// Architecture describes the model architecture read from a GGUF file: its
// type and architecture name plus the maximum context, embedding and
// vocabulary lengths.
type Architecture struct {
	Type                 string `json:"type"`
	Architecture         string `json:"architecture"`
	MaximumContextLength int    `json:"maximumContextLength"`
	EmbeddingLength      int    `json:"embeddingLength"`
	VocabularyLength     int    `json:"vocabularyLength"`
}

// Metadata describes general properties of a GGUF file: identification
// (type, architecture, name, license), format details (quantization version,
// alignment, file type, endianness) and size figures (file size, size and
// parameter count).
type Metadata struct {
	Type                string `json:"type"`
	Architecture        string `json:"architecture"`
	QuantizationVersion int    `json:"quantizationVersion"`
	Alignment           int    `json:"alignment"`
	Name                string `json:"name"`
	License             string `json:"license"`
	FileType            int    `json:"fileType"`
	LittleEndian        bool   `json:"littleEndian"`
	FileSize            int64  `json:"fileSize"`
	Size                int64  `json:"size"`
	Parameters          int64  `json:"parameters"`
}

// Tokenizer describes the tokenizer stored in a GGUF file: the tokenizer
// model name and the length and size of its token list.
type Tokenizer struct {
	Model        string `json:"model"`
	TokensLength int    `json:"tokensLength"`
	TokensSize   int    `json:"tokensSize"`
}

// Default platform footprints, as used by ggufparser, expressed in bytes.
const (
	nonUMARamFootprint  uint64 = 150 << 20 // 150 MiB
	nonUMAVramFootprint uint64 = 250 << 20 // 250 MiB
)

// GetModelGGufData parses the GGUF model referenced by modelPath and returns
// its memory estimate together with architecture, metadata and tokenizer
// information.
//
// modelPath is classified, in this order, as:
//   - a URL (as decided by isURL), parsed remotely;
//   - an Ollama model, when the string ends in "ollama";
//   - a Hugging Face reference of the form huggingface.co/<repo>/<file>,
//     where <repo> may be a bare name or an "owner/name" pair;
//   - a path to an existing local file.
//
// Any other input yields an "unsupported input type" error. Parser failures
// are wrapped with %w so callers can inspect the cause via errors.Is/As.
// The result may be nil with a nil error when the parser hands back no file
// (see estimateModelMemoryUsage).
func GetModelGGufData(modelPath string) (*ModelEstimate, error) {
	ctx := context.Background()

	switch {
	case isURL(modelPath):
		fmt.Println("Input is a URL.")
		ggufFile, err := ggufparser.ParseGGUFFileRemote(ctx, modelPath)
		if err != nil {
			return nil, fmt.Errorf("error parsing GGUF file from remote URL: %w", err)
		}
		return estimateModelMemoryUsage(ggufFile)

	case strings.HasSuffix(modelPath, "ollama"):
		// NOTE(review): detecting Ollama models by an "ollama" suffix looks
		// unusual for a model name — confirm the intended input format.
		fmt.Println("Input is an Ollama model.")
		ggufFile, err := ggufparser.ParseGGUFFileFromOllama(ctx, modelPath)
		if err != nil {
			return nil, fmt.Errorf("error parsing GGUF file from Ollama model: %w", err)
		}
		return estimateModelMemoryUsage(ggufFile)

	case strings.Contains(modelPath, "huggingface.co"):
		fmt.Println("Input is a Hugging Face model.")
		repo, file, err := splitHuggingFaceRef(modelPath)
		if err != nil {
			return nil, err
		}
		ggufFile, err := ggufparser.ParseGGUFFileFromHuggingFace(ctx, repo, file)
		if err != nil {
			return nil, fmt.Errorf("error parsing GGUF file from Hugging Face: %w", err)
		}
		return estimateModelMemoryUsage(ggufFile)

	case fileExists(modelPath):
		fmt.Println("Input is a file path.")
		ggufFile, err := ggufparser.ParseGGUFFile(modelPath)
		if err != nil {
			return nil, fmt.Errorf("error parsing GGUF file from file path: %w", err)
		}
		return estimateModelMemoryUsage(ggufFile)
	}

	return nil, fmt.Errorf("unsupported input type")
}

// splitHuggingFaceRef splits a Hugging Face reference such as
// "huggingface.co/<repo>/<file>" into its repository and file parts.
//
// With exactly two path segments the first is the repository and the second
// the file. With three or more, the first two form an "owner/name"
// repository and everything after them is the file path inside it, so the
// file name is no longer dropped for owner-qualified repositories.
func splitHuggingFaceRef(ref string) (repo, file string, err error) {
	u, err := url.Parse(ref)
	if err != nil {
		return "", "", fmt.Errorf("invalid Hugging Face URL: %w", err)
	}

	// Scheme-less references keep the host as the first path segment; strip
	// it so both spellings yield the same segments.
	path := strings.TrimPrefix(strings.Trim(u.Path, "/"), "huggingface.co/")
	segments := strings.FieldsFunc(path, func(r rune) bool { return r == '/' })

	switch {
	case len(segments) < 2 || segments[0] == "huggingface.co":
		return "", "", fmt.Errorf("invalid Hugging Face model format. Expected format: huggingface.co/<repo>/<file>")
	case len(segments) == 2:
		return segments[0], segments[1], nil
	default:
		return segments[0] + "/" + segments[1], strings.Join(segments[2:], "/"), nil
	}
}

// isURL reports whether input is an absolute URL, i.e. it parses as a
// request URI and carries both a scheme and a host.
//
// url.ParseRequestURI alone also accepts absolute filesystem paths such as
// "/models/x.gguf", which would route local files to the remote parser, so
// the scheme and host are checked explicitly.
func isURL(input string) bool {
	u, err := url.ParseRequestURI(input)
	if err != nil {
		return false
	}
	return u.Scheme != "" && u.Host != ""
}

// fileExists reports whether filename refers to an existing entry that is
// not a directory.
//
// Any error from os.Stat (not only "does not exist", but also e.g. an
// invalid name or a non-directory path component) is treated as "no usable
// file"; in those cases Stat returns a nil FileInfo, which must not be
// dereferenced.
func fileExists(filename string) bool {
	info, err := os.Stat(filename)
	if err != nil {
		return false
	}
	return !info.IsDir()
}

// estimateModelMemoryUsage converts a parsed GGUF file into a ModelEstimate.
//
// It runs the parser's llama.cpp run estimation with default options,
// summarizes it with the default non-UMA RAM/VRAM footprints, and copies the
// architecture, metadata and tokenizer descriptions into the wrapper types.
//
// A nil ggufFile is reported on stdout and yields (nil, nil) so that callers
// can fall back to default values; callers must nil-check the result.
func estimateModelMemoryUsage(ggufFile *ggufparser.GGUFFile) (*ModelEstimate, error) {
	if ggufFile == nil {
		fmt.Printf("Error Invalid GGUF File \n")

		// Invalid model path: return nil so the caller uses default values.
		return nil, nil
	}

	runEstimate := ggufFile.EstimateLLaMACppRun()

	// Summarize the item with mmap and footprint values.
	summary := runEstimate.SummarizeItem(true, nonUMARamFootprint, nonUMAVramFootprint)

	architecture := ggufFile.Architecture()
	metadata := ggufFile.Metadata()
	tokenizer := ggufFile.Tokenizer()

	// Map every VRAM entry instead of indexing the first one, which would
	// panic on an empty summary and drop any additional entries.
	vrams := make([]EstimateVRAM, 0, len(summary.VRAMs))
	for _, v := range summary.VRAMs {
		vrams = append(vrams, EstimateVRAM{
			UMA:    uint64(v.UMA),
			NonUMA: uint64(v.NonUMA),
		})
	}

	payload := ModelEstimate{
		Estimate: ModelEstimateItems{
			Items: []ModelMemory{
				{
					OffloadLayers: summary.OffloadLayers,
					FullOffloaded: summary.FullOffloaded,
					RAM: EstimateRAM{
						UMA:    uint64(summary.RAM.UMA),
						NonUMA: uint64(summary.RAM.NonUMA),
					},
					VRAMs: vrams,
				},
			},
			Type:              architecture.Type,
			Architecture:      architecture.Architecture,
			ContextSize:       int(runEstimate.ContextSize),
			FlashAttention:    runEstimate.FlashAttention,
			NoMMap:            runEstimate.NoMMap,
			EmbeddingOnly:     runEstimate.EmbeddingOnly,
			Distributable:     runEstimate.Distributable,
			LogicalBatchSize:  runEstimate.LogicalBatchSize,
			PhysicalBatchSize: runEstimate.PhysicalBatchSize,
		},
		Architecture: Architecture{
			// Taken from the architecture section, matching Estimate.Type.
			Type:                 architecture.Type,
			Architecture:         architecture.Architecture,
			MaximumContextLength: int(architecture.MaximumContextLength),
			EmbeddingLength:      int(architecture.EmbeddingLength),
			VocabularyLength:     int(architecture.VocabularyLength),
		},
		Metadata: Metadata{
			Type:                metadata.Type,
			Architecture:        metadata.Architecture,
			QuantizationVersion: int(metadata.QuantizationVersion),
			Alignment:           int(metadata.Alignment),
			Name:                metadata.Name,
			License:             metadata.License,
			FileType:            int(metadata.FileType),
			LittleEndian:        metadata.LittleEndian,
			FileSize:            int64(metadata.FileSize),
			Size:                int64(metadata.Size),
			Parameters:          int64(metadata.Parameters),
		},
		Tokenizer: Tokenizer{
			Model:        tokenizer.Model,
			TokensLength: int(tokenizer.TokensLength),
			TokensSize:   int(tokenizer.TokensSize),
		},
	}

	return &payload, nil
}
Loading