diff --git a/cmd/gguf-parser/main.go b/cmd/gguf-parser/main.go index 7b2e82a..798090a 100644 --- a/cmd/gguf-parser/main.go +++ b/cmd/gguf-parser/main.go @@ -1196,6 +1196,9 @@ func tprint(title string, headers, bodies [][]any) { for i := range r { r[i].Number = i + 1 r[i].AutoMerge = true + if len(headers) > 1 && (headers[1][i] == "UMA" || headers[1][i] == "NonUMA") { + r[i].AutoMerge = false + } r[i].Align = text.AlignCenter r[i].AlignHeader = text.AlignCenter } diff --git a/file_estimate.go b/file_estimate.go index 56e67ec..4c80ff4 100644 --- a/file_estimate.go +++ b/file_estimate.go @@ -221,7 +221,7 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( } // Full offload: isOffloadOutputLayer && nLoadLayers == 0. - // Partial offload: nLoadLayers > 0 && nOffloadLayers > 0. + // Partial offload: !isOffloadOutputLayer. // Zero offload: nOffloadLayers == 0. var ( nLoadLayers = a.BlockCount @@ -254,8 +254,8 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( e.OffloadLayers = nOffloadLayers fullOffload = isOffloadOutputLayer && nLoadLayers == 0 - partialOffload = nLoadLayers > 0 && nOffloadLayers > 0 - zeroOffload = !fullOffload && !partialOffload + partialOffload = !isOffloadOutputLayer + zeroOffload = nOffloadLayers == 0 } // Footprint.