refactor: adjust scalar

Signed-off-by: thxCode <[email protected]>
gpustack · Sep 2, 2024 · e2ee052 · e2ee052
1 parent 543fbe3
commit e2ee052
Show file tree

Hide file tree

Showing 5 changed files with 200 additions and 73 deletions.
diff --git a/README.md b/README.md
@@ -714,18 +714,18 @@ $ gguf-parser --hf-repo TheBloke/LLaMA-7b-GGUF --hf-file llama-7b.Q4_0.gguf --sk
 
 | Variant  | CPU FLOPS (Performance Core) | iGPU FLOPS             | (V)RAM Bandwidth | Q8_0 Max TPS | Q4_0 Max TPS |
 |----------|------------------------------|------------------------|------------------|--------------|--------------|
-| M1       | 51.2 GFLOPS  (4 cores)       | 2.6 TFLOPS (8 cores)   | 68.3 GBps        | 8.58         | 14.57        |
-| M1 Pro   | 102.4 GFLOPS  (8 cores)      | 5.2 TFLOPS (16 cores)  | 204.8 GBps       | 24.73        | 40.85        |
-| M1 Max   | 102.4 GFLOPS  (8 cores)      | 10.4 TFLOPS (32 cores) | 409.6 GBps       | 44.24        | 68.38        |
-| M1 Ultra | 204.8 GFLOPS (16 cores)      | 21 TFLOPS (64 cores)   | 819.2 GBps       | 88.47        | 136.77       |
-| M2       | 56 GFLOPS (4 cores)          | 3.6 TFLOPS (10 cores)  | 102.4 GBps       | 12.49        | 20.77        |
-| M2 Pro   | 112 GFLOPS (8 cores)         | 6.8 TFLOPS (19 cores)  | 204.8 GBps       | 24.98        | 41.55        |
-| M2 Max   | 112 GFLOPS (8 cores)         | 13.6 TFLOPS (38 cores) | 409.6 GBps       | 45.05        | 70.35        |
-| M2 Ultra | 224 GFLOPS (16 cores)        | 27.2 TFLOPS (76 cores) | 819.2 GBps       | 90.10        | 140.70       |
-| M3       | 64.96 GFLOPS (4 cores)       | 4.1 TFLOPS (10 cores)  | 102.4 GBps       | 12.68        | 21.31        |
-| M3 Pro   | 97.44 GFLOPS (6 cores)       | 7.4 TFLOPS (18 cores)  | 153.6 GBps       | 19.02        | 31.96        |
-| M3 Max   | 194.88 GFLOPS (12 cores)     | 16.4 TFLOPS (40 cores) | 409.6 GBps       | 49.16        | 80.90        |
-| M4       | 70.56 GFLOPS (4 cores)       | 4.1 TFLOPS             | 120 GBps         | 14.75        | 24.66        |
+| M1       | 51.2 GFLOPS  (4 cores)       | 2.6 TFLOPS (8 cores)   | 68.3 GBps        | 8.53         | 14.42        |
+| M1 Pro   | 102.4 GFLOPS  (8 cores)      | 5.2 TFLOPS (16 cores)  | 204.8 GBps       | 24.51        | 40.27        |
+| M1 Max   | 102.4 GFLOPS  (8 cores)      | 10.4 TFLOPS (32 cores) | 409.6 GBps       | 43.56        | 66.78        |
+| M1 Ultra | 204.8 GFLOPS (16 cores)      | 21 TFLOPS (64 cores)   | 819.2 GBps       | 87.12        | 133.56       |
+| M2       | 56 GFLOPS (4 cores)          | 3.6 TFLOPS (10 cores)  | 102.4 GBps       | 12.39        | 20.50        |
+| M2 Pro   | 112 GFLOPS (8 cores)         | 6.8 TFLOPS (19 cores)  | 204.8 GBps       | 24.78        | 41.00        |
+| M2 Max   | 112 GFLOPS (8 cores)         | 13.6 TFLOPS (38 cores) | 409.6 GBps       | 44.41        | 68.79        |
+| M2 Ultra | 224 GFLOPS (16 cores)        | 27.2 TFLOPS (76 cores) | 819.2 GBps       | 88.81        | 137.59       |
+| M3       | 64.96 GFLOPS (4 cores)       | 4.1 TFLOPS (10 cores)  | 102.4 GBps       | 12.59        | 21.06        |
+| M3 Pro   | 97.44 GFLOPS (6 cores)       | 7.4 TFLOPS (18 cores)  | 153.6 GBps       | 18.89        | 31.59        |
+| M3 Max   | 194.88 GFLOPS (12 cores)     | 16.4 TFLOPS (40 cores) | 409.6 GBps       | 48.71        | 79.71        |
+| M4       | 70.56 GFLOPS (4 cores)       | 4.1 TFLOPS             | 120 GBps         | 14.64        | 24.35        |
 
 > References:
 > - https://www.cpu-monkey.com/en/cpu_family-apple_m_series
@@ -746,9 +746,9 @@ and estimate the maximum tokens per second for three Apple Mac Studio devices co
 
 | Device                        | CPU FLOPS (Performance Core) | iGPU FLOPS             | (V)RAM Bandwidth | Thunderbolt Bandwidth | Role       |
 |-------------------------------|------------------------------|------------------------|------------------|-----------------------|------------|
-| Apple Mac Studio (M2 Ultra) 0 | 224 GFLOPS (16 cores)        | 27.2 TFLOPS (76 cores) | 819.2 GBps       | 5000 MBps             | Main       |
-| Apple Mac Studio (M2 Ultra) 1 | 224 GFLOPS (16 cores)        | 27.2 TFLOPS (76 cores) | 819.2 GBps       | 5000 MBps             | RPC Server |
-| Apple Mac Studio (M2 Ultra) 2 | 224 GFLOPS (16 cores)        | 27.2 TFLOPS (76 cores) | 819.2 GBps       | 5000 MBps             | RPC Server |
+| Apple Mac Studio (M2 Ultra) 0 | 224 GFLOPS (16 cores)        | 27.2 TFLOPS (76 cores) | 819.2 GBps       | 40 Gbps               | Main       |
+| Apple Mac Studio (M2 Ultra) 1 | 224 GFLOPS (16 cores)        | 27.2 TFLOPS (76 cores) | 819.2 GBps       | 40 Gbps               | RPC Server |
+| Apple Mac Studio (M2 Ultra) 2 | 224 GFLOPS (16 cores)        | 27.2 TFLOPS (76 cores) | 819.2 GBps       | 40 Gbps               | RPC Server |
 
 Get the maximum tokens per second with the following command:
 
@@ -758,16 +758,16 @@ $ gguf-parser --hf-repo leafspark/Meta-Llama-3.1-405B-Instruct-GGUF --hf-file Ll
   -c 512 \
   --device-metric "224GFLOPS;819.2GBps,27.2TFLOPS;819.2GBps" \
   --rpc host1:port,host2:port \
-  --device-metric "27.2TFLOPS;819.2GBps;5000MBps" \
-  --device-metric "27.2TFLOPS;819.2GBps;5000MBps" \
+  --device-metric "27.2TFLOPS;819.2GBps;40Gbps" \
+  --device-metric "27.2TFLOPS;819.2GBps;40Gbps" \
   --tensor-split "<Proportions>"
 ```
 
 | Tensor Split | Apple Mac Studio 0 (V)RAM Usage | Apple Mac Studio 1 (V)RAM Usage | Apple Mac Studio 2 (V)RAM Usage | Q4_0 Max TPS |
 |--------------|---------------------------------|---------------------------------|---------------------------------|--------------|
-| 5,1,1        | 761.53 MiB                      | 30.45 GiB                       | 30.36 GiB                       | 3.03         |
-| 10,1,1       | 821.53 MiB                      | 18.61 GiB                       | 16.83 GiB                       | 3.03         |
-| 20,1,1       | 861.53 MiB                      | 10.15 GiB                       | 8.37 GiB                        | 3.03         |
+| 5,1,1        | 761.53 MiB                      | 30.45 GiB                       | 30.36 GiB                       | 3.01         |
+| 10,1,1       | 821.53 MiB                      | 18.61 GiB                       | 16.83 GiB                       | 3.01         |
+| 20,1,1       | 861.53 MiB                      | 10.15 GiB                       | 8.37 GiB                        | 3.01         |
 
 #### Full Layers Offload (default)
 

diff --git a/cmd/gguf-parser/README.md b/cmd/gguf-parser/README.md
@@ -27,7 +27,7 @@ GLOBAL OPTIONS:
    --cache-type-k value, --ctk value                                   Specify the type of Key cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16")
    --cache-type-v value, --ctv value                                   Specify the type of Value cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16")
    --ctx-size value, -c value                                          Specify the size of prompt context, which is used to estimate the usage, default is equal to the model's maximum context size. (default: -1)
-   --device-metric value [ --device-metric value ]                     Specify the device metrics, which is used to estimate the usage, in form of "FLOPS;Up Bandwidth[;Down Bandwidth]". The FLOPS unit, select from [PFLOPS, TFLOPS, GFLOPS, MFLOPS, KFLOPS]. The Up/Down Bandwidth unit, select from [PiBps, PBps, TiBps, TBps, GiBps, GBps, MiBps, MBps, KiBps, KBps]. Up Bandwidth usually indicates the bandwidth to transmit the data to calculate, and Down Bandwidth indicates the bandwidth to transmit the calculated result to next layer. For example, "--device-metric 10TFLOPS;400GBps" means the device has 10 TFLOPS and 400 GBps Up/Down bandwidth, "--device-metric 10TFLOPS;400GBps;5000MBps" means the device has 5000MBps Down bandwidth. If the quantity specified by "--device-metric" is less than the number of estimation devices(determined by "--tensor-split" and "--rpc" to infer the device count), then replicate the last "--device-metric" to meet the required number of evaluation devices.
+   --device-metric value [ --device-metric value ]                     Specify the device metrics, which is used to estimate the usage, in form of "FLOPS;Up Bandwidth[;Down Bandwidth]". The FLOPS unit, select from [PFLOPS, TFLOPS, GFLOPS, MFLOPS, KFLOPS]. The Up/Down Bandwidth unit, select from [PiBps, TiBps, GiBps, MiBps, KiBps, PBps, TBps, GBps, MBps, KBps, Pbps, Tbps, Gbps, Mbps, Kbps]. Up Bandwidth usually indicates the bandwidth to transmit the data to calculate, and Down Bandwidth indicates the bandwidth to transmit the calculated result to next layer. For example, "--device-metric 10TFLOPS;400GBps" means the device has 10 TFLOPS and 400 GBps Up/Down bandwidth, "--device-metric 10TFLOPS;400GBps;5000MBps" means the device has 5000MBps Down bandwidth. If the quantity specified by "--device-metric" is less than the number of estimation devices(determined by "--tensor-split" and "--rpc" to infer the device count), then replicate the last "--device-metric" to meet the required number of evaluation devices.
    --flash-attention, --flash-attn, --fa                               Specify enabling Flash Attention, which is used to estimate the usage. Flash Attention can reduce the usage of RAM/VRAM. (default: false)
    --gpu-layers value, --ngl value, --n-gpu-layers value               Specify how many layers of the main model to offload, which is used to estimate the usage, default is full offloaded. (default: -1)
    --gpu-layers-draft value, --ngld value, --n-gpu-layers-draft value  Specify how many layers of the draft model to offload, which is used to estimate the usage, default is full offloaded. (default: -1)

diff --git a/cmd/gguf-parser/main.go b/cmd/gguf-parser/main.go
@@ -544,7 +544,7 @@ func main() {
 				Usage: "Specify the device metrics, " +
 					"which is used to estimate the usage, in form of \"FLOPS;Up Bandwidth[;Down Bandwidth]\". " +
 					"The FLOPS unit, select from [PFLOPS, TFLOPS, GFLOPS, MFLOPS, KFLOPS]. " +
-					"The Up/Down Bandwidth unit, select from [PiBps, PBps, TiBps, TBps, GiBps, GBps, MiBps, MBps, KiBps, KBps]. " +
+					"The Up/Down Bandwidth unit, select from [PiBps, TiBps, GiBps, MiBps, KiBps, PBps, TBps, GBps, MBps, KBps, Pbps, Tbps, Gbps, Mbps, Kbps]. " +
 					"Up Bandwidth usually indicates the bandwidth to transmit the data to calculate, " +
 					"and Down Bandwidth indicates the bandwidth to transmit the calculated result to next layer. " +
 					"For example, \"--device-metric 10TFLOPS;400GBps\" means the device has 10 TFLOPS and 400 GBps Up/Down bandwidth, " +

diff --git a/scalar.go b/scalar.go
@@ -42,20 +42,8 @@ type (
 )
 
 var (
-	// _SizeBaseUnitMatrix is the base unit matrix for size.
-	_SizeBaseUnitMatrix = []struct {
-		Base float64
-		Unit string
-	}{
-		{_Pi, "P"},
-		{_Ti, "T"},
-		{_Gi, "G"},
-		{_Mi, "M"},
-		{_Ki, "K"},
-	}
-
-	// _BytesBaseUnitMatrix is the base unit matrix for bytes.
-	_BytesBaseUnitMatrix = []struct {
+	// _GeneralBaseUnitMatrix is the base unit matrix shared by the FLOPS, bytes-per-second and GGUF bytes scalars.
+	_GeneralBaseUnitMatrix = []struct {
 		Base float64
 		Unit string
 	}{
@@ -71,6 +59,18 @@ var (
 		{_K, "K"},
 	}
 
+	// _SizeBaseUnitMatrix is the base unit matrix for size.
+	_SizeBaseUnitMatrix = []struct {
+		Base float64
+		Unit string
+	}{
+		{_Pi, "P"},
+		{_Ti, "T"},
+		{_Gi, "G"},
+		{_Mi, "M"},
+		{_Ki, "K"},
+	}
+
 	// _NumberBaseUnitMatrix is the base unit matrix for numbers.
 	_NumberBaseUnitMatrix = []struct {
 		Base float64
@@ -89,15 +89,15 @@ func ParseSizeScalar(s string) (_ SizeScalar, err error) {
 	if s == "" {
 		return 0, errors.New("invalid SizeScalar")
 	}
-	b := float64(0)
+	b := float64(1)
 	for i := range _SizeBaseUnitMatrix {
 		if strings.HasSuffix(s, _SizeBaseUnitMatrix[i].Unit) {
 			b = _SizeBaseUnitMatrix[i].Base
-			s = strings.TrimSpace(strings.TrimSuffix(s, _SizeBaseUnitMatrix[i].Unit))
+			s = strings.TrimSuffix(s, _SizeBaseUnitMatrix[i].Unit)
 			break
 		}
 	}
-	f, err := strconv.ParseFloat(s, 64)
+	f, err := strconv.ParseFloat(strings.TrimSpace(s), 64)
 	if err != nil {
 		return 0, err
 	}
@@ -125,16 +125,16 @@ func ParseFLOPSScalar(s string) (_ FLOPSScalar, err error) {
 	if s == "" {
 		return 0, errors.New("invalid FLOPSScalar")
 	}
-	s = strings.TrimSpace(strings.TrimSuffix(s, "FLOPS"))
-	b := float64(0)
-	for i := range _SizeBaseUnitMatrix {
-		if strings.HasSuffix(s, _SizeBaseUnitMatrix[i].Unit) {
-			b = _SizeBaseUnitMatrix[i].Base
-			s = strings.TrimSpace(strings.TrimSuffix(s, _SizeBaseUnitMatrix[i].Unit))
+	s = strings.TrimSuffix(s, "FLOPS")
+	b := float64(1)
+	for i := range _GeneralBaseUnitMatrix {
+		if strings.HasSuffix(s, _GeneralBaseUnitMatrix[i].Unit) {
+			b = _GeneralBaseUnitMatrix[i].Base
+			s = strings.TrimSuffix(s, _GeneralBaseUnitMatrix[i].Unit)
 			break
 		}
 	}
-	f, err := strconv.ParseFloat(s, 64)
+	f, err := strconv.ParseFloat(strings.TrimSpace(s), 64)
 	if err != nil {
 		return 0, err
 	}
@@ -146,10 +146,10 @@ func (s FLOPSScalar) String() string {
 		return "0 FLOPS"
 	}
 	b, u := float64(1), ""
-	for i := range _SizeBaseUnitMatrix {
-		if float64(s) >= _SizeBaseUnitMatrix[i].Base {
-			b = _SizeBaseUnitMatrix[i].Base
-			u = _SizeBaseUnitMatrix[i].Unit
+	for i := range _GeneralBaseUnitMatrix {
+		if float64(s) >= _GeneralBaseUnitMatrix[i].Base {
+			b = _GeneralBaseUnitMatrix[i].Base
+			u = _GeneralBaseUnitMatrix[i].Unit
 			break
 		}
 	}
@@ -162,31 +162,38 @@ func ParseBytesPerSecondScalar(s string) (_ BytesPerSecondScalar, err error) {
 	if s == "" {
 		return 0, errors.New("invalid BytesPerSecondScalar")
 	}
-	s = strings.TrimSpace(strings.TrimSuffix(s, "Bps"))
-	b := float64(0)
-	for i := range _BytesBaseUnitMatrix {
-		if strings.HasSuffix(s, _BytesBaseUnitMatrix[i].Unit) {
-			b = _BytesBaseUnitMatrix[i].Base
-			s = strings.TrimSpace(strings.TrimSuffix(s, _BytesBaseUnitMatrix[i].Unit))
+	b := float64(1)
+	o := float64(1)
+	switch {
+	case strings.HasSuffix(s, "Bps") || strings.HasSuffix(s, "B/s"):
+		s = strings.TrimSuffix(strings.TrimSuffix(s, "Bps"), "B/s")
+	case strings.HasSuffix(s, "bps") || strings.HasSuffix(s, "b/s"):
+		s = strings.TrimSuffix(strings.TrimSuffix(s, "bps"), "b/s")
+		o = 8
+	}
+	for i := range _GeneralBaseUnitMatrix {
+		if strings.HasSuffix(s, _GeneralBaseUnitMatrix[i].Unit) {
+			b = _GeneralBaseUnitMatrix[i].Base
+			s = strings.TrimSuffix(s, _GeneralBaseUnitMatrix[i].Unit)
 			break
 		}
 	}
-	f, err := strconv.ParseFloat(s, 64)
+	f, err := strconv.ParseFloat(strings.TrimSpace(s), 64)
 	if err != nil {
 		return 0, err
 	}
-	return BytesPerSecondScalar(f * b), nil
+	return BytesPerSecondScalar(f * b / o), nil
 }
 
 func (s BytesPerSecondScalar) String() string {
 	if s == 0 {
 		return "0 Bps"
 	}
 	b, u := float64(1), ""
-	for i := range _BytesBaseUnitMatrix {
-		if float64(s) >= _BytesBaseUnitMatrix[i].Base {
-			b = _BytesBaseUnitMatrix[i].Base
-			u = _BytesBaseUnitMatrix[i].Unit
+	for i := range _GeneralBaseUnitMatrix {
+		if float64(s) >= _GeneralBaseUnitMatrix[i].Base {
+			b = _GeneralBaseUnitMatrix[i].Base
+			u = _GeneralBaseUnitMatrix[i].Unit
 			break
 		}
 	}
@@ -213,16 +220,16 @@ func ParseGGUFBytesScalar(s string) (_ GGUFBytesScalar, err error) {
 	if s == "" {
 		return 0, errors.New("invalid GGUFBytesScalar")
 	}
-	s = strings.TrimSpace(strings.TrimSuffix(s, "B"))
-	b := float64(0)
-	for i := range _BytesBaseUnitMatrix {
-		if strings.HasSuffix(s, _BytesBaseUnitMatrix[i].Unit) {
-			b = _BytesBaseUnitMatrix[i].Base
-			s = strings.TrimSpace(strings.TrimSuffix(s, _BytesBaseUnitMatrix[i].Unit))
+	s = strings.TrimSuffix(s, "B")
+	b := float64(1)
+	for i := range _GeneralBaseUnitMatrix {
+		if strings.HasSuffix(s, _GeneralBaseUnitMatrix[i].Unit) {
+			b = _GeneralBaseUnitMatrix[i].Base
+			s = strings.TrimSuffix(s, _GeneralBaseUnitMatrix[i].Unit)
 			break
 		}
 	}
-	f, err := strconv.ParseFloat(s, 64)
+	f, err := strconv.ParseFloat(strings.TrimSpace(s), 64)
 	if err != nil {
 		return 0, err
 	}
@@ -241,10 +248,10 @@ func (s GGUFBytesScalar) String() string {
 		b = _Mi
 		u = "Mi"
 	} else {
-		for i := range _BytesBaseUnitMatrix {
-			if float64(s) >= _BytesBaseUnitMatrix[i].Base {
-				b = _BytesBaseUnitMatrix[i].Base
-				u = _BytesBaseUnitMatrix[i].Unit
+		for i := range _GeneralBaseUnitMatrix {
+			if float64(s) >= _GeneralBaseUnitMatrix[i].Base {
+				b = _GeneralBaseUnitMatrix[i].Base
+				u = _GeneralBaseUnitMatrix[i].Unit
 				break
 			}
 		}

diff --git a/scalar_test.go b/scalar_test.go
@@ -0,0 +1,120 @@
+package gguf_parser
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestParseSizeScalar(t *testing.T) { // Table-driven check of the unit suffixes accepted by ParseSizeScalar.
+	testCases := []struct {
+		given    string
+		expected SizeScalar
+	}{
+		{"1", 1}, // no unit suffix: the bare number is expected back unchanged
+		{"1K", 1 * _Ki},
+		{"1M", 1 * _Mi},
+		{"1G", 1 * _Gi},
+		{"1T", 1 * _Ti},
+		{"1P", 1 * _Pi},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.given, func(t *testing.T) { // one subtest per input, named after the input string
+			actual, err := ParseSizeScalar(tc.given)
+			if !assert.NoError(t, err) {
+				return // a parse error already failed the subtest; skip the value comparison
+			}
+			assert.Equal(t, tc.expected, actual)
+		})
+	}
+}
+
+func TestParseFLOPSScalar(t *testing.T) { // Table-driven check of the "<prefix>FLOPS" inputs accepted by ParseFLOPSScalar.
+	testCases := []struct {
+		given    string
+		expected FLOPSScalar
+	}{
+		{"1FLOPS", 1}, // no prefix before "FLOPS": the bare number is expected back unchanged
+		{"1KFLOPS", 1 * _K},
+		{"1MFLOPS", 1 * _M},
+		{"1GFLOPS", 1 * _G},
+		{"1TFLOPS", 1 * _T},
+		{"1PFLOPS", 1 * _P},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.given, func(t *testing.T) { // one subtest per input, named after the input string
+			actual, err := ParseFLOPSScalar(tc.given)
+			if !assert.NoError(t, err) {
+				return // a parse error already failed the subtest; skip the value comparison
+			}
+			assert.Equal(t, tc.expected, actual)
+		})
+	}
+}
+
+func TestParseBytesPerSecondScalar(t *testing.T) { // Table-driven check of byte-rate and bit-rate inputs accepted by ParseBytesPerSecondScalar.
+	testCases := []struct {
+		given    string
+		expected BytesPerSecondScalar
+	}{
+		{"1B/s", 1}, // "B/s" spelling of bytes per second, exercised here with the K..P prefixes
+		{"1KB/s", 1 * _K},
+		{"1MB/s", 1 * _M},
+		{"1GB/s", 1 * _G},
+		{"1TB/s", 1 * _T},
+		{"1PB/s", 1 * _P},
+		{"1KiBps", 1 * _Ki}, // "Bps" spelling of bytes per second, exercised here with the Ki..Pi prefixes
+		{"1MiBps", 1 * _Mi},
+		{"1GiBps", 1 * _Gi},
+		{"1TiBps", 1 * _Ti},
+		{"1PiBps", 1 * _Pi},
+		{"8b/s", 1}, // lowercase "b" denotes bits: 8 bits per second is expected to parse as 1 byte per second
+		{"1Kbps", 1 * _K >> 3}, // bit rates: the expected value is the prefix constant divided by 8 (>> 3)
+		{"1Mbps", 1 * _M >> 3},
+		{"1Gbps", 1 * _G >> 3},
+		{"1Tbps", 1 * _T >> 3},
+		{"1Pbps", 1 * _P >> 3},
+		{"1Kibps", 1 * _Ki >> 3},
+		{"1Mibps", 1 * _Mi >> 3},
+		{"1Gibps", 1 * _Gi >> 3},
+		{"1Tibps", 1 * _Ti >> 3},
+		{"1Pibps", 1 * _Pi >> 3},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.given, func(t *testing.T) { // one subtest per input, named after the input string
+			actual, err := ParseBytesPerSecondScalar(tc.given)
+			if !assert.NoError(t, err) {
+				return // a parse error already failed the subtest; skip the value comparison
+			}
+			assert.Equal(t, tc.expected, actual)
+		})
+	}
+}
+
+func TestParseGGUFBytesScalar(t *testing.T) { // Table-driven check of the "<prefix>B" inputs accepted by ParseGGUFBytesScalar.
+	testCases := []struct {
+		given    string
+		expected GGUFBytesScalar
+	}{
+		{"1B", 1}, // no prefix before "B": the bare number is expected back unchanged
+		{"1KB", 1 * _K},
+		{"1MB", 1 * _M},
+		{"1GB", 1 * _G},
+		{"1TB", 1 * _T},
+		{"1PB", 1 * _P},
+		{"1KiB", 1 * _Ki}, // the "i" prefixes are expected to select the _Ki.._Pi constants instead of _K.._P
+		{"1MiB", 1 * _Mi},
+		{"1GiB", 1 * _Gi},
+		{"1TiB", 1 * _Ti},
+		{"1PiB", 1 * _Pi},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.given, func(t *testing.T) { // one subtest per input, named after the input string
+			actual, err := ParseGGUFBytesScalar(tc.given)
+			if !assert.NoError(t, err) {
+				return // a parse error already failed the subtest; skip the value comparison
+			}
+			assert.Equal(t, tc.expected, actual)
+		})
+	}
+}