From ed46ff210715f8a9ebeed868740d6c9f2291b4f1 Mon Sep 17 00:00:00 2001
From: Fredrik Dahlgren
Date: Mon, 24 Jun 2024 11:36:37 +0200
Subject: [PATCH 1/3] Added GGML model file format

---
 ml/gguf.ksy | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 192 insertions(+)
 create mode 100644 ml/gguf.ksy

diff --git a/ml/gguf.ksy b/ml/gguf.ksy
new file mode 100644
index 000000000..8b26a9249
--- /dev/null
+++ b/ml/gguf.ksy
@@ -0,0 +1,192 @@
+meta:
+  id: gguf
+  title: GGML model file
+  file-extension: gguf
+  license: CC0-1.0
+  ks-version: 0.10
+  endian: le
+  bit-endian: le
+doc: |
+  GGUF is a file format for storing models for inference with GGML and
+  executors based on GGML. GGUF is a binary format that is designed for
+  fast loading and saving of models, and for ease of reading. Models
+  are traditionally developed using PyTorch or another framework, and
+  then converted to GGUF for use in GGML.
+
+  It is a successor file format to GGML, GGMF and GGJT, and is designed
+  to be unambiguous by containing all the information needed to load a
+  model. It is also designed to be extensible, so that new information
+  can be added to models without breaking compatibility.
+doc-ref:
+  - https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
+seq:
+  - id: magic
+    contents: GGUF
+  - id: version
+    type: u4
+  - id: num_infos
+    type: u8
+    doc: The number of tensors in the file
+  - id: num_kv
+    type: u8
+    doc: The number of header key-value pairs
+  - id: kv
+    type: gguf_kv
+    repeat: expr
+    repeat-expr: num_kv
+  - id: infos
+    type: gguf_tensor_info
+    repeat: expr
+    repeat-expr: num_infos
+  - id: data
+    type: gguf_tensor_data(_io.pos)
+
+
+types:
+  gguf_value:
+    -webide-representation: '{value:dec}'
+    params:
+      - id: type
+        type: u4
+        enum: gguf_type
+    seq:
+      - id: value
+        type:
+          switch-on: type
+          cases:
+            'gguf_type::gguf_type_uint8': u1
+            'gguf_type::gguf_type_int8': s1
+            'gguf_type::gguf_type_uint16': u2
+            'gguf_type::gguf_type_int16': s2
+            'gguf_type::gguf_type_uint32': u4
+            'gguf_type::gguf_type_int32': s4
+            'gguf_type::gguf_type_float32': f4
+            'gguf_type::gguf_type_bool': b1
+            'gguf_type::gguf_type_string': gguf_str
+            'gguf_type::gguf_type_array': gguf_array
+            'gguf_type::gguf_type_uint64': u8
+            'gguf_type::gguf_type_int64': s8
+            'gguf_type::gguf_type_float64': f8
+
+  gguf_kv:
+    -webide-representation: '{key}: {value}'
+    seq:
+      - id: key
+        type: gguf_str
+      - id: type
+        type: u4
+        enum: gguf_type
+      - id: value
+        type: gguf_value(type)
+
+
+  gguf_str:
+    -webide-representation: '"{data}"'
+    seq:
+      - id: size
+        type: u8
+      - id: data
+        size: size
+        type: str
+        encoding: ascii
+
+  gguf_array:
+    # Note that this is more permissive than the parser defined
+    # by the GGML library which does not permit nested arrays.
+    -webide-representation: '[{elems}]'
+    seq:
+      - id: type
+        type: u4
+        enum: gguf_type
+      - id: num_elems
+        type: u8
+      - id: elems
+        type: gguf_value(type) # Allows for nested arrays
+        repeat: expr
+        repeat-expr: num_elems
+
+  u8_dec:
+    # This type is only used to provide a nicer webide
+    # representation of `gguf_tensor_info` structures.
+    -webide-representation: '{value:dec}'
+    seq:
+      - id: value
+        type: u8
+
+  gguf_tensor_info:
+    -webide-representation: '{type}[{ne}] {name}'
+    seq:
+      - id: name
+        type: gguf_str
+      - id: num_dims
+        type: u4
+      - id: dims
+        type: u8_dec
+        repeat: expr
+        repeat-expr: num_dims
+      - id: type
+        type: u4
+        enum: ggml_type
+      - id: offset
+        type: u8
+
+  gguf_tensor_data:
+    params:
+      - id: offset
+        type: u8
+    instances:
+      padding:
+        # This hardcodes the default GGUF file alignment (32).
+        pos: offset
+        size: 32 - (offset % 32)
+      data:
+        pos: offset + padding.size
+        size-eos: true
+
+enums:
+  gguf_type:
+    0: gguf_type_uint8
+    1: gguf_type_int8
+    2: gguf_type_uint16
+    3: gguf_type_int16
+    4: gguf_type_uint32
+    5: gguf_type_int32
+    6: gguf_type_float32
+    7: gguf_type_bool
+    8: gguf_type_string
+    9: gguf_type_array
+    10: gguf_type_uint64
+    11: gguf_type_int64
+    12: gguf_type_float64
+
+  ggml_type:
+    0: ggml_type_f32
+    1: ggml_type_f16
+    2: ggml_type_q4_0
+    3: ggml_type_q4_1
+    4: ggml_type_q4_2
+    5: ggml_type_q4_3
+    6: ggml_type_q5_0
+    7: ggml_type_q5_1
+    8: ggml_type_q8_0
+    9: ggml_type_q8_1
+    10: ggml_type_q2_k
+    11: ggml_type_q3_k
+    12: ggml_type_q4_k
+    13: ggml_type_q5_k
+    14: ggml_type_q6_k
+    15: ggml_type_q8_k
+    16: ggml_type_iq2_xxs
+    17: ggml_type_iq2_xs
+    18: ggml_type_iq3_xxs
+    19: ggml_type_iq1_s
+    20: ggml_type_iq4_nl
+    21: ggml_type_iq3_s
+    22: ggml_type_iq2_s
+    23: ggml_type_iq4_xs
+    24: ggml_type_i8
+    25: ggml_type_i16
+    26: ggml_type_i32
+    27: ggml_type_i64
+    28: ggml_type_f64
+    29: ggml_type_iq1_m

From 919c1d57bc9c1126a4899ce1375a189810a3be1f Mon Sep 17 00:00:00 2001
From: Fredrik Dahlgren
Date: Mon, 24 Jun 2024 12:20:55 +0200
Subject: [PATCH 2/3] Updated GGUF format to support languages without variant support

---
 ml/gguf.ksy | 103 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 86 insertions(+), 17 deletions(-)

diff --git a/ml/gguf.ksy b/ml/gguf.ksy
index 8b26a9249..183895d20 100644
--- a/ml/gguf.ksy
+++ b/ml/gguf.ksy
@@ -44,7 +44,7 @@ seq:
 
 types:
   gguf_value:
-    -webide-representation: '{value:dec}'
+    -webide-representation: '{value}'
     params:
       - id: type
         type: u4
@@ -54,19 +54,19 @@ types:
         type:
           switch-on: type
           cases:
-            'gguf_type::gguf_type_uint8': u1
-            'gguf_type::gguf_type_int8': s1
-            'gguf_type::gguf_type_uint16': u2
-            'gguf_type::gguf_type_int16': s2
-            'gguf_type::gguf_type_uint32': u4
-            'gguf_type::gguf_type_int32': s4
-            'gguf_type::gguf_type_float32': f4
-            'gguf_type::gguf_type_bool': b1
+            'gguf_type::gguf_type_uint8': gguf_uint8
+            'gguf_type::gguf_type_int8': gguf_int8
+            'gguf_type::gguf_type_uint16': gguf_uint16
+            'gguf_type::gguf_type_int16': gguf_int16
+            'gguf_type::gguf_type_uint32': gguf_uint32
+            'gguf_type::gguf_type_int32': gguf_int32
+            'gguf_type::gguf_type_float32': gguf_float32
+            'gguf_type::gguf_type_bool': gguf_bool
             'gguf_type::gguf_type_string': gguf_str
             'gguf_type::gguf_type_array': gguf_array
-            'gguf_type::gguf_type_uint64': u8
-            'gguf_type::gguf_type_int64': s8
-            'gguf_type::gguf_type_float64': f8
+            'gguf_type::gguf_type_uint64': gguf_uint64
+            'gguf_type::gguf_type_int64': gguf_int64
+            'gguf_type::gguf_type_float64': gguf_float64
 
   gguf_kv:
     -webide-representation: '{key}: {value}'
@@ -104,24 +104,93 @@ types:
         type: gguf_value(type) # Allows for nested arrays
         repeat: expr
         repeat-expr: num_elems
+
+  gguf_bool:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value}'
+    seq:
+      - id: value
+        type: b1
+
+  gguf_uint8:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value:dec}'
+    seq:
+      - id: value
+        type: u1
+
+  gguf_int8:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value:dec}'
+    seq:
+      - id: value
+        type: s1
+
+  gguf_uint16:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value:dec}'
+    seq:
+      - id: value
+        type: u2
+
+  gguf_int16:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value:dec}'
+    seq:
+      - id: value
+        type: s2
+
+  gguf_uint32:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value:dec}'
+    seq:
+      - id: value
+        type: u4
+
+  gguf_int32:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value:dec}'
+    seq:
+      - id: value
+        type: s4
 
-  u8_dec:
-    # This type is only used to provide a nicer webide
-    # representation of `gguf_tensor_info` structures.
+  gguf_uint64:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
     -webide-representation: '{value:dec}'
     seq:
       - id: value
         type: u8
+
+  gguf_int64:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value:dec}'
+    seq:
+      - id: value
+        type: s8
+
+  gguf_float32:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value}'
+    seq:
+      - id: value
+        type: f4
+
+  gguf_float64:
+    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    -webide-representation: '{value}'
+    seq:
+      - id: value
+        type: f8
 
   gguf_tensor_info:
-    -webide-representation: '{type}[{ne}] {name}'
+    -webide-representation: '{type}[{dims}] {name}'
     seq:
       - id: name
         type: gguf_str
       - id: num_dims
         type: u4
       - id: dims
-        type: u8_dec
+        type: gguf_uint64
         repeat: expr
         repeat-expr: num_dims
       - id: type

From ecd1ad607859e19b7219fdd992b93003ab0fc156 Mon Sep 17 00:00:00 2001
From: Fredrik Dahlgren
Date: Mon, 24 Jun 2024 12:48:30 +0200
Subject: [PATCH 3/3] Updated comment formatting

---
 ml/gguf.ksy | 53 +++++++++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 20 deletions(-)

diff --git a/ml/gguf.ksy b/ml/gguf.ksy
index 183895d20..a7736e2a6 100644
--- a/ml/gguf.ksy
+++ b/ml/gguf.ksy
@@ -7,13 +7,12 @@ meta:
   endian: le
   bit-endian: le
 doc: |
-  GGUF is a file format for storing models for inference with GGML and
-  executors based on GGML. GGUF is a binary format that is designed for
-  fast loading and saving of models, and for ease of reading. Models
-  are traditionally developed using PyTorch or another framework, and
-  then converted to GGUF for use in GGML.
+  GGUF is a file format for storing machine learning models for inference with
+  the GGML library, or executors based on GGML. Models are typically developed
+  using PyTorch or some other framework, and then converted to GGUF for use in
+  GGML.
 
-  It is a successor file format to GGML, GGMF and GGJT, and is designed
+  GGUF is a successor file format to GGML, GGMF and GGJT, and is designed
   to be unambiguous by containing all the information needed to load a
   model. It is also designed to be extensible, so that new information
   can be added to models without breaking compatibility.
@@ -22,25 +21,29 @@ doc-ref:
 seq:
   - id: magic
     contents: GGUF
+    doc: GGUF file magic
   - id: version
     type: u4
+    doc: File format version
   - id: num_infos
     type: u8
     doc: The number of tensors in the file
   - id: num_kv
     type: u8
-    doc: The number of header key-value pairs
+    doc: The number of key-value pairs in the file header
   - id: kv
     type: gguf_kv
     repeat: expr
     repeat-expr: num_kv
+    doc: Key-value pairs
   - id: infos
     type: gguf_tensor_info
     repeat: expr
    repeat-expr: num_infos
+    doc: Tensor metadata
   - id: data
     type: gguf_tensor_data(_io.pos)
-
+    doc: Tensor data
 
 types:
   gguf_value:
@@ -79,7 +82,6 @@ types:
       - id: value
         type: gguf_value(type)
-
 
   gguf_str:
     -webide-representation: '"{data}"'
     seq:
@@ -106,77 +108,88 @@ types:
         repeat-expr: num_elems
 
   gguf_bool:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value}'
     seq:
       - id: value
         type: b1
 
   gguf_uint8:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value:dec}'
     seq:
       - id: value
         type: u1
 
   gguf_int8:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value:dec}'
     seq:
       - id: value
         type: s1
 
   gguf_uint16:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value:dec}'
     seq:
       - id: value
         type: u2
 
   gguf_int16:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value:dec}'
     seq:
       - id: value
         type: s2
 
   gguf_uint32:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value:dec}'
     seq:
       - id: value
         type: u4
 
   gguf_int32:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value:dec}'
     seq:
       - id: value
         type: s4
 
   gguf_uint64:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value:dec}'
     seq:
       - id: value
         type: u8
 
   gguf_int64:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value:dec}'
     seq:
       - id: value
         type: s8
 
   gguf_float32:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value}'
     seq:
       - id: value
         type: f4
 
   gguf_float64:
-    # This type is used as a work-around for languages (like C++) that do not support variant types.
+    # This type is used as a work-around for languages (like C++)
+    # that do not support variant types.
     -webide-representation: '{value}'
     seq:
       - id: value
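
The following sketch is not part of the patch series above; it is only meant to illustrate the fixed header layout that gguf.ksy describes. Assuming a file that follows the `seq` section of the grammar (the "GGUF" magic, then a little-endian u4 version followed by two u8 counts), those fields can be read with a few lines of Python:

    import struct
    import sys

    def read_gguf_header(path):
        # Fixed-size header per gguf.ksy: magic (4 bytes), version (u4),
        # num_infos (u8), num_kv (u8), all little-endian.
        with open(path, "rb") as f:
            magic = f.read(4)
            if magic != b"GGUF":
                raise ValueError("not a GGUF file")
            version, num_infos, num_kv = struct.unpack("<IQQ", f.read(20))
        return version, num_infos, num_kv

    if __name__ == "__main__":
        version, num_infos, num_kv = read_gguf_header(sys.argv[1])
        print(f"version={version} tensors={num_infos} kv_pairs={num_kv}")

The key-value pairs and tensor records that follow are variable-length, which is what the gguf_kv, gguf_value and gguf_tensor_info types in the grammar capture; a parser generated from the grammar (for example with kaitai-struct-compiler) is the more robust way to read those.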