From e236528e7628a0e59751eee9addf21fc3c33d376 Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 14 Jul 2024 16:47:14 +1000 Subject: [PATCH] gguf_hash.py: Add sha256 (#8470) * gguf_hash.py: Add sha256 * gguf_hash.py: rename string UUIDv5 --> uuid * Apply suggestions from code review Co-authored-by: compilade --------- Co-authored-by: compilade --- gguf-py/scripts/gguf_hash.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/gguf-py/scripts/gguf_hash.py b/gguf-py/scripts/gguf_hash.py index 770b79a93471c..ee34d09bfe7ef 100755 --- a/gguf-py/scripts/gguf_hash.py +++ b/gguf-py/scripts/gguf_hash.py @@ -27,8 +27,9 @@ # For more information about what field.parts and field.data represent, # please see the comments in the modify_gguf.py example. -def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar) -> None: +def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar: bool, no_layer: bool) -> None: sha1 = hashlib.sha1() + sha256 = hashlib.sha256() uuidv5_sha1 = hashlib.sha1() uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes) @@ -50,7 +51,7 @@ def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar) -> None: bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar) # Hashing Process - for n, tensor in enumerate(reader.tensors, 1): + for tensor in reader.tensors: # We don't need these if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): @@ -62,29 +63,39 @@ def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar) -> None: sum_weights_in_tensor *= dim bar.update(sum_weights_in_tensor) - sha1_layer = hashlib.sha1() - sha1_layer.update(tensor.data.data) + if not no_layer: + + sha1_layer = hashlib.sha1() + sha1_layer.update(tensor.data.data) + print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100 + + sha256_layer = hashlib.sha256() + sha256_layer.update(tensor.data.data) + print("sha256 {0} {1}:{2}".format(sha256_layer.hexdigest(), filename, tensor.name)) # noqa: NP100 + sha1.update(tensor.data.data) + sha256.update(tensor.data.data) uuidv5_sha1.update(tensor.data.data) - print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100 # Flush Hash Progress Bar bar.close() # Display Hash Output - print("sha1 {0} {1}".format(sha1.hexdigest(), filename)) # noqa: NP100 - print("UUIDv5 {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # noqa: NP100 + print("sha1 {0} {1}".format(sha1.hexdigest(), filename)) # noqa: NP100 + print("sha256 {0} {1}".format(sha256.hexdigest(), filename)) # noqa: NP100 + print("uuid {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # noqa: NP100 def main() -> None: parser = argparse.ArgumentParser(description="Dump GGUF file metadata") parser.add_argument("model", type=str, help="GGUF format model filename") + parser.add_argument("--no-layer", action="store_true", help="exclude per layer hash") parser.add_argument("--verbose", action="store_true", help="increase output verbosity") parser.add_argument("--progressbar", action="store_true", help="enable progressbar") args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) reader = GGUFReader(args.model, 'r') - gguf_hash(reader, args.model, not args.progressbar) + gguf_hash(reader, args.model, not args.progressbar, args.no_layer) if __name__ == '__main__':