From 1b31d8331ea3a1f848becac956ef2b6f8f583458 Mon Sep 17 00:00:00 2001
From: meejah
Date: Mon, 6 Mar 2023 19:50:51 +0000
Subject: [PATCH 1/2] Add script to create test directories & files

Copy-pasted from https://whetstone.private.storage/privatestorage/privatestoragedesktop/-/issues/761#note_27299
---
 integration/make_small_files.py | 152 ++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)
 create mode 100644 integration/make_small_files.py

diff --git a/integration/make_small_files.py b/integration/make_small_files.py
new file mode 100644
index 000000000..9356bdc2d
--- /dev/null
+++ b/integration/make_small_files.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+import math
+import random
+from pathlib import Path
+from hashlib import sha256
+
+import click
+
+
+# Description: Creates a lot of directories and small files to
+# performance test GridSync/Magic Folder with.
+# Author: Meejah
+# Date: 2023-01-20
+
+
+_hash = sha256("a random seed".encode("utf8"))
+
+
+def random_file_segment():
+    """
+    A random filename segment for a directory or file
+
+    Returns a prefix (of random length, at least one character) of
+    the hex digest of the module-level hash, which is advanced on
+    every call. Short prefixes can repeat between calls.
+    """
+    _hash.update("a".encode("utf8"))
+    digest = _hash.hexdigest()
+    return digest[:random.randrange(1, len(digest))]
+
+
+def unique_file_segments(count):
+    """
+    Generate `count` random filename segments, no two the same
+    """
+    seen = set()
+    while len(seen) < count:
+        seg = random_file_segment()
+        # short segments collide fairly often; skip repeats so
+        # callers never mkdir() or open() the same name twice
+        if seg not in seen:
+            seen.add(seg)
+            yield seg
+
+
+def generate_directories(base, count):
+    """
+    A generator that creates a certain number of distinct random
+    directory names below the base
+    """
+    # XXX would be nice to use hypothesis strategies to generate
+    # stuff, but .. that's hard?
+    for seg in unique_file_segments(count):
+        yield base / seg
+
+
+def generate_filename_segments(files):
+    """
+    Generate some number of distinct random filename segments
+    """
+    yield from unique_file_segments(files)
+
+
+def generate_local_paths(output, files, directories):
+    """
+    generator for a sequence of path names
+
+    Creates each of the `directories` sub-directories of `output` as
+    a side effect and yields `files` paths spread across them. The
+    caller must ensure 1 <= directories <= files.
+    """
+    dir_names = generate_directories(output, directories)
+    file_names = generate_filename_segments(files)
+
+    # since we need at least one file in each directory (because we
+    # don't directly store directories) we place one of our files in
+    # each subdir
+
+    reusable_dirs = []
+
+    for d in dir_names:
+        reusable_dirs.append(d)
+        f = next(file_names)
+        d.mkdir()
+        path = d / f
+        yield path
+
+    # the remaining files are dealt round-robin over the directories
+    idx = 0
+    for f in file_names:
+        idx = (idx + 1) % len(reusable_dirs)
+        yield reusable_dirs[idx] / f
+
+
+@click.command()
+@click.option(
+    "--files",
+    default=665,
+    help="Number of files to put data in"
+)
+@click.option(
+    "--directories",
+    default=237,
+    help="Number of folders to split data into"
+)
+@click.option(
+    "--output",
+    type=click.Path(exists=False, file_okay=False, dir_okay=True),
+    default="./small-test-case",
+)
+@click.option(
+    "--size",
+    default=3*1024*1024,
+)
+def small_files(files, directories, output, size):
+    """
+    small-files test-case creator
+    """
+    print(output)
+    if files < 1 or directories < 1:
+        raise click.UsageError(
+            "Must have at least one file and one directory"
+        )
+    if directories > files:
+        raise click.UsageError(
+            "Must have at least as many files as directories"
+        )
+
+    data_per_file = math.ceil(float(size) / files)
+
+    with open("/dev/urandom", "rb") as urandom:
+
+        def generate_data():
+            # could introduce some variance...
+            for _ in range(files):
+                yield urandom.read(data_per_file)
+
+        outp = Path(output)
+        # refuses to reuse an existing directory (mkdir raises)
+        outp.mkdir()
+
+        data = generate_data()
+
+        for path in generate_local_paths(outp, files, directories):
+            print(path)
+            with path.open('wb') as out_file:
+                out_file.write(next(data))
+
+
+if __name__ == "__main__":
+    small_files()
+

From 52c19ed7a18b2b2f9b6c34bf35cdb461fc8c36d5 Mon Sep 17 00:00:00 2001
From: "Christopher R. Wood"
Date: Tue, 7 Mar 2023 12:42:05 +0000
Subject: [PATCH 2/2] Add TODO about random source

From @crwood, see https://github.com/gridsync/gridsync/pull/604#issuecomment-1456718805
---
 integration/make_small_files.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/integration/make_small_files.py b/integration/make_small_files.py
index 9356bdc2d..0b8cc841a 100644
--- a/integration/make_small_files.py
+++ b/integration/make_small_files.py
@@ -128,6 +128,8 @@ def small_files(files, directories, output, size):
 
     data_per_file = math.ceil(float(size) / files)
 
+    # TODO: this should use Python's os.urandom() so
+    # that Windows folks can use it too
     with open("/dev/urandom", "rb") as urandom:
 
         def generate_data():