diff --git a/CHANGELOG.md b/CHANGELOG.md index 450f3eb..f346d5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [latest](https://github.com/tarantool/sdvg/compare/0.0.1..master) +### Changed + +- The `template` field in the `string` data type is now used to generate template strings + with the ability to use the values of any columns of the generated model. + +- In the `format_template` field of the output parameters, the variable `ColumnNames` is now available. + +### Breaking changes + +- Using the `template` field to specify a string pattern like `Aa0#` is no longer supported; + `pattern` should be used instead. + +- The `Rows` variable in the `format_template` field of the output parameters is now a two-dimensional array, + not a map. + ## [0.0.1](https://github.com/tarantool/sdvg/compare/36d0930..0.0.1) - 2025-07-21 ### Added diff --git a/config/models.yml b/config/models.yml index b3b22d9..e382e34 100644 --- a/config/models.yml +++ b/config/models.yml @@ -62,9 +62,13 @@ models: - name: passport type: string type_params: - template: AA 00 000 000 + pattern: AA 00 000 000 distinct_percentage: 1 ordered: true + - name: email + type: string + type_params: + template: "{{ .first_name_en | lower }}.{{ .id }}@email.com" - name: created type: datetime type_params: diff --git a/doc/en/usage.md b/doc/en/usage.md index 73c7d20..6683903 100644 --- a/doc/en/usage.md +++ b/doc/en/usage.md @@ -154,8 +154,11 @@ Structure `models[*].columns[*].type_params` for data type `string`: - `min_length`: Minimum string length. Default is `1`. - `max_length`: Maximum string length. Default is `32`. - `logical_type`: Logical type of string. Supported values: `first_name`, `last_name`, `phone`, `text`. -- `template`: Template for string generation. Symbol `A` - any uppercase letter, symbol `a` - any lowercase letter, - symbol `0` - any digit, symbol `#` - any character. 
Other characters remain as-is. +- `template`: Template for string generation. Allows you to use the values of any columns of the generated model. + Information about the functions available in template strings is described at the end of this section. + Cannot coexist with `ordered`, `distinct_percentage` and `distinct_count`. +- `pattern`: Pattern for string generation. The `A` symbol is any capital letter, the `a` symbol is any small letter, + symbol `0` is any digit, the `#` symbol is any character, and the other characters remain as they are. - `locale`: Locale for generated strings. Supported values: `ru`, `en`. Default is `en`. - `without_large_letters`: Flag indicating if uppercase letters should be excluded from the string. - `without_small_letters`: Flag indicating if lowercase letters should be excluded from the string. @@ -189,53 +192,66 @@ Structure `output.params` for format `http`: - `batch_size`: Number of data records sent in one request. Default is `1000`. - `workers_count`: Number of threads for writing data. Default is `1`. *Experimental field.* - `headers`: HTTP request headers specified as a dictionary. Default is none. -- `format_template`: Template-based format for sending data, configured using Golang templates. - Available for use in `format_template`: - - - fields: +- `format_template`: Template-based format for sending data, configured using templates. + There are 3 fields available for use in `format_template`: * `ModelName` - name of the model. - * `Rows` - array of records, where each element is a dictionary representing a data row. - Dictionary keys correspond to column names, and values correspond to data in those columns. - - functions: - * `len` - returns the length of the given element. - * `json` - converts the given element to a JSON string. 
- - Example value for the `format_template` field: - - ```yaml - format_template: | - { - "table_name": "{{ .ModelName }}", - "meta": { - "rows_count": {{ len .Rows }} - }, - "rows": [ - {{- range $i, $row := .Rows }} - {{- if $i}},{{ end }} - { - "id": {{ index $row "id" }}, - "username": "{{ index $row "name" }}" - } - {{- end }} - ] - } - ``` + * `ColumnNames` - array of column names. + * `Rows` - a two-dimensional array, where each outer element represents a table row, + and the inner element contains values of this row in the same order as `ColumnNames`. Default value for the `format_template` field: - ```yaml format_template: | { "table_name": {{ .ModelName }}, - "rows": {{ json .Rows }} + "rows": {{ rowsJson .ColumnNames .Rows }} } ``` + + You can read about the available functions and the use of template strings at the end of this section. Structure of `output.params` for `tcs` format: Similar to the structure for the `http` format, except that the `format_template` field is immutable and always set to its default value. +Using Template Strings: + +Template strings are implemented using the standard Go library; you can read about +all its features and available functions in this [documentation](https://pkg.go.dev/text/template). + +Accessing Data: + +In a template, data is accessed using `.` (the object or value passed to the template) +and the field name, for example: `{{ .var }}`. + +Function calls: + +- direct call: `{{ upper .name }}`. +- using pipe: `{{ .name | upper }}`. + +The following is a list of additional functions available in certain template fields: + +In the `template` field of `string` data type: + +- `upper`: converts the string to upper case. +- `lower`: converts the string to lower case. + +In the `format_template` field of the output parameters: + +- `len`: returns the length of the element. +- `json`: converts the element to a JSON string. 
+- `rowsJson`: converts an array of column names (`ColumnNames`) and a two-dimensional array of rows (`Rows`) + into a JSON array whose elements are objects of the form: + ``` + { + "columnName1": value1, + "columnName2": value2, + ... + } + ``` + where each object corresponds to one row of the table. + #### Examples of data generation configuration Example data model configuration: @@ -301,9 +317,13 @@ models: - name: passport type: string type_params: - template: AA 00 000 000 + pattern: AA 00 000 000 distinct_percentage: 1 ordered: true + - name: email + type: string + type_params: + template: "{{ .first_name_en | lower }}.{{ .id }}@example.com" - name: rating type: float type_params: @@ -382,7 +402,7 @@ output: "meta": { "rows_count": {{ len .Rows }} }, - "rows": {{ json .Rows }} + "rows": {{ rowsJson .ColumnNames .Rows }} } models: diff --git a/doc/ru/usage.md b/doc/ru/usage.md index 0a44b75..9d151c6 100644 --- a/doc/ru/usage.md +++ b/doc/ru/usage.md @@ -160,8 +160,11 @@ open_ai: - `min_length`: Минимальная длина строки. По умолчанию `1`. - `max_length`: Максимальная длина строки. По умолчанию `32`. - `logical_type`: Логический тип строки. Поддерживаемые значения: `first_name`, `last_name`, `phone`, `text`. -- `template`: Шаблон для генерации строки. Символ `A` - любая большая буква, символ `a` - любая маленькая буква, - символ `0` - любая цифра, символ `#` - любой символ. Остальные символы остаются как есть. +- `template`: Шаблон для генерации строки. Позволяет использовать значения любых столбцов генерируемой модели. + Информация о том, как использовать шаблонные строки, описана в конце данного раздела. + Не работает совместно с `ordered`, `distinct_percentage` и `distinct_count`. +- `pattern`: Паттерн для генерации строки. Символ `A` - любая большая буква, символ `a` - любая маленькая буква, + символ `0` - любая цифра, символ `#` - любой символ, а остальные символы остаются как есть. - `locale`: Локаль для генерации строк. 
Поддерживаемые значения: `ru`, `en`. По умолчанию `en`. - `without_large_letters`: Флаг, указывающий, исключать ли большие буквы из строки. - `without_small_letters`: Флаг, указывающий, исключать ли маленькие буквы из строки. @@ -195,53 +198,66 @@ open_ai: - `batch_size`: Размер отправляемого в одном запросе массива данных. По умолчанию `1000`. - `workers_count`: Количество потоков для записи данных. По умолчанию `1`. *Является экспериментальным полем.* - `headers`: Заголовки http запроса, указываются в формате словаря. По умолчанию отсутствуют. -- `format_template`: Формат отправляемых данных, конфигурируемый с помощью шаблонов Golang. - Для использования в поле `format_template` доступны: - - - поля: +- `format_template`: Формат отправляемых данных, конфигурируемый с помощью шаблонов. + Для использования в `format_template` доступно 3 поля: * `ModelName` - имя модели. - * `Rows` - массив записей, где каждый элемент является словарем, который представляет собой строку данных. - Ключи словаря соответствуют названиям столбцов, а значения — данным в этих столбцах. - - функции: - * `len` - возвращает длину переданного элемента. - * `json` - преобразует переданный элемент в JSON строку. - - Пример значения поля `format_template`: - - ```yaml - format_template: | - { - "table_name": "{{ .ModelName }}", - "meta": { - "rows_count": {{ len .Rows }} - }, - "rows": [ - {{- range $i, $row := .Rows }} - {{- if $i}},{{ end }} - { - "id": {{ index $row "id" }}, - "username": "{{ index $row "name" }}" - } - {{- end }} - ] - } - ``` + * `ColumnNames` - массив имён колонок. + * `Rows` - двумерный массив, где каждый внешний элемент представляет строку таблицы, + а внутренний содержит значения этой строки в том же порядке, что и `ColumnNames`. 
Значение поля `format_template` по умолчанию: - ```yaml format_template: | { - "table_name": {{ .ModelName }}, - "rows": {{ json .Rows }} + "table_name": "{{ .ModelName }}", + "rows": {{ rowsJson .ColumnNames .Rows }} } ``` + + О доступных функциях и использовании шаблонных строк можно прочитать в конце данного раздела. Структура `output.params` для формата `tcs`: Подобна структуре для формата `http`, за исключением того, что поле `format_template` неизменяемое и всегда равняется значению по умолчанию. +Использование шаблонных строк: + +Шаблонные строки реализованы с использованием стандартной библиотеки golang, ознакомиться +со всеми ее возможностями и доступными функциями можно в данной [документации](https://pkg.go.dev/text/template). + +Доступ к данным: + +Обращение к данным в шаблоне выполняется с помощью `.` (объект или значение, переданное шаблону) +и имени переменной, например, `{{ .var }}`. + +Вызовы функций: + +- прямой вызов: `{{ upper .name }}`. +- с помощью pipe: `{{ .name | upper }}`. + +Ниже приведён список дополнительных функций, доступных в определённых полях шаблонов: + +В поле `template` типа данных `string`: + +- `upper`: преобразует строку в верхний регистр. +- `lower`: преобразует строку в нижний регистр. + +В поле `format_template` параметров вывода: + +- `len`: возвращает длину элемента. +- `json`: преобразует элемент в JSON строку. +- `rowsJson`: преобразует массив имён колонок (`ColumnNames`) и двумерный массив строк (`Rows`) + в JSON-массив, элементами которого являются объекты вида: + ``` + { + "columnName1": value1, + "columnName2": value2, + ... + } + ``` + где каждый объект соответствует одной строке таблицы. 
+ #### Примеры конфигурации генерации данных Пример конфигурации модели данных: @@ -307,9 +323,13 @@ models: - name: passport type: string type_params: - template: AA 00 000 000 + pattern: AA 00 000 000 distinct_percentage: 1 ordered: true + - name: email + type: string + type_params: + template: "{{ .first_name_en | lower }}.{{ .id }}@example.com" - name: rating type: float type_params: @@ -388,7 +408,7 @@ output: "meta": { "rows_count": {{ len .Rows }} }, - "rows": {{ json .Rows }} + "rows": {{ rowsJson .ColumnNames .Rows }} } models: diff --git a/go.mod b/go.mod index ae4ded1..1a3689c 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/labstack/echo/v4 v4.13.3 github.com/manifoldco/promptui v0.9.0 github.com/moby/term v0.5.2 + github.com/otaviokr/topological-sort v1.1.0 github.com/pkg/errors v0.9.1 github.com/sashabaranov/go-openai v1.36.1 github.com/spf13/afero v1.12.0 diff --git a/go.sum b/go.sum index a590230..11b141e 100644 --- a/go.sum +++ b/go.sum @@ -37,24 +37,41 @@ github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= 
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod 
h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -65,6 +82,7 @@ github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB1 github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/ilyakaznacheev/cleanenv v1.5.0 h1:0VNZXggJE2OYdXE87bfSSwGxeiGt9moSR2lOrsHHvr4= github.com/ilyakaznacheev/cleanenv v1.5.0/go.mod h1:a5aDzaJrLCQZsazHol1w8InnDcOX0OColm64SlIi6gk= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -113,11 +131,25 @@ github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELU github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/muesli/termenv v0.15.3-0.20240618155329-98d742f6907a h1:2MaM6YC3mGu54x+RKAA6JiFFHlHDY1UbkxqppT7wYOg= 
github.com/muesli/termenv v0.15.3-0.20240618155329-98d742f6907a/go.mod h1:hxSnBBYLK21Vtq/PHd0S2FYCxBXzBua8ov5s1RobyRQ= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.1 h1:foqVmeWDD6yYpK+Yz3fHyNIxFYNxswxqNFjSKe+vI54= +github.com/onsi/ginkgo v1.16.1/go.mod h1:CObGmKUOKaSC0RjmoAK7tKyn4Azo5P2IWuoMnvwxz1E= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.11.0 h1:+CqWgvj0OZycCaqclBD1pxKHAU+tOkHmQIWvDHq2aug= +github.com/onsi/gomega v1.11.0/go.mod h1:azGKhqFUon9Vuj0YmTfLSmx0FUwqXYSTl5re8lQLTUg= +github.com/otaviokr/topological-sort v1.1.0 h1:BrWj/bLOo9aZFUi0YN2/s4P/GRe2PSmb8cyX4w1ysNg= +github.com/otaviokr/topological-sort v1.1.0/go.mod h1:77ZaKUg7Ir1nL6DPwEIQFm9iH2OS5xxVWvzZ8xPTCFg= github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= @@ -134,8 +166,10 @@ github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= @@ -146,6 +180,7 @@ github.com/vbauerster/mpb/v8 v8.8.3 h1:dTOByGoqwaTJYPubhVz3lO5O6MK553XVgUo33LdnN github.com/vbauerster/mpb/v8 v8.8.3/go.mod h1:JfCCrtcMsJwP6ZwMn9e5LMnNyp3TVNpUWWkN+nd4EWk= github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= @@ -162,29 +197,60 @@ go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= 
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys 
v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= @@ -193,11 +259,25 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:+2Yz8+CLJbIfL9z73EW45avw8Lmge3xVElCP9zEKi50= 
google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg= google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= olympos.io/encoding/edn v0.0.0-20201019073823-d3554ca0b0a3 h1:slmdOY3vp8a7KQbHkL+FLbvbkgMqmXojpFUO/jENuqQ= diff --git a/internal/generator/common/utils.go b/internal/generator/common/utils.go index 38f95d0..523b6d4 100644 --- a/internal/generator/common/utils.go +++ b/internal/generator/common/utils.go @@ -7,12 +7,14 @@ import ( "fmt" "os" "reflect" + "regexp" "slices" "strconv" "strings" "time" "github.com/google/uuid" + "github.com/otaviokr/topological-sort/toposort" "github.com/pkg/errors" "gopkg.in/yaml.v3" ) @@ -357,3 +359,50 @@ func CtxClosed(ctx context.Context) bool { return false } } + +func ExtractValuesFromTemplate(template string) []string { + re := regexp.MustCompile(`{{.*?\.([^\s|}]+).*?}}`) + matches := re.FindAllStringSubmatch(template, -1) + + values := make([]string, 0, len(matches)) + + for _, match := range matches { + values = append(values, match[1]) + } + + return values +} + +// TopologicalSort sorts the given items in topological order using the provided +// function to extract node name and dependencies. +// Returns the sorted node names, a flag indicating if any dependencies exist, +// and an error if a cycle is detected. 
+func TopologicalSort[T any](items []T, nodeFunc func(T) (string, []string)) ([]string, bool, error) { + var ( + graph = make(map[string][]string, len(items)) + sortedVertexes = make([]string, len(items)) + hasDependencies bool + err error + ) + + for i, item := range items { + name, dependencies := nodeFunc(item) + if len(dependencies) > 0 { + hasDependencies = true + } + + sortedVertexes[i] = name + graph[name] = dependencies + } + + if !hasDependencies { + return sortedVertexes, false, nil + } + + sortedVertexes, err = toposort.ReverseTarjan(graph) + if err != nil { + return nil, false, errors.New(err.Error()) + } + + return sortedVertexes, hasDependencies, nil +} diff --git a/internal/generator/common/utils_test.go b/internal/generator/common/utils_test.go index 96b5529..eb8e608 100644 --- a/internal/generator/common/utils_test.go +++ b/internal/generator/common/utils_test.go @@ -697,3 +697,119 @@ func TestWalkWithFilter(t *testing.T) { t.Run(tc.name, func(t *testing.T) { testFunc(t, tc) }) } } + +func TestExtractValuesFromTemplate(t *testing.T) { + type testCase struct { + name string + template string + expected []string + } + + testCases := []testCase{ + { + name: "Empty template", + template: "", + expected: []string{}, + }, + { + name: "Valid template", + template: "{{ .foo }}.{{.boo}}", + expected: []string{"foo", "boo"}, + }, + { + name: "Template with functions", + template: "{{ upper .foo | lower }}@{{ .boo }}", + expected: []string{"foo", "boo"}, + }, + { + name: "Invalid template", + template: "{_{ foo }}", + expected: []string{}, + }, + } + + testFunc := func(t *testing.T, tc testCase) { + t.Helper() + + actual := ExtractValuesFromTemplate(tc.template) + require.Equal(t, tc.expected, actual) + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { testFunc(t, tc) }) + } +} + +func TestTopologicalSort(t *testing.T) { + type node struct { + name string + deps []string + } + + type testCase struct { + name string + items []node + 
wantErr bool + wantDependencies bool + expected []string + } + + testCases := []testCase{ + { + name: "Empty items", + items: []node{}, + wantErr: false, + wantDependencies: false, + expected: []string{}, + }, + { + name: "Items with dependencies", + items: []node{ + {name: "1", deps: []string{"3"}}, + {name: "2", deps: []string{"4"}}, + {name: "3", deps: []string{"2"}}, + {name: "4", deps: []string{}}, + }, + wantErr: false, + wantDependencies: true, + expected: []string{"4", "2", "3", "1"}, + }, + { + name: "Items without dependencies", + items: []node{ + {name: "1", deps: []string{}}, + {name: "2", deps: []string{}}, + {name: "3", deps: []string{}}, + }, + wantErr: false, + wantDependencies: false, + expected: []string{"1", "2", "3"}, + }, + { + name: "Items with cycle dependencies", + items: []node{ + {name: "1", deps: []string{"2"}}, + {name: "2", deps: []string{"1"}}, + }, + wantErr: true, + wantDependencies: false, + expected: nil, + }, + } + + testFunc := func(t *testing.T, tc testCase) { + t.Helper() + + actual, hasDependencies, err := TopologicalSort(tc.items, func(node node) (string, []string) { + return node.name, node.deps + }) + + require.Equal(t, tc.wantErr, err != nil) + require.Equal(t, tc.wantDependencies, hasDependencies) + require.Equal(t, tc.expected, actual) + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { testFunc(t, tc) }) + } +} diff --git a/internal/generator/models/generator_model.go b/internal/generator/models/generator_model.go index e3c451f..e32f6ff 100644 --- a/internal/generator/models/generator_model.go +++ b/internal/generator/models/generator_model.go @@ -426,6 +426,16 @@ func (p *Params) Validate() []error { errs = append(errs, datetimeParamsErrs...) 
} + if p.StringParams != nil && p.StringParams.Template != "" { + if p.Ordered { + errs = append(errs, errors.New("forbidden to use string template with ordered")) + } + + if common.Any(p.DistinctPercentage != 0, p.DistinctCount != 0) { + errs = append(errs, errors.New("forbidden to use string template with distinct params")) + } + } + // must be called only after parsing, filling defaults and validation of TypeParams. if p.Values != nil { if err := p.PostProcess(); err != nil { @@ -674,6 +684,7 @@ type ColumnStringParams struct { Locale string `backup:"true" json:"locale" yaml:"locale"` LogicalType string `backup:"true" json:"logical_type" yaml:"logical_type"` Template string `backup:"true" json:"template" yaml:"template"` + Pattern string `backup:"true" json:"pattern" yaml:"pattern"` WithoutLargeLetters bool `backup:"true" json:"without_large_letters" yaml:"without_large_letters"` WithoutSmallLetters bool `backup:"true" json:"without_small_letters" yaml:"without_small_letters"` WithoutNumbers bool `backup:"true" json:"without_numbers" yaml:"without_numbers"` @@ -703,6 +714,10 @@ func (p *ColumnStringParams) FillDefaults() { func (p *ColumnStringParams) Validate() []error { var errs []error + if p.Template != "" && p.Pattern != "" { + errs = append(errs, errors.Errorf("forbidden to use template and pattern at the same time")) + } + if p.MinLength > p.MaxLength { errs = append(errs, errors.Errorf( "min length (%v) should be less than or equal to max length (%v)", diff --git a/internal/generator/models/generator_output.go b/internal/generator/models/generator_output.go index 9f7e729..7964b0a 100644 --- a/internal/generator/models/generator_output.go +++ b/internal/generator/models/generator_output.go @@ -14,7 +14,7 @@ import ( const ( DefaultOutputDir = "output" DefaultOutputType = "csv" - defaultFormatTemplate = `{ "table_name": "{{ .ModelName }}", "rows": {{ json .Rows }} }` + defaultFormatTemplate = `{ "table_name": "{{ .ModelName }}", "rows": {{ rowsJson 
.ColumnNames .Rows }} }`
 	tcsTimeoutHeader            = "x-tcs-timeout_ms"
 	ParquetDateTimeMillisFormat = "millis"
 	ParquetDateTimeMicrosFormat = "micros"
diff --git a/internal/generator/output/general/writer/http/helpers.go b/internal/generator/output/general/writer/http/helpers.go
new file mode 100644
index 0000000..3b03501
--- /dev/null
+++ b/internal/generator/output/general/writer/http/helpers.go
@@ -0,0 +1,81 @@
+package http
+
+import (
+	"encoding/json"
+	"fmt"
+	"reflect"
+	"strings"
+)
+
+// toJSON marshals v with encoding/json and returns the document as a string.
+func toJSON(v any) (string, error) {
+	data, err := json.Marshal(v)
+
+	return string(data), err
+}
+
+// length returns the length of v as reported by reflect.Value.Len,
+// which panics when the kind of v has no length.
+func length(v any) int {
+	return reflect.ValueOf(v).Len()
+}
+
+// rowsJSON renders rows as a JSON array of objects, one object per row,
+// with columnNames as the keys, in the given order. Keys are JSON-encoded,
+// so a name containing quotes or backslashes still yields valid JSON.
+// It returns an error when a row does not hold exactly one value per column
+// or when a value cannot be marshaled.
+func rowsJSON(columnNames []string, rows [][]any) (string, error) {
+	// Encode every key once instead of once per row.
+	keys := make([]string, len(columnNames))
+
+	for i, columnName := range columnNames {
+		key, err := toJSON(columnName)
+		if err != nil {
+			return "", err
+		}
+
+		keys[i] = key
+	}
+
+	var sb strings.Builder
+
+	sb.WriteByte('[')
+
+	for i, row := range rows {
+		// Indexing row[j] below would panic on a short row, so reject mismatches up front.
+		if len(row) != len(keys) {
+			return "", fmt.Errorf(
+				"row %d: values count (%d) does not match columns count (%d)",
+				i, len(row), len(keys),
+			)
+		}
+
+		if i > 0 {
+			sb.WriteByte(',')
+		}
+
+		sb.WriteByte('{')
+
+		for j, key := range keys {
+			if j > 0 {
+				sb.WriteByte(',')
+			}
+
+			value, err := toJSON(row[j])
+			if err != nil {
+				return "", err
+			}
+
+			sb.WriteString(key)
+			sb.WriteByte(':')
+			sb.WriteString(value)
+		}
+
+		sb.WriteByte('}')
+	}
+
+	sb.WriteByte(']')
+
+	return sb.String(), nil
+}
diff --git a/internal/generator/output/general/writer/http/http.go b/internal/generator/output/general/writer/http/http.go
index 1fdde13..2d841e1 100644
--- a/internal/generator/output/general/writer/http/http.go
+++ b/internal/generator/output/general/writer/http/http.go
@@ -3,10 +3,8 @@ package http
 import (
 	"bytes"
 	"context"
-	"encoding/json"
 	"io"
 	"net/http"
-	"reflect"
 	"strings"
 	"sync"
 	"text/template"
@@ -20,13 +18,15 @@ import (
 )
 
 const (
+	maxBodySize  = 1 << 20 // 1 Mb
 	retryWaitMin = 1 * time.Second
 	retryWaitMax = 10 * time.Minute
 )
 
 type bodyPayload struct {
-	ModelName string
-	Rows      []map[string]any
+	ModelName   string
+	ColumnNames []string
+	Rows        [][]any
 }
 
 // Verify interface compliance in compile time.
@@ -41,6 +41,8 @@ type Writer struct { retryableClient *retryablehttp.Client lastErr error + payloadPool *sync.Pool + buffer []*models.DataRow bodyTemplate *template.Template @@ -60,11 +62,27 @@ func NewWriter( config *models.HTTPParams, writtenRowsChan chan<- uint64, ) *Writer { + columnNames := make([]string, len(model.Columns)) + for i, columns := range model.Columns { + columnNames[i] = columns.Name + } + + payloadPool := &sync.Pool{ + New: func() any { + return &bodyPayload{ + ModelName: model.Name, + ColumnNames: columnNames, + Rows: make([][]any, 0, config.BatchSize), + } + }, + } + httpWriter := &Writer{ ctx: ctx, model: model, config: config, writtenRowsChan: writtenRowsChan, + payloadPool: payloadPool, buffer: make([]*models.DataRow, 0, config.BatchSize), writerChan: make(chan []*models.DataRow), errorsChan: make(chan error, 1), @@ -131,15 +149,10 @@ func (w *Writer) Init() error { return errors.New("the writer has already been initialized") } - tmpl := template.New("body").Funcs(template.FuncMap{ - "json": func(v any) (string, error) { - data, err := json.Marshal(v) - - return string(data), err - }, - "len": func(v any) int { - return reflect.ValueOf(v).Len() - }, + tmpl := template.New("format_template").Funcs(template.FuncMap{ + "json": toJSON, + "len": length, + "rowsJson": rowsJSON, }) tmpl, err := tmpl.Parse(w.config.FormatTemplate) @@ -211,44 +224,36 @@ func (w *Writer) handleBatch(batch []*models.DataRow) error { } func (w *Writer) buildRequest(dataRows []*models.DataRow) (*retryablehttp.Request, error) { - // Build a slice of row objects by mapping column names to their corresponding values. - // Each row is represented as a map[string]any, with column names as keys and values from dataRows. - rows := make([]map[string]any, 0, len(dataRows)) + // Grab a payload with a ready slice and reset length to zero, keep capacity. 
+ // + //nolint:forcetypeassert + payload := w.payloadPool.Get().(*bodyPayload) + payload.Rows = payload.Rows[:0] for _, dataRow := range dataRows { - if len(dataRow.Values) != len(w.model.Columns) { - return nil, errors.New("values count does not match columns count") - } - - rowObj := make(map[string]any, len(dataRow.Values)) - for i, value := range dataRow.Values { - rowObj[w.model.Columns[i].Name] = value - } - - rows = append(rows, rowObj) + payload.Rows = append(payload.Rows, dataRow.Values) } // Prepare the data payload for the request template rendering. // The payload includes the model name and structured row data. - body := bodyPayload{ - ModelName: w.model.Name, - Rows: rows, - } - - var buf bytes.Buffer + buffer := new(bytes.Buffer) - err := w.bodyTemplate.Execute(&buf, body) + err := w.bodyTemplate.Execute(buffer, payload) if err != nil { + w.payloadPool.Put(payload) + return nil, errors.New(err.Error()) } + w.payloadPool.Put(payload) + // Construct the HTTP POST request with the generated JSON body and apply configured headers. 
req, err := retryablehttp.NewRequest( http.MethodPost, w.config.Endpoint, - &buf, + buffer, ) if err != nil { return nil, errors.New(err.Error()) @@ -275,13 +280,9 @@ func (w *Writer) sendRequest(req *retryablehttp.Request) error { return errors.New(err.Error()) } - - if resp == nil { - return errors.New("received nil response") - } defer resp.Body.Close() - body, err := io.ReadAll(resp.Body) + body, err := io.ReadAll(io.LimitReader(resp.Body, maxBodySize)) if err != nil { return errors.New(err.Error()) } diff --git a/internal/generator/output/general/writer/http/http_test.go b/internal/generator/output/general/writer/http/http_test.go index 289e252..7bc185e 100644 --- a/internal/generator/output/general/writer/http/http_test.go +++ b/internal/generator/output/general/writer/http/http_test.go @@ -55,7 +55,7 @@ func TestHandleRowsBatch(t *testing.T) { "meta": { "rows_count": {{ len .Rows }} }, - "rows": {{ json .Rows }} + "rows": {{ rowsJson .ColumnNames .Rows }} }`, model: &models.Model{ Name: "expectedModel", diff --git a/internal/generator/usecase/general/generator/generator.go b/internal/generator/usecase/general/generator/generator.go index 7e548fc..286a259 100644 --- a/internal/generator/usecase/general/generator/generator.go +++ b/internal/generator/usecase/general/generator/generator.go @@ -67,7 +67,6 @@ func NewColumnGenerator( } rangeGenerators = append(rangeGenerators, gen) - rangeRowsOffset += rangeRowsCount } @@ -200,7 +199,7 @@ type valueID struct { type BatchGenerator struct { numbers []valueID nextNumber int - valuer func(number valueID) (any, error) + valuer func(number valueID, rowValues map[string]any) (any, error) } func (cg *ColumnGenerator) NewBatchGenerator(batchSize uint64) *BatchGenerator { @@ -226,14 +225,14 @@ func (cg *ColumnGenerator) NewBatchGenerator(batchSize uint64) *BatchGenerator { } } - valuer := func(id valueID) (any, error) { + valuer := func(id valueID, rowValues map[string]any) (any, error) { vg := 
cg.rangeGenerators[id.generatorIndex] if vg.nullPercentage > 0 && fastRandomFloat(cg.dataColumnSeed+uint64(id.number)) < vg.nullPercentage { return nil, nil //nolint:nilnil } - return vg.generator.Value(id.number) + return vg.generator.Value(id.number, rowValues) } return &BatchGenerator{ @@ -243,8 +242,8 @@ func (cg *ColumnGenerator) NewBatchGenerator(batchSize uint64) *BatchGenerator { } // Value returns random value for described column. -func (g *BatchGenerator) Value() (any, error) { - res, err := g.valuer(g.numbers[g.nextNumber]) +func (g *BatchGenerator) Value(rowValues map[string]any) (any, error) { + res, err := g.valuer(g.numbers[g.nextNumber], rowValues) g.nextNumber++ g.nextNumber %= len(g.numbers) diff --git a/internal/generator/usecase/general/generator/value/datetime.go b/internal/generator/usecase/general/generator/value/datetime.go index 84b1cab..600e89b 100644 --- a/internal/generator/usecase/general/generator/value/datetime.go +++ b/internal/generator/usecase/general/generator/value/datetime.go @@ -26,7 +26,7 @@ func (g *DateTimeGenerator) SetTotalCount(totalValuesCount uint64) error { } // Value returns n-th date from range. 
-func (g *DateTimeGenerator) Value(number float64) (any, error) { +func (g *DateTimeGenerator) Value(number float64, _ map[string]any) (any, error) { fromSec := g.From.Unix() toSec := g.To.Unix() diff --git a/internal/generator/usecase/general/generator/value/enum.go b/internal/generator/usecase/general/generator/value/enum.go index 8c8e4f5..18d4413 100644 --- a/internal/generator/usecase/general/generator/value/enum.go +++ b/internal/generator/usecase/general/generator/value/enum.go @@ -31,7 +31,7 @@ func (g *EnumGenerator) SetTotalCount(totalValuesCount uint64) error { return nil } -func (g *EnumGenerator) Value(number float64) (any, error) { +func (g *EnumGenerator) Value(number float64, _ map[string]any) (any, error) { idx := int(math.Floor(number)) / g.rowsPerValue return g.Values[idx], nil diff --git a/internal/generator/usecase/general/generator/value/float.go b/internal/generator/usecase/general/generator/value/float.go index 0b68655..c1903a4 100644 --- a/internal/generator/usecase/general/generator/value/float.go +++ b/internal/generator/usecase/general/generator/value/float.go @@ -26,7 +26,7 @@ func (g *FloatGenerator) SetTotalCount(totalValuesCount uint64) error { } // Value returns n-th float number from range. -func (g *FloatGenerator) Value(number float64) (any, error) { +func (g *FloatGenerator) Value(number float64, _ map[string]any) (any, error) { value := orderedFloat64(g.From, g.To, number, g.totalValuesCount) if g.BitWidth == 32 { //nolint:mnd diff --git a/internal/generator/usecase/general/generator/value/integer.go b/internal/generator/usecase/general/generator/value/integer.go index 6956e3d..c83b1ba 100644 --- a/internal/generator/usecase/general/generator/value/integer.go +++ b/internal/generator/usecase/general/generator/value/integer.go @@ -22,7 +22,7 @@ func (g *IntegerGenerator) SetTotalCount(totalValuesCount uint64) error { } // Value returns n-th integer number from range. 
-func (g *IntegerGenerator) Value(number float64) (any, error) { +func (g *IntegerGenerator) Value(number float64, _ map[string]any) (any, error) { value := orderedInt64(g.From, g.To, number, g.totalValuesCount) switch g.BitWidth { diff --git a/internal/generator/usecase/general/generator/value/interfaces.go b/internal/generator/usecase/general/generator/value/interfaces.go index c67e5da..b669412 100644 --- a/internal/generator/usecase/general/generator/value/interfaces.go +++ b/internal/generator/usecase/general/generator/value/interfaces.go @@ -7,7 +7,7 @@ type Generator interface { // SetTotalCount method should remember count of rows to generate SetTotalCount(totalValuesCount uint64) error // Value method should return ordered unique value by number - Value(number float64) (any, error) + Value(number float64, rowValues map[string]any) (any, error) // ValuesCount method should return the number of possible values to generate ValuesCount() float64 } diff --git a/internal/generator/usecase/general/generator/value/string.go b/internal/generator/usecase/general/generator/value/string.go index fc1d7c5..280b6b0 100644 --- a/internal/generator/usecase/general/generator/value/string.go +++ b/internal/generator/usecase/general/generator/value/string.go @@ -1,10 +1,13 @@ package value import ( + "bytes" "math" "math/big" "slices" "strings" + "sync" + "text/template" "github.com/pkg/errors" "github.com/tarantool/sdvg/internal/generator/models" @@ -20,6 +23,8 @@ var _ Generator = (*StringGenerator)(nil) type StringGenerator struct { *models.ColumnStringParams totalValuesCount uint64 + template *template.Template + bufPool *sync.Pool localeModule locale.LocalModule charset []rune countByPrefix []float64 @@ -29,6 +34,26 @@ type StringGenerator struct { //nolint:cyclop func (g *StringGenerator) Prepare() error { + if g.Template != "" { + tmpl, err := template.New("template"). + Option("missingkey=error"). 
+ Funcs(template.FuncMap{ + "upper": strings.ToUpper, + "lower": strings.ToLower, + }). + Parse(g.Template) + if err != nil { + return errors.Errorf("failed to parse template: %s", err.Error()) + } + + g.template = tmpl + g.bufPool = &sync.Pool{ + New: func() any { + return new(bytes.Buffer) + }, + } + } + switch g.Locale { case "ru": g.localeModule = ru.NewLocaleModule(g.LogicalType, g.MinLength, g.MaxLength) @@ -171,8 +196,28 @@ func (g *StringGenerator) calculateCompletions(length int) []int64 { } // templateString returns n-th string by template. -func (g *StringGenerator) templateString(number float64) string { - val := []rune(g.Template) +// +//nolint:forcetypeassert +func (g *StringGenerator) templateString(rowValues map[string]any) (string, error) { + buf := g.bufPool.Get().(*bytes.Buffer) + buf.Reset() + + err := g.template.Execute(buf, rowValues) + if err != nil { + g.bufPool.Put(buf) + + return "", errors.New(err.Error()) + } + + val := buf.String() + g.bufPool.Put(buf) + + return val, nil +} + +// patternString returns n-th string by pattern. +func (g *StringGenerator) patternString(number float64) string { + val := []rune(g.Pattern) index := number / float64(g.totalValuesCount) for i := range val { @@ -410,9 +455,18 @@ func (g *StringGenerator) simpleString(number float64) string { } // Value returns n-th string from range. 
-func (g *StringGenerator) Value(number float64) (any, error) { +func (g *StringGenerator) Value(number float64, rowValues map[string]any) (any, error) { if g.Template != "" { - return g.templateString(number), nil + val, err := g.templateString(rowValues) + if err != nil { + return nil, errors.WithMessage(err, "failed to render template string") + } + + return val, nil + } + + if g.Pattern != "" { + return g.patternString(number), nil } switch g.LogicalType { @@ -432,13 +486,29 @@ func (g *StringGenerator) Value(number float64) (any, error) { //nolint:cyclop func (g *StringGenerator) ValuesCount() float64 { if g.Template != "" { - totalCount := float64(0) - totalCount += math.Pow(float64(len(g.localeModule.LargeLetters())), float64(strings.Count(g.Template, "A"))) - totalCount += math.Pow(float64(len(g.localeModule.SmallLetters())), float64(strings.Count(g.Template, "a"))) - totalCount += math.Pow(float64(len(locale.Numbers)), float64(strings.Count(g.Template, "0"))) - totalCount += math.Pow(float64(len(locale.SpecialChars)), float64(strings.Count(g.Template, "#"))) + return 1.0 + } - return totalCount + if g.Pattern != "" { + total := 1.0 + + if count := strings.Count(g.Pattern, "A"); count > 0 { + total *= math.Pow(float64(len(g.localeModule.LargeLetters())), float64(count)) + } + + if count := strings.Count(g.Pattern, "a"); count > 0 { + total *= math.Pow(float64(len(g.localeModule.SmallLetters())), float64(count)) + } + + if count := strings.Count(g.Pattern, "0"); count > 0 { + total *= math.Pow(float64(len(locale.Numbers)), float64(count)) + } + + if count := strings.Count(g.Pattern, "#"); count > 0 { + total *= math.Pow(float64(len(locale.SpecialChars)), float64(count)) + } + + return total } switch g.LogicalType { diff --git a/internal/generator/usecase/general/generator/value/uuid.go b/internal/generator/usecase/general/generator/value/uuid.go index 7bf922b..ca32580 100644 --- a/internal/generator/usecase/general/generator/value/uuid.go +++ 
b/internal/generator/usecase/general/generator/value/uuid.go @@ -25,7 +25,7 @@ func (g *UUIDGenerator) SetTotalCount(totalValuesCount uint64) error { } // Value returns n-th UUID from range. -func (g *UUIDGenerator) Value(number float64) (any, error) { +func (g *UUIDGenerator) Value(number float64, _ map[string]any) (any, error) { res := uuid.UUID{} index := number / float64(g.totalValuesCount) diff --git a/internal/generator/usecase/general/task.go b/internal/generator/usecase/general/task.go index 2bf435e..7560e78 100644 --- a/internal/generator/usecase/general/task.go +++ b/internal/generator/usecase/general/task.go @@ -113,6 +113,38 @@ func newGenerators(cfg *models.GenerationConfig) (map[string]*generator.ColumnGe return generators, nil } +func columnsIdxTopologicalSort(columns []*models.Column) ([]int, bool, error) { + sortedNames, hasDeps, err := common.TopologicalSort( + columns, + func(c *models.Column) (string, []string) { + var deps []string + + for _, r := range c.Ranges { + if r.StringParams != nil && r.StringParams.Template != "" { + deps = append(deps, common.ExtractValuesFromTemplate(r.StringParams.Template)...) + } + } + + return c.Name, deps + }, + ) + if err != nil { + return nil, false, err + } + + originColumnsIndexes := make(map[string]int, len(columns)) + for index, column := range columns { + originColumnsIndexes[column.Name] = index + } + + sortedIndexes := make([]int, len(sortedNames)) + for i, columnName := range sortedNames { + sortedIndexes[i] = originColumnsIndexes[columnName] + } + + return sortedIndexes, hasDeps, nil +} + // RunTask function generates unique values and then all values for selected model. func (t *Task) RunTask(ctx context.Context, callback func()) { started := make(chan struct{}) @@ -171,9 +203,9 @@ func (t *Task) WaitError() error { } // generateAndSaveValues function generates values for all model. 
-func (t *Task) generateAndSaveValues(ctx context.Context) error { - var err error +// +func (t *Task) generateAndSaveValues(ctx context.Context) (err error) { ctx, cancelCtx := context.WithCancelCause(ctx) defer cancelCtx(err) @@ -201,6 +233,11 @@ func (t *Task) generateAndSaveValues(ctx context.Context) error { continue } + sortedColumnsIndexes, hasDependencies, err := columnsIdxTopologicalSort(model.Columns) + if err != nil { + return errors.WithMessagef(err, "failed to sorting columns by dependencies for model %q", modelName) + } + pool.Add(1) go func() { @@ -221,7 +258,12 @@ func (t *Task) generateAndSaveValues(ctx context.Context) error { generators = append(generators, t.generators[columnKey].NewBatchGenerator(rowsCount)) } - pool.Submit(ctx, outputSyncer.WorkerSyncer(), modelName, generators, rowsCount) + pool.Submit( + ctx, outputSyncer.WorkerSyncer(), + model, hasDependencies, + sortedColumnsIndexes, + generators, rowsCount, + ) } }() } @@ -242,7 +284,7 @@ func (t *Task) generateAndSaveValues(ctx context.Context) error { slog.Debug("generating values for all models finished") - return nil + return err } func (t *Task) skipRows() { @@ -255,7 +297,9 @@ func (t *Task) skipRows() { // generateAndSaveBatch function generate batch of values for selected column and send it to output. 
func (t *Task) generateAndSaveBatch( ctx context.Context, outputSync *common.WorkerSyncer, - modelName string, generators []*generator.BatchGenerator, count uint64, + model *models.Model, hasDependencies bool, + columnsIndexesTopologicalOrder []int, + generators []*generator.BatchGenerator, count uint64, ) error { defer outputSync.Done(ctx) @@ -266,29 +310,38 @@ func (t *Task) generateAndSaveBatch( } } - for g, gen := range generators { - for i := range count { + var rowValues map[string]any + if hasDependencies { + rowValues = make(map[string]any, len(generators)) + } + + for i := range count { + for _, columnIdx := range columnsIndexesTopologicalOrder { if common.CtxClosed(ctx) { return &common.ContextCancelError{} } - value, err := gen.Value() + value, err := generators[columnIdx].Value(rowValues) if err != nil { return errors.WithMessage(err, "failed to get or generate value") } - batch[i].Values[g] = value + batch[i].Values[columnIdx] = value + + if rowValues != nil { + rowValues[model.Columns[columnIdx].Name] = value + } } } outputSync.WaitPrevious(ctx) - err := t.output.HandleRowsBatch(ctx, modelName, batch) + err := t.output.HandleRowsBatch(ctx, model.Name, batch) if err != nil { return errors.WithMessage(err, "failed to save batch to output") } - t.progress.Add(modelName, count) + t.progress.Add(model.Name, count) return nil } diff --git a/internal/generator/usecase/general/test/unit_test.go b/internal/generator/usecase/general/test/unit_test.go index 6efad38..0243f2e 100644 --- a/internal/generator/usecase/general/test/unit_test.go +++ b/internal/generator/usecase/general/test/unit_test.go @@ -16,6 +16,7 @@ import ( outputMock "github.com/tarantool/sdvg/internal/generator/output/mock" "github.com/tarantool/sdvg/internal/generator/usecase" usecaseGeneral "github.com/tarantool/sdvg/internal/generator/usecase/general" + "github.com/tarantool/sdvg/internal/generator/usecase/general/generator/value" ) const ( @@ -69,12 +70,12 @@ func deepColumnCopy(c 
*models.Column) *models.Column { func toString(t *testing.T, anyValue any) string { t.Helper() - value, err := json.Marshal(anyValue) + val, err := json.Marshal(anyValue) if err != nil { - t.Fatalf("Failed to json marshal of %v: %s", value, err) + t.Fatalf("Failed to json marshal of %v: %s", val, err) } - return string(value) + return string(val) } func getCfg(t *testing.T, model map[string]*models.Model) models.GenerationConfig { @@ -192,13 +193,22 @@ func checkDistinct(t *testing.T, column *models.Column) { for i := range UnitDefaultRowsCount { require.Len(t, handled[i].Values, 1, "column: %+v\n handled: %+v", column, handled) - value := toString(t, handled[i].Values[0]) - _, alreadyHas := uniqueMap[value] - require.False(t, alreadyHas, "value: %+v\nmap: %+v", value, uniqueMap) - uniqueMap[value] = true + val := toString(t, handled[i].Values[0]) + _, alreadyHas := uniqueMap[val] + require.False(t, alreadyHas, "value: %+v\nmap: %+v", val, uniqueMap) + uniqueMap[val] = true } } +func checkValuesCount(t *testing.T, gen value.Generator, expectedValueCount float64) { + t.Helper() + + require.NoError(t, gen.Prepare()) + + valuesCount := gen.ValuesCount() + require.Equal(t, uint64(expectedValueCount), uint64(valuesCount)) +} + func checkForeignKey(t *testing.T, column *models.Column, nullPercentage float64, foreignOrdered bool) { t.Helper() @@ -214,6 +224,7 @@ func checkForeignKey(t *testing.T, column *models.Column, nullPercentage float64 "foreign": { RowsCount: UnitDefaultRowsCount * 2, Columns: []*models.Column{{ + Name: "foreign_key", ForeignKey: "orig.test", Params: &models.Params{Ordered: foreignOrdered}, }}, @@ -237,10 +248,10 @@ func checkForeignKey(t *testing.T, column *models.Column, nullPercentage float64 continue } - value := toString(t, origHandled[i].Values[0]) - _, alreadyHas := origMap[value] - require.False(t, alreadyHas, "value: %+v\nmap: %+v", value, origMap) - origMap[value] = true + val := toString(t, origHandled[i].Values[0]) + _, alreadyHas := 
origMap[val] + require.False(t, alreadyHas, "value: %+v\nmap: %+v", val, origMap) + origMap[val] = true } for i := range UnitDefaultRowsCount * 2 { @@ -266,9 +277,9 @@ func checkForeignKey(t *testing.T, column *models.Column, nullPercentage float64 } } - value := toString(t, foreignHandled[i].Values[0]) - _, alreadyHas := origMap[value] - require.True(t, alreadyHas, "value: %+v (#%d)\nmap: %+v", value, i, origMap) + val := toString(t, foreignHandled[i].Values[0]) + _, alreadyHas := origMap[val] + require.True(t, alreadyHas, "value: %+v (#%d)\nmap: %+v", val, i, origMap) } } @@ -310,7 +321,11 @@ func TestInteger(t *testing.T) { } for _, testCase := range checkTypeCases { - column := &models.Column{Type: "integer", Ranges: []*models.Params{{TypeParams: testCase.typeParams}}} + column := &models.Column{ + Name: "integers", + Type: "integer", + Ranges: []*models.Params{{TypeParams: testCase.typeParams}}, + } checkType(t, column, testCase.expected) checkOrdered(t, column) @@ -357,10 +372,29 @@ func TestInteger(t *testing.T) { } for _, testCase := range checkValueCases { - column := &models.Column{Type: "integer", Ranges: []*models.Params{{TypeParams: testCase.typeParams}}} + column := &models.Column{ + Name: "integers", + Type: "integer", + Ranges: []*models.Params{{TypeParams: testCase.typeParams}}, + } checkValue(t, column, testCase.expected) } + + checkValuesCountCases := []struct { + typeParams *models.ColumnIntegerParams + expected float64 + }{ + {&models.ColumnIntegerParams{From: 1, To: 5}, 5}, + {&models.ColumnIntegerParams{From: 100, To: 1000}, 901}, + {&models.ColumnIntegerParams{From: 1, To: 1}, 1}, + {&models.ColumnIntegerParams{From: 123, To: 654}, 532}, + } + + for _, testCase := range checkValuesCountCases { + generator := &value.IntegerGenerator{ColumnIntegerParams: testCase.typeParams} + checkValuesCount(t, generator, testCase.expected) + } } func TestFloat(t *testing.T) { @@ -382,7 +416,11 @@ func TestFloat(t *testing.T) { } for _, testCase := range 
checkTypeCases { - column := &models.Column{Type: "float", Ranges: []*models.Params{{TypeParams: testCase.typeParams}}} + column := &models.Column{ + Name: "floats", + Type: "float", + Ranges: []*models.Params{{TypeParams: testCase.typeParams}}, + } checkType(t, column, testCase.expected) checkOrdered(t, column) @@ -413,10 +451,29 @@ func TestFloat(t *testing.T) { } for _, testCase := range checkValueCases { - column := &models.Column{Type: "float", Ranges: []*models.Params{{TypeParams: testCase.typeParams}}} + column := &models.Column{ + Name: "floats", + Type: "float", + Ranges: []*models.Params{{TypeParams: testCase.typeParams}}, + } checkValue(t, column, testCase.expected) } + + checkValuesCountCases := []struct { + typeParams *models.ColumnFloatParams + expected float64 + }{ + {&models.ColumnFloatParams{From: 1.021, To: 5.554433}, math.Inf(1)}, + {&models.ColumnFloatParams{From: 195.2345, To: 1000}, math.Inf(1)}, + {&models.ColumnFloatParams{From: 0.12345, To: 1}, math.Inf(1)}, + {&models.ColumnFloatParams{From: 123, To: 654}, math.Inf(1)}, + } + + for _, testCase := range checkValuesCountCases { + generator := &value.FloatGenerator{ColumnFloatParams: testCase.typeParams} + checkValuesCount(t, generator, testCase.expected) + } } func TestString(t *testing.T) { @@ -437,9 +494,9 @@ func TestString(t *testing.T) { {&models.ColumnStringParams{LogicalType: models.LastNameType, MinLength: 4, MaxLength: 7}, 4, 7}, {&models.ColumnStringParams{LogicalType: models.PhoneType, MinLength: 10, MaxLength: 10}, 10, 10}, {&models.ColumnStringParams{MinLength: 100, MaxLength: 100}, 100, 100}, - {&models.ColumnStringParams{Template: "AAaa00##", Locale: "en"}, 8, 8}, - {&models.ColumnStringParams{Template: "AAaa00##", Locale: "ru"}, 8, 8}, - {&models.ColumnStringParams{Template: "0123456789012345678901234567890123456789"}, 40, 40}, + {&models.ColumnStringParams{Pattern: "AAaa00##", Locale: "en"}, 8, 8}, + {&models.ColumnStringParams{Pattern: "AAaa00##", Locale: "ru"}, 8, 8}, + 
{&models.ColumnStringParams{Pattern: "0123456789012345678901234567890123456789"}, 40, 40}, {&models.ColumnStringParams{LogicalType: models.TextType, MinLength: 3, MaxLength: 5}, 3, 5}, {&models.ColumnStringParams{LogicalType: models.TextType, MinLength: 254, MaxLength: 256}, 254, 256}, {&models.ColumnStringParams{LogicalType: models.TextType, MinLength: 510, MaxLength: 512}, 510, 512}, @@ -449,7 +506,11 @@ func TestString(t *testing.T) { } for _, testCase := range testCases { - column := &models.Column{Type: "string", Ranges: []*models.Params{{TypeParams: testCase.typeParams}}} + column := &models.Column{ + Name: "strings", + Type: "string", + Ranges: []*models.Params{{TypeParams: testCase.typeParams}}, + } handled := checkType(t, column, "") strValue, ok := handled[0].Values[0].(string) @@ -463,13 +524,117 @@ func TestString(t *testing.T) { checkDistinct(t, column) checkForeignKeyCases(t, column) } + + checkValuesCountCases := []struct { + typeParams *models.ColumnStringParams + expected float64 + }{ + { + &models.ColumnStringParams{ + MinLength: 1, + MaxLength: 1, + Locale: "en", + WithoutNumbers: true, + WithoutSpecialChars: true, + }, + 52, + }, + { + &models.ColumnStringParams{ + MinLength: 1, + MaxLength: 1, + Locale: "ru", + WithoutNumbers: true, + WithoutSpecialChars: true, + }, + 66.0, + }, + { + &models.ColumnStringParams{ + MinLength: 3, + MaxLength: 7, + Locale: "en", + WithoutNumbers: true, + WithoutSpecialChars: true, + }, + 1048229968448, + }, + { + &models.ColumnStringParams{ + MinLength: 2, + MaxLength: 9, + Locale: "ru", + WithoutNumbers: true, + WithoutSpecialChars: true, + }, + 24128259706319868, + }, + { + &models.ColumnStringParams{ + MinLength: 10, + MaxLength: 24, + Locale: "en", + WithoutLargeLetters: true, + WithoutSmallLetters: true, + WithoutSpecialChars: true, + }, + 1111111111111110000000000, + }, + { + &models.ColumnStringParams{ + MinLength: 1, + MaxLength: 8, + Locale: "en", + WithoutLargeLetters: true, + WithoutSmallLetters: true, 
+ WithoutNumbers: true, + }, + 81870575520, + }, + { + &models.ColumnStringParams{ + MinLength: 10, + MaxLength: 15, + Locale: "en", + }, + 88394150280794134360488281250, + }, + { + &models.ColumnStringParams{ + MinLength: 10, + MaxLength: 15, + Locale: "ru", + }, + 868834460299970670989801640300, + }, + { + &models.ColumnStringParams{ + Locale: "en", + Template: "{{ .field }}", + }, + 1, + }, + { + &models.ColumnStringParams{ + Locale: "en", + Pattern: "A00", + }, + 2600, + }, + } + + for _, testCase := range checkValuesCountCases { + generator := &value.StringGenerator{ColumnStringParams: testCase.typeParams} + checkValuesCount(t, generator, testCase.expected) + } } func TestUUID(t *testing.T) { - column := &models.Column{Type: "uuid"} + column := &models.Column{Name: "uuids", Type: "uuid"} checkType(t, column, uuid.UUID{}) checkDistinct(t, column) checkForeignKeyCases(t, column) + checkValuesCount(t, &value.UUIDGenerator{}, float64(1<<(128-10)-1)) } func TestDateTime(t *testing.T) { @@ -486,7 +651,11 @@ func TestDateTime(t *testing.T) { } for _, testCase := range checkTypeCases { - column := &models.Column{Type: "datetime", Ranges: []*models.Params{{TypeParams: testCase.typeParams}}} + column := &models.Column{ + Name: "datetimes", + Type: "datetime", + Ranges: []*models.Params{{TypeParams: testCase.typeParams}}, + } checkType(t, column, testCase.expected) checkOrdered(t, column) @@ -503,10 +672,53 @@ func TestDateTime(t *testing.T) { } for _, testCase := range checkValueCases { - column := &models.Column{Type: "datetime", Ranges: []*models.Params{{TypeParams: testCase.typeParams}}} + column := &models.Column{ + Name: "datetimes", + Type: "datetime", + Ranges: []*models.Params{{TypeParams: testCase.typeParams}}, + } checkValue(t, column, testCase.expected) } + + checkValuesCountCases := []struct { + typeParams *models.ColumnDateTimeParams + expected float64 + }{ + { + &models.ColumnDateTimeParams{ + From: time.Date(2025, 7, 25, 10, 0, 0, 0, time.UTC), + To: 
time.Date(2025, 7, 25, 10, 0, 0, 0, time.UTC), + }, + 1, + }, + { + &models.ColumnDateTimeParams{ + From: time.Date(2025, 7, 25, 10, 0, 0, 500_000_000, time.UTC), + To: time.Date(2025, 7, 25, 10, 0, 5, 500_000_000, time.UTC), + }, + 6, + }, + { + &models.ColumnDateTimeParams{ + From: time.Date(2025, 7, 25, 10, 0, 0, 900_000_000, time.UTC), + To: time.Date(2025, 7, 25, 10, 0, 1, 100_000_000, time.UTC), + }, + 400_000_002, + }, + { + &models.ColumnDateTimeParams{ + From: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC), + To: time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC), + }, + 31_536_001, + }, + } + + for _, testCase := range checkValuesCountCases { + generator := &value.DateTimeGenerator{ColumnDateTimeParams: testCase.typeParams} + checkValuesCount(t, generator, testCase.expected) + } } func TestIdempotence(t *testing.T) { @@ -599,7 +811,7 @@ func TestIdempotence(t *testing.T) { Name: "passport", Type: "string", Ranges: []*models.Params{{TypeParams: &models.ColumnStringParams{ - Template: "AA 00 000 000", + Pattern: "AA 00 000 000", }, NullPercentage: 0.5}}, }, @@ -736,7 +948,11 @@ func TestEnum(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - column := &models.Column{Type: tc.dataType, Ranges: []*models.Params{{Values: tc.values}}} + column := &models.Column{ + Name: "enums", + Type: tc.dataType, + Ranges: []*models.Params{{Values: tc.values}}, + } cfg := oneColumnCfg(t, column) cfg.Models[UnitDefaultColumnName].RowsCount = tc.rowsCount @@ -745,7 +961,11 @@ func TestEnum(t *testing.T) { handledDataRows := generateFunc(t, cfg)[UnitDefaultColumnName] require.Len(t, handledDataRows, len(tc.expected)) - columnOrdered := &models.Column{Type: tc.dataType, Ranges: []*models.Params{{Values: tc.values, Ordered: true}}} + columnOrdered := &models.Column{ + Name: "enums", + Type: tc.dataType, + Ranges: []*models.Params{{Values: tc.values, Ordered: true}}, + } cfg = oneColumnCfg(t, columnOrdered) cfg.Models[UnitDefaultColumnName].RowsCount = 
tc.rowsCount @@ -755,8 +975,8 @@ func TestEnum(t *testing.T) { require.Len(t, handledDataRows, len(tc.expected)) for i := range handledDataRows { - value := handledDataRows[i].Values[0] - require.Equal(t, tc.expected[i], value) + val := handledDataRows[i].Values[0] + require.Equal(t, tc.expected[i], val) } }) } @@ -914,7 +1134,7 @@ func TestRanges(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - column := &models.Column{Type: tc.dataType, Ranges: tc.ranges} + column := &models.Column{Name: "ranges", Type: tc.dataType, Ranges: tc.ranges} cfg := oneColumnCfg(t, column) cfg.Models[UnitDefaultColumnName].RowsCount = UnitDefaultRowsCount @@ -929,9 +1149,9 @@ func TestRanges(t *testing.T) { } for i := range handledDataRows { - value := handledDataRows[i].Values[0] + val := handledDataRows[i].Values[0] - rangeIdx, err := mapValueToRange(tc.dataType, value, tc.ranges) + rangeIdx, err := mapValueToRange(tc.dataType, val, tc.ranges) require.NoError(t, err) expectedValuesAmountPerRange[rangeIdx]-- @@ -955,13 +1175,13 @@ func mapValueToRange(columnType string, value any, ranges []*models.Params) (int switch columnType { case "integer": - switch value := value.(type) { + switch val := value.(type) { case int32: - if int32(r.IntegerParams.From) <= value && value <= int32(r.IntegerParams.To) { + if int32(r.IntegerParams.From) <= val && val <= int32(r.IntegerParams.To) { return idx, nil } case int64: - if r.IntegerParams.From <= value && value <= r.IntegerParams.To { + if r.IntegerParams.From <= val && val <= r.IntegerParams.To { return idx, nil } } @@ -984,13 +1204,13 @@ func mapValueToRange(columnType string, value any, ranges []*models.Params) (int return idx, nil } case "float": - switch value := value.(type) { + switch val := value.(type) { case float32: - if float32(r.FloatParams.From) <= value && value <= float32(r.FloatParams.To) { + if float32(r.FloatParams.From) <= val && val <= float32(r.FloatParams.To) { return idx, nil } case 
float64: - if r.FloatParams.From <= value && value <= r.FloatParams.To { + if r.FloatParams.From <= val && val <= r.FloatParams.To { return idx, nil } }