diff --git a/example/hello.bozo b/example/hello.bozo
new file mode 100644
index 00000000..2981978d
--- /dev/null
+++ b/example/hello.bozo
@@ -0,0 +1,3 @@
+---
+hello:
+  - world
diff --git a/format/defs.bzl b/format/defs.bzl
index f521810f..d09840a0 100644
--- a/format/defs.bzl
+++ b/format/defs.bzl
@@ -19,7 +19,7 @@ load("@aspect_bazel_lib//lib:utils.bzl", "propagate_common_rule_attributes", "pr
 load("@rules_multirun//:defs.bzl", "command", "multirun")
 load("//format/private:formatter_binary.bzl", "BUILTIN_TOOL_LABELS", "CHECK_FLAGS", "FIX_FLAGS", "TOOLS", "to_attribute_name")
 
-def _format_attr_factory(target_name, lang, toolname, tool_label, mode, disable_git_attribute_checks):
+def _format_attr_factory(target_name, lang, toolname, tool_label, mode, disable_git_attribute_checks, extensions):
     if mode not in ["check", "fix", "test"]:
         fail("Invalid mode", mode)
 
@@ -36,12 +36,14 @@ def _format_attr_factory(target_name, lang, toolname, tool_label, mode, disable_
             # the apparent repository, starts with @@aspect_rules_lint~override
             "FIX_TARGET": "//{}:{}".format(native.package_name(), target_name),
             "tool": "$(rlocationpaths %s)" % tool_label,
+            # Runfiles path of the JSON file mapping each language to its file patterns; format.sh reads it as $extensions.
+            "extensions": "$(rlocationpaths %s)" % extensions,
             "lang": lang,
             "flags": FIX_FLAGS[toolname] if mode == "fix" else CHECK_FLAGS[toolname],
             "mode": "check" if mode == "test" else mode,
             "disable_git_attribute_checks": "true" if disable_git_attribute_checks else "false",
         },
-        "data": [tool_label],
+        "data": [tool_label, extensions],
         ("args" if mode == "test" else "arguments"): args,
     }
 
@@ -71,7 +73,7 @@ Some languages have dialects:
     },
 )
 
-def format_multirun(name, jobs = 4, print_command = False, disable_git_attribute_checks = False, **kwargs):
+def format_multirun(name, jobs = 4, print_command = False, disable_git_attribute_checks = False, extensions = "@aspect_rules_lint//format/private:extensions.json", **kwargs):
     """Create a [multirun] binary for the given languages.
 
     Intended to be used with `bazel run` to update source files in-place.
@@ -102,7 +104,7 @@ def format_multirun(name, jobs = 4, print_command = False, disable_git_attribute
         command(
             command = Label("@aspect_rules_lint//format/private:format"),
             description = "Formatting {} with {}...".format(lang, toolname),
-            **_format_attr_factory(target_name, lang, toolname, tool_label, mode, disable_git_attribute_checks)
+            **_format_attr_factory(target_name, lang, toolname, tool_label, mode, disable_git_attribute_checks, extensions)
         )
         commands.append(target_name)
 
@@ -125,7 +127,7 @@ def format_multirun(name, jobs = 4, print_command = False, disable_git_attribute
         **common_attrs
     )
 
-def format_test(name, srcs = None, workspace = None, no_sandbox = False, disable_git_attribute_checks = False, tags = [], **kwargs):
+def format_test(name, srcs = None, workspace = None, no_sandbox = False, disable_git_attribute_checks = False, tags = [], extensions = "@aspect_rules_lint//format/private:extensions.json", **kwargs):
     """Create test for the given formatters.
 
     Intended to be used with `bazel test` to verify files are formatted.
@@ -159,7 +161,8 @@ def format_test(name, srcs = None, workspace = None, no_sandbox = False, disable
         kwargs.pop(k)
 
     for lang, toolname, tool_label, target_name in _tools_loop(name, kwargs):
-        attrs = _format_attr_factory(target_name, lang, toolname, tool_label, "test", disable_git_attribute_checks)
+        attrs = _format_attr_factory(target_name, lang, toolname, tool_label, "test", disable_git_attribute_checks, extensions)
         if srcs:
-            attrs["data"] = [tool_label] + srcs
+            # This assignment replaces the factory's "data", so the extensions file must be listed again.
+            attrs["data"] = [tool_label, extensions] + srcs
             attrs["args"] = ["$(location {})".format(i) for i in srcs]
diff --git a/format/private/BUILD.bazel b/format/private/BUILD.bazel
index 47134eeb..f0ad836e 100644
--- a/format/private/BUILD.bazel
+++ b/format/private/BUILD.bazel
@@ -1,6 +1,6 @@
 load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
 
-exports_files(["format.sh"])
+exports_files(["format.sh", "extensions.json"])
 
 sh_binary(
     name = "format",
diff --git a/format/private/extensions.json b/format/private/extensions.json
new file mode 100644
index 00000000..f89d823b
--- /dev/null
+++ b/format/private/extensions.json
@@ -0,0 +1,31 @@
+{
+  "C": ["*.c","*.cats","*.h","*.idc" ],
+  "C++": ["*.cpp","*.c++","*.cc","*.cp","*.cppm","*.cxx","*.h","*.h++","*.hh","*.hpp","*.hxx","*.inc","*.inl","*.ino","*.ipp","*.ixx","*.re","*.tcc","*.tpp","*.txx" ],
+  "Cuda": ["*.cu","*.cuh" ],
+  "CSS": ["*.css" ],
+  "Go": ["*.go" ],
+  "GraphQL": ["*.graphql","*.gql","*.graphqls" ],
+  "HTML": ["*.html","*.hta","*.htm","*.html.hl","*.inc","*.xht","*.xhtml" ],
+  "JSON": [".all-contributorsrc",".arcconfig",".auto-changelog",".c8rc",".htmlhintrc",".imgbotconfig",".nycrc",".tern-config",".tern-project",".watchmanconfig","Pipfile.lock","composer.lock","deno.lock","flake.lock","mcmod.info","*.json","*.4DForm","*.4DProject","*.avsc","*.geojson","*.gltf","*.har","*.ice","*.JSON-tmLanguage","*.jsonl","*.mcmeta","*.tfstate","*.tfstate.backup","*.topojson","*.webapp","*.webmanifest","*.yy","*.yyp" ],
+  "Java": ["*.java","*.jav","*.jsh" ],
+  "JavaScript": ["Jakefile","*.js","*._js","*.bones","*.cjs","*.es","*.es6","*.frag","*.gs","*.jake","*.javascript","*.jsb","*.jscad","*.jsfl","*.jslib","*.jsm","*.jspre","*.jss","*.jsx","*.mjs","*.njs","*.pac","*.sjs","*.ssjs","*.xsjs","*.xsjslib" ],
+  "Jsonnet": ["*.jsonnet","*.libsonnet" ],
+  "Kotlin": ["*.kt","*.ktm","*.kts" ],
+  "Less": ["*.less" ],
+  "Markdown": ["contents.lr","*.md","*.livemd","*.markdown","*.mdown","*.mdwn","*.mkd","*.mkdn","*.mkdown","*.ronn","*.scd","*.workbook" ],
+  "Protocol Buffer": ["*.proto" ],
+  "Python": [".gclient","DEPS","SConscript","SConstruct","wscript","*.py","*.cgi","*.fcgi","*.gyp","*.gypi","*.lmi","*.py3","*.pyde","*.pyi","*.pyp","*.pyt","*.pyw","*.rpy","*.spec","*.tac","*.wsgi","*.xpy" ],
+  "Rust": ["*.rs","*.rs.in" ],
+  "SQL": ["*.sql","*.cql","*.ddl","*.inc","*.mysql","*.prc","*.tab","*.udf","*.viw" ],
+  "SCSS": ["*.scss" ],
+  "Scala": ["*.scala","*.kojo","*.sbt","*.sc" ],
+  "Shell": [".bash_aliases",".bash_functions",".bash_history",".bash_logout",".bash_profile",".bashrc",".cshrc",".flaskenv",".kshrc",".login",".profile",".zlogin",".zlogout",".zprofile",".zshenv",".zshrc","9fs","PKGBUILD","bash_aliases","bash_logout","bash_profile","bashrc","cshrc","gradlew","kshrc","login","man","profile","zlogin","zlogout","zprofile","zshenv","zshrc","*.sh","*.bash","*.bats","*.cgi","*.command","*.fcgi","*.ksh","*.sh.in","*.tmux","*.tool","*.trigger","*.zsh","*.zsh-theme" ],
+  "Starlark": ["BUCK","BUILD","BUILD.bazel","MODULE.bazel","Tiltfile","WORKSPACE","WORKSPACE.bazel","*.bzl","*.star" ],
+  "Swift": ["*.swift" ],
+  "TSX": ["*.tsx" ],
+  "TypeScript": ["*.ts","*.cts","*.mts" ],
+  "Vue": ["*.vue" ],
+  "YAML": ["*.yml","*.yaml",".clang-format",".clang-tidy",".gemrc" ],
+  "XML": ["*.xml" ],
+  "Terraform": ["*.tf", "*.tfvars"]
+}
diff --git a/format/private/format.sh b/format/private/format.sh
index 7008b0f7..02b51df8 100755
--- a/format/private/format.sh
+++ b/format/private/format.sh
@@ -56,9 +56,10 @@ function process_args_in_batches() {
     local lang="$1"
     local bin="$2"
     local flags="$3"
-    shift 3
+    local file_patterns="$4"  # space-separated patterns for $lang; named apart from $extensions, which holds the JSON path
+    shift 4
     local args=("$@")
-
+
     # Uses up to ARG_MAX - 2k, or 128k, whichever is smaller, characters per
     # command. This was derived from following the defaults from xargs
     # https://www.gnu.org/software/findutils/manual/html_node/find_html/Limiting-Command-Size.html
@@ -82,7 +83,7 @@ function process_args_in_batches() {
 
     # If no arguments were passed, still run run-format once
     if [ ${#args[@]} -eq 0 ]; then
-        run-format "$lang" "$bin" "$flags"
+        run-format "$lang" "$bin" "$flags" "$file_patterns"
         return
     fi
 
@@ -93,7 +94,7 @@ function process_args_in_batches() {
     for arg in "${args[@]}"; do
         if ((current_batch_size + ${#arg} + 1 >= max_batch_size)); then
             # Process current batch
-            run-format "$lang" "$bin" "$flags" "${current_batch[@]}"
+            run-format "$lang" "$bin" "$flags" "$file_patterns" "${current_batch[@]}"
             current_batch=()
             current_batch_size=0
         fi
@@ -103,58 +104,14 @@ function process_args_in_batches() {
 
     # Process any remaining arguments
     if [ -n "$current_batch" ]; then
-        run-format "$lang" "$bin" "$flags" "${current_batch[@]}"
+        run-format "$lang" "$bin" "$flags" "$file_patterns" "${current_batch[@]}"
     fi
 }
 
 # Exports a function that is similar to 'git ls-files'
-# ls-files <language> [<file>...]
+# ls-files <patterns> [<file>...]
function ls-files { - language="$1" && shift; - # Copied file patterns from - # https://github.com/github-linguist/linguist/blob/559a6426942abcae16b6d6b328147476432bf6cb/lib/linguist/languages.yml - # using the ./mirror_linguist_languages.sh tool to transform to Bash code - case "$language" in - 'C') patterns=('*.c' '*.cats' '*.h' '*.idc') ;; - 'C++') patterns=('*.cpp' '*.c++' '*.cc' '*.cp' '*.cppm' '*.cxx' '*.h' '*.h++' '*.hh' '*.hpp' '*.hxx' '*.inc' '*.inl' '*.ino' '*.ipp' '*.ixx' '*.re' '*.tcc' '*.tpp' '*.txx') ;; - 'Cuda') patterns=('*.cu' '*.cuh') ;; - 'CSS') patterns=('*.css') ;; - 'Go') patterns=('*.go') ;; - 'GraphQL') patterns=('*.graphql' '*.gql' '*.graphqls') ;; - 'HTML') patterns=('*.html' '*.hta' '*.htm' '*.html.hl' '*.inc' '*.xht' '*.xhtml') ;; - 'JSON') patterns=('.all-contributorsrc' '.arcconfig' '.auto-changelog' '.c8rc' '.htmlhintrc' '.imgbotconfig' '.nycrc' '.tern-config' '.tern-project' '.watchmanconfig' 'Pipfile.lock' 'composer.lock' 'deno.lock' 'flake.lock' 'mcmod.info' '*.json' '*.4DForm' '*.4DProject' '*.avsc' '*.geojson' '*.gltf' '*.har' '*.ice' '*.JSON-tmLanguage' '*.jsonl' '*.mcmeta' '*.tfstate' '*.tfstate.backup' '*.topojson' '*.webapp' '*.webmanifest' '*.yy' '*.yyp') ;; - 'Java') patterns=('*.java' '*.jav' '*.jsh') ;; - 'JavaScript') patterns=('Jakefile' '*.js' '*._js' '*.bones' '*.cjs' '*.es' '*.es6' '*.frag' '*.gs' '*.jake' '*.javascript' '*.jsb' '*.jscad' '*.jsfl' '*.jslib' '*.jsm' '*.jspre' '*.jss' '*.jsx' '*.mjs' '*.njs' '*.pac' '*.sjs' '*.ssjs' '*.xsjs' '*.xsjslib') ;; - 'Jsonnet') patterns=('*.jsonnet' '*.libsonnet') ;; - 'Kotlin') patterns=('*.kt' '*.ktm' '*.kts') ;; - 'Less') patterns=('*.less') ;; - 'Markdown') patterns=('contents.lr' '*.md' '*.livemd' '*.markdown' '*.mdown' '*.mdwn' '*.mkd' '*.mkdn' '*.mkdown' '*.ronn' '*.scd' '*.workbook') ;; - 'Protocol Buffer') patterns=('*.proto') ;; - 'Python') patterns=('.gclient' 'DEPS' 'SConscript' 'SConstruct' 'wscript' '*.py' '*.cgi' '*.fcgi' '*.gyp' '*.gypi' '*.lmi' '*.py3' 
'*.pyde' '*.pyi' '*.pyp' '*.pyt' '*.pyw' '*.rpy' '*.spec' '*.tac' '*.wsgi' '*.xpy') ;; - 'Rust') patterns=('*.rs' '*.rs.in') ;; - 'SQL') patterns=('*.sql' '*.cql' '*.ddl' '*.inc' '*.mysql' '*.prc' '*.tab' '*.udf' '*.viw') ;; - 'SCSS') patterns=('*.scss') ;; - 'Scala') patterns=('*.scala' '*.kojo' '*.sbt' '*.sc') ;; - 'Shell') patterns=('.bash_aliases' '.bash_functions' '.bash_history' '.bash_logout' '.bash_profile' '.bashrc' '.cshrc' '.flaskenv' '.kshrc' '.login' '.profile' '.zlogin' '.zlogout' '.zprofile' '.zshenv' '.zshrc' '9fs' 'PKGBUILD' 'bash_aliases' 'bash_logout' 'bash_profile' 'bashrc' 'cshrc' 'gradlew' 'kshrc' 'login' 'man' 'profile' 'zlogin' 'zlogout' 'zprofile' 'zshenv' 'zshrc' '*.sh' '*.bash' '*.bats' '*.cgi' '*.command' '*.fcgi' '*.ksh' '*.sh.in' '*.tmux' '*.tool' '*.trigger' '*.zsh' '*.zsh-theme') ;; - 'Starlark') patterns=('BUCK' 'BUILD' 'BUILD.bazel' 'MODULE.bazel' 'Tiltfile' 'WORKSPACE' 'WORKSPACE.bazel' '*.bzl' '*.star') ;; - 'Swift') patterns=('*.swift') ;; - 'TSX') patterns=('*.tsx') ;; - 'TypeScript') patterns=('*.ts' '*.cts' '*.mts') ;; - 'Vue') patterns=('*.vue') ;; - 'YAML') patterns=('*.yml' '*.yaml' '.clang-format' '.clang-tidy' '.gemrc') ;; - - # Note: terraform fmt cannot handle all HCL files such as .terraform.lock - # "Only .tf and .tfvars files can be processed with terraform fmt" - # so we define a custom language here instead of 'HCL' from github-linguist definition for the language. - # TODO: we should probably use https://terragrunt.gruntwork.io/docs/reference/cli-options/#hclfmt instead - # which does support the entire HCL language FWICT - 'Terraform') patterns=('*.tf' '*.tfvars') ;; - - *) - echo >&2 "Internal error: unknown language $language" - exit 1 - ;; - esac + patterns=("$1") && shift; if [ "$#" -eq 0 ]; then # When the formatter is run with no arguments, we run over "all files in the repo". 
@@ -240,9 +197,10 @@ function run-format { local lang="$1" && shift local bin="$1" && shift local args="$1" && shift + local ext=($1) && shift local TIMEFORMAT="Formatted ${lang} in %lR" + local files=$(ls-files $ext $@) - local files=$(ls-files "$lang" $@) if [ -n "$files" ] && [ -n "$bin" ]; then case "$lang" in 'Protocol Buffer') @@ -294,17 +252,24 @@ if [ "${BASH_SOURCE[0]}" -ef "$0" ]; then exit 1 fi - process_args_in_batches "$lang" "$bin" "${flags:-""}" "$@" + extfile="$(rlocation $extensions)" + if [ ! -e "$extfile" ]; then + echo >&2 "cannot locate config $extensions" + exit 1 + fi + + ext=$(jq -jr ".\"${lang}\"| join(\" \")" < $extfile) + process_args_in_batches "$lang" "$bin" "${flags:-""}" "$ext" "$@" # Handle additional languages for JavaScript and CSS if [[ "$lang" == "JavaScript" ]]; then for sublang in "JSON" "TSX" "TypeScript" "Vue"; do - process_args_in_batches "$sublang" "$bin" "${flags:-""}" "$@" + process_args_in_batches "$sublang" "$bin" "${flags:-""}" "$ext" "$@" done fi if [[ "$lang" == "CSS" ]]; then for sublang in "Less" "SCSS"; do - process_args_in_batches "$sublang" "$bin" "${flags:-""}" "$@" + process_args_in_batches "$sublang" "$bin" "${flags:-""}" "$ext" "$@" done fi fi diff --git a/format/test/BUILD.bazel b/format/test/BUILD.bazel index a508b554..8d6c2731 100644 --- a/format/test/BUILD.bazel +++ b/format/test/BUILD.bazel @@ -59,3 +59,15 @@ format_multirun( terraform = ":mock_terraform-fmt.sh", yaml = ":mock_yamlfmt.sh", ) + +#format_extensions( +# name = ":custom_extensions", +# yaml = ["*.bozo"], +#) + +format_multirun( + name = "format_custom", + #TODO: point extensions to ":custom_extensions + extensions = ":extensions.json", + yaml = ":mock_yamlfmt.sh", +) diff --git a/format/test/extensions.json b/format/test/extensions.json new file mode 100644 index 00000000..342e8675 --- /dev/null +++ b/format/test/extensions.json @@ -0,0 +1,3 @@ +{ + "YAML": ["*.bozo"] +}