diff --git a/.github/workflows/validate-doc-metadata.yml b/.github/workflows/validate-doc-metadata.yml deleted file mode 100644 index acb5cf77930..00000000000 --- a/.github/workflows/validate-doc-metadata.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Validate Documentation Metadata -on: - pull_request: - # run on demand - workflow_dispatch: -jobs: - validate-doc-metadata: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - persist-credentials: false - fetch-depth: 0 - - name: Set up Python 3.9.x - uses: actions/setup-python@v3 - with: - python-version: "3.9.x" - - name: Install dependencies - run: | - python -m pip install yamale - python -m pip install yamllint - - name: Lint with yamllint - run: yamllint --format standard -c .tools/validation/.yamllint.yaml .doc_gen/metadata - - name: Lint with yamllint - run: yamllint --format standard -c .tools/validation/.yamllint.yaml .doc_gen/metadata/curated - - name: Validate metadata with yamale - run: python .tools/validation/validate_doc_metadata.py --doc-gen .doc_gen diff --git a/.github/workflows/pre-validate.yml b/.github/workflows/validate.yml similarity index 70% rename from .github/workflows/pre-validate.yml rename to .github/workflows/validate.yml index c30f4398658..d31f8610853 100644 --- a/.github/workflows/pre-validate.yml +++ b/.github/workflows/validate.yml @@ -27,6 +27,12 @@ jobs: uses: actions/setup-python@v3 with: python-version: 3.8 #install the python needed - - name: Run Python script to vet code examples # Runs a single command using the runners shell - run: | - python3 .tools/pre_validate/pre_validate.py --q + - name: Install dependencies + run: >- + python3 -m pip install -r .tools/base_requirements.txt + - name: Lint with yamllint + run: >- + yamllint --format standard -c .tools/validation/.yamllint.yaml .doc_gen/metadata .doc_gen/metadata/curated + - name: Validate metadata with yamale + run: >- + python3 .tools/validate/validate.py --q --doc-gen .doc_gen diff --git a/.tools/base_requirements.txt b/.tools/base_requirements.txt index 95c92d15419..fcd1d2902bf 100644 --- a/.tools/base_requirements.txt +++ b/.tools/base_requirements.txt @@ -1,6 +1,7 @@ black==23.9.1 flake8==6.1.0 mypy-extensions==1.0.0 +pathspec==0.11.2 PyYAML==6.0.1 requests==2.31.0 types-PyYAML==6.0.12.12 diff --git a/.tools/pre_validate/README.md b/.tools/validation/README.md similarity index 100% rename from .tools/pre_validate/README.md rename to .tools/validation/README.md diff --git a/.tools/validation/validate_doc_metadata.py b/.tools/validation/metadata_validator.py similarity index 98% rename from .tools/validation/validate_doc_metadata.py rename to .tools/validation/metadata_validator.py index 11b72e0f43a..75cbd6fae14 100755 --- a/.tools/validation/validate_doc_metadata.py +++ b/.tools/validation/metadata_validator.py @@ -153,7 +153,7 @@ def _is_valid(self, value): def validate_files(schema_name: Path, meta_names: Iterable[Path], validators): """Iterate a list of files and validate each one against a schema.""" - success = True + success = 0 schema = yamale.make_schema(schema_name, validators=validators) for meta_name in meta_names: @@ -163,7 +163,7 @@ def validate_files(schema_name: Path, meta_names: Iterable[Path], validators): print(f"{meta_name.resolve()} validation success! 👍") except YamaleError as e: print(e.message) - success = False + success += 1 return success @@ -203,9 +203,9 @@ def validate_all(doc_gen: Path): ("curated_sources_schema.yaml", "curated/sources.yaml"), ("curated_example_schema.yaml", "curated/*_metadata.yaml"), ] - success = True + success = 0 for schema, metadata in to_validate: - success &= validate_files( + success += validate_files( schema_root / schema, (doc_gen / "metadata").glob(metadata), validators ) @@ -224,7 +224,7 @@ def main(): success = validate_all(Path(args.doc_gen)) - if success: + if success == 0: print("Validation succeeded! 👍👍👍") else: print("\n********************************************") diff --git a/.tools/pre_validate/pre_validate.py b/.tools/validation/project_validator.py similarity index 80% rename from .tools/pre_validate/pre_validate.py rename to .tools/validation/project_validator.py index b30930efab8..70c137ee038 100644 --- a/.tools/pre_validate/pre_validate.py +++ b/.tools/validation/project_validator.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ @@ -22,6 +23,9 @@ import argparse import logging import sys +from pathlib import Path +from pathspec import GitIgnoreSpec +from typing import Generator from words import WORDS logger = logging.getLogger(__name__) @@ -57,22 +61,6 @@ # action in a clean environment (aside from testing), # exhaustive ignore lists shouldn't be necessary. -# Folders to skip. -IGNORE_FOLDERS = { - ".doc_gen", - ".pytest_cache", - ".tools", - ".venv", - "__pycache__", - "bin", - "cdk.out", - "dist", - "node_modules", - "obj", - "target", - "venv", -} - # Files to skip. IGNORE_FILES = { ".moviedata.json", @@ -208,7 +196,44 @@ } -def check_files(root, quiet): +def match_path_to_specs(path: Path, specs: list[GitIgnoreSpec]) -> bool: + """ + Return True if we should skip this path, that is, it is matched by a .gitignore. + """ + for spec in specs: + if spec.match_file(path): + return True + return False + + +def walk_with_gitignore( + root: Path, specs: list[GitIgnoreSpec] = [] +) -> Generator[Path, None, None]: + gitignore = root / ".gitignore" + if gitignore.exists(): + with open(root / ".gitignore", "r", encoding="utf-8") as gitignore: + specs = [*specs, GitIgnoreSpec.from_lines(gitignore.readlines())] + for entry in os.scandir(root): + if not match_path_to_specs(entry.path, specs): + path = Path(entry.path) + if entry.is_dir(): + yield from walk_with_gitignore(path, specs) + else: + if path.parts[-1] not in IGNORE_FILES: + yield path + + +def get_files(root: Path): + for path in walk_with_gitignore(root): + filename = path.parts[-1] + ext = os.path.splitext(filename)[1].lstrip(".") + if ext.lower() in EXT_LOOKUP: + if filename in IGNORE_FILES: + logger.info("\nSkipped File: %s", path) + yield path + + +def check_files(root: Path): """ Walk a folder system, scanning all files with specified extensions. Errors are logged and counted and the count of errors is returned. @@ -219,42 +244,33 @@ def check_files(root, quiet): """ file_count = 0 error_count = 0 - for path, dirs, files in os.walk(root, topdown=True): - dirs[:] = [d for d in dirs if d not in IGNORE_FOLDERS] - for filename in files: - ext = os.path.splitext(filename)[1].lstrip(".") - if ext.lower() in EXT_LOOKUP: - file_path = os.path.join(path, filename) - if filename in IGNORE_FILES: - if not quiet: - print("\nFile: " + file_path + " is skipped") - continue - file_count += 1 - if not quiet: - print("\nChecking File: " + file_path) - with open(file_path, encoding="utf-8") as f: - file_contents = f.read() - - error_count += verify_no_deny_list_words(file_contents, file_path) - error_count += verify_no_secret_keys(file_contents, file_path) - error_count += verify_no_secret_keys(filename, file_path) - error_count += verify_snippet_start_end(file_contents, file_path) + for file_path in get_files(root): + file_count += 1 + logger.info("\nChecking File: %s", file_path) + + with open(file_path, encoding="utf-8") as f: + file_contents = f.read() + + error_count += verify_no_deny_list_words(file_contents, file_path) + error_count += verify_no_secret_keys(file_contents, file_path) + error_count += verify_no_secret_keys(file_contents, file_path) + error_count += verify_snippet_start_end(file_contents, file_path) print(f"{file_count} files scanned in {root}.\n") return error_count -def verify_no_deny_list_words(file_contents, file_location): +def verify_no_deny_list_words(file_contents: str, file_location: Path): """Verify no words in the file are in the list of denied words.""" error_count = 0 for word in file_contents.split(): if word.lower() in DENY_LIST: - logger.error(f"Word '%s' in %s is not allowed.", word, file_location) + logger.error("Word '%s' in %s is not allowed.", word, file_location) error_count += 1 return error_count -def verify_sample_files(root_path): +def verify_sample_files(root_path: Path): """Verify sample files meet the requirements and have not moved.""" sample_files_folder = os.path.join(root_path, "resources/sample_files") media_folder = ".sample_media" @@ -269,7 +285,7 @@ def verify_sample_files(root_path): ext = os.path.splitext(file_name)[1].lstrip(".") if file_name not in EXPECTED_SAMPLE_FILES: logger.error( - f"File '%s' in %s was not found in the list of expected sample files. If this is a new sample file, add it to the EXPECTED_SAMPLE_FILES list in pre_validate.py.", + "File '%s' in %s was not found in the list of expected sample files. If this is a new sample file, add it to the EXPECTED_SAMPLE_FILES list in pre_validate.py.", file_name, sample_files_folder, ) @@ -277,7 +293,7 @@ def verify_sample_files(root_path): if ext.lower() in MEDIA_FILE_TYPES: if media_folder not in file_path: logger.error( - f"File '%s' in %s must be in the %s directory.", + "File '%s' in %s must be in the %s directory.", file_name, sample_files_folder, media_folder, @@ -285,7 +301,7 @@ def verify_sample_files(root_path): error_count += 1 if (os.path.getsize(file_path) / ONE_MB_AS_BYTES) > MAX_FILE_SIZE_MB: logger.error( - f"File '%s' in %s is larger than the allowed size for a sample file.", + "File '%s' in %s is larger than the allowed size for a sample file.", file_name, sample_files_folder, ) @@ -294,15 +310,16 @@ def verify_sample_files(root_path): for sample_file in EXPECTED_SAMPLE_FILES: if sample_file not in file_list: logger.error( - f"Expected sample file '%s' was not found in '%s'. If this file was intentionally removed, remove it from the EXPECTED_SAMPLE_FILES list in pre_validate.py.", + "Expected sample file '%s' was not found in '%s'. If this file was intentionally removed, remove it from the EXPECTED_SAMPLE_FILES list in pre_validate.py.", sample_file, sample_files_folder, ) error_count += 1 + return error_count -def verify_no_secret_keys(file_contents, file_location): +def verify_no_secret_keys(file_contents: str, file_location: Path): """Verify the file does not contain 20- or 40- length character strings, which might be secret keys. Allow strings in the allowlist in https://github.com/awsdocs/aws-doc-sdk-examples/blob/main/.github/pre_validate/pre_validate.py. @@ -339,7 +356,7 @@ def verify_no_secret_keys(file_contents, file_location): return error_count -def verify_snippet_start_end(file_contents, file_location): +def verify_snippet_start_end(file_contents: str, file_location: Path): """Scan the file contents for snippet-start and snippet-end tags and verify that they are in matched pairs. Log errors and return the count of errors.""" error_count = 0 @@ -350,7 +367,7 @@ def verify_snippet_start_end(file_contents, file_location): if snippet_start in word: tag = word.split("[")[1] if tag in snippet_tags: - logger.error(f"Duplicate tag {tag[:-1]} found in {file_location}.") + logger.error("Duplicate tag %s found in %s.", tag[:-1], file_location) error_count += 1 else: snippet_tags.add(tag) @@ -360,8 +377,9 @@ def verify_snippet_start_end(file_contents, file_location): snippet_tags.remove(tag) else: logger.error( - f"End tag {tag[:-1]} with no matching start tag " - f"found in {file_location}." + "End tag %s with no matching start tag " "found in %s.", + tag[:-1], + file_location, ) error_count += 1 @@ -391,7 +409,9 @@ def main(): ) args = parser.parse_args() - root_path = os.path.abspath(".") if not args.root else os.path.abspath(args.root) + root_path = Path( + os.path.abspath(".") if not args.root else os.path.abspath(args.root) + ) print("----------\n\nRun Tests\n") error_count = check_files(root_path, args.quiet) diff --git a/.tools/pre_validate/test/test_pre_validate.py b/.tools/validation/test/test_pre_validate.py similarity index 100% rename from .tools/pre_validate/test/test_pre_validate.py rename to .tools/validation/test/test_pre_validate.py diff --git a/.tools/validation/validate.py b/.tools/validation/validate.py new file mode 100755 index 00000000000..3ba8c0c3882 --- /dev/null +++ b/.tools/validation/validate.py @@ -0,0 +1,44 @@ +import argparse +from pathlib import Path +from sys import exit +from metadata_validator import validate_all +from project_validator import check_files, verify_sample_files + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--quiet", + action="store_true", + help="Suppresses output of filenames while parsing. " "The default is False.", + ) + parser.add_argument( + "--root", + default=f"{Path(__file__).parent / '..' / '..'}", + help="The root path from which to search for files " + "to check. The default is the current working " + "folder.", + ) + parser.add_argument( + "--doc-gen", + default=f"{Path(__file__).parent / '..' / '..' / '.doc_gen'}", + help="The folder that contains schema and metadata files.", + required=False, + ) + args = parser.parse_args() + root_path = Path(args.root).resolve() + + error_count = check_files(root_path) + error_count += verify_sample_files(root_path) + error_count += validate_all(Path(args.doc_gen)) + + if error_count > 0: + print(f"{error_count} errors found, please fix them.") + else: + print("All checks passed, you are cleared to check in.") + + return error_count + + +if __name__ == "__main__": + exit(main()) diff --git a/.tools/pre_validate/words.py b/.tools/validation/words.py similarity index 100% rename from .tools/pre_validate/words.py rename to .tools/validation/words.py