Skip to content

Commit

Permalink
Tools: Consolidated validators to .tools/validation (#5597)
Browse files Browse the repository at this point in the history
* Consolidated validators to .tools/validation, and reworked file walker
* Better snippet validation workflow name
* bump to python 3.11
* Commented validators
* Cleaned up IGNORE_FILES checks between walk... and git...
  • Loading branch information
DavidSouther authored and ford-at-aws committed Dec 15, 2023
1 parent 02453be commit a7331a7
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 95 deletions.
20 changes: 2 additions & 18 deletions .github/workflows/validate-doc-metadata.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,5 @@ jobs:
validate-doc-metadata:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
persist-credentials: false
fetch-depth: 0
- name: Set up Python 3.9.x
uses: actions/setup-python@v3
with:
python-version: "3.9.x"
- name: Install dependencies
run: |
python -m pip install yamale
python -m pip install yamllint
- name: Lint with yamllint
run: yamllint --format standard -c .tools/validation/.yamllint.yaml .doc_gen/metadata
- name: Lint with yamllint
run: yamllint --format standard -c .tools/validation/.yamllint.yaml .doc_gen/metadata/curated
- name: Validate metadata with yamale
run: python .tools/validation/validate_doc_metadata.py --doc-gen .doc_gen
- name: Pass
run: true
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Run Python checkin script on push or pull request
name: Miscellaneous pre-validation checks
name: Snippet & Repo Validation Checks

# Controls when the action will run.
on:
Expand All @@ -26,7 +26,13 @@ jobs:
- name: setup python
uses: actions/setup-python@v3
with:
python-version: 3.8 #install the python needed
- name: Run Python script to vet code examples # Runs a single command using the runners shell
run: |
python3 .tools/pre_validate/pre_validate.py --q
python-version: 3.11 #install the python needed
- name: Install dependencies
run: >-
python3 -m pip install -r .tools/base_requirements.txt
- name: Lint metadata files
run: >-
yamllint --format standard -c .tools/validation/.yamllint.yaml .doc_gen/metadata .doc_gen/metadata/curated
- name: Validate metadata and repo
run: >-
python3 .tools/validation/validate.py --q --doc-gen .doc_gen
25 changes: 13 additions & 12 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
.venv
build_dir/
target/
*.exe
*.swp
.*~
.DS_Store
.metadata
.recommenders
._*
vendor/
*.exe
.idea
.vscode
venv
.*~
xcuserdata
.metadata
.phpunit.result.cache
.recommenders
.swiftpm
.venv
.vscode
Package.resolved
build_dir
node_modules
super-linter.log
.phpunit.result.cache
target
vendor
venv
xcuserdata
1 change: 1 addition & 0 deletions .tools/base_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
black==23.9.1
flake8==6.1.0
mypy-extensions==1.0.0
pathspec==0.11.2
PyYAML==6.0.1
requests==2.31.0
types-PyYAML==6.0.12.12
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def _is_valid(self, value):

def validate_files(schema_name: Path, meta_names: Iterable[Path], validators):
"""Iterate a list of files and validate each one against a schema."""
success = True
errors = 0

schema = yamale.make_schema(schema_name, validators=validators)
for meta_name in meta_names:
Expand All @@ -163,11 +163,11 @@ def validate_files(schema_name: Path, meta_names: Iterable[Path], validators):
print(f"{meta_name.resolve()} validation success! 👍")
except YamaleError as e:
print(e.message)
success = False
return success
errors += 1
return errors


def validate_all(doc_gen: Path):
def validate_metadata(doc_gen: Path):
# with open(doc_gen / "metadata" / "sdks.yaml") as sdks_file:
# sdks_yaml: dict[str, any] = yaml.safe_load(sdks_file)

Expand Down Expand Up @@ -203,13 +203,13 @@ def validate_all(doc_gen: Path):
("curated_sources_schema.yaml", "curated/sources.yaml"),
("curated_example_schema.yaml", "curated/*_metadata.yaml"),
]
success = True
errors = 0
for schema, metadata in to_validate:
success &= validate_files(
errors += validate_files(
schema_root / schema, (doc_gen / "metadata").glob(metadata), validators
)

return success
return errors


def main():
Expand All @@ -222,9 +222,9 @@ def main():
)
args = parser.parse_args()

success = validate_all(Path(args.doc_gen))
errors = validate_metadata(Path(args.doc_gen))

if success:
if errors == 0:
print("Validation succeeded! 👍👍👍")
else:
print("\n********************************************")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python3
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Expand All @@ -22,6 +23,9 @@
import argparse
import logging
import sys
from pathlib import Path
from pathspec import GitIgnoreSpec
from typing import Generator
from words import WORDS

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -57,22 +61,6 @@
# action in a clean environment (aside from testing),
# exhaustive ignore lists shouldn't be necessary.

# Folders to skip.
IGNORE_FOLDERS = {
".doc_gen",
".pytest_cache",
".tools",
".venv",
"__pycache__",
"bin",
"cdk.out",
"dist",
"node_modules",
"obj",
"target",
"venv",
}

# Files to skip.
IGNORE_FILES = {
".moviedata.json",
Expand Down Expand Up @@ -208,53 +196,88 @@
}


def match_path_to_specs(path: Path, specs: list[GitIgnoreSpec]) -> bool:
    """
    Return True if this path should be skipped, i.e. it is matched by at
    least one of the .gitignore specs collected so far.

    :param path: The file or directory path to test.
    :param specs: GitIgnoreSpec objects accumulated from ancestor folders.
    :return: True when any spec matches the path.
    """
    # any() short-circuits exactly like the original explicit loop did.
    return any(spec.match_file(path) for spec in specs)


def walk_with_gitignore(
    root: Path, specs: list[GitIgnoreSpec] | None = None
) -> Generator[Path, None, None]:
    """
    Starting from a root directory, walk the file system yielding a path for each file.
    However, it also reads `.gitignore` files, so that it behaves like `git ls-files`.
    It does not actively use `git ls-files` because it wouldn't catch new files without
    fiddling with a number of flags.

    :param root: The directory to walk.
    :param specs: GitIgnoreSpec objects from ancestor directories; callers
        normally omit this — it is threaded through the recursive calls.
    :yield: The Path of each file not excluded by a .gitignore.
    """
    # None sentinel instead of a mutable `[]` default (shared-state pitfall).
    if specs is None:
        specs = []
    gitignore_path = root / ".gitignore"
    if gitignore_path.exists():
        # Distinct names for the path and the open handle (the original
        # reused `gitignore` for both, shadowing the Path).
        with open(gitignore_path, "r", encoding="utf-8") as ignore_file:
            # Build a *new* list so sibling directories do not inherit
            # this directory's ignore rules.
            specs = [*specs, GitIgnoreSpec.from_lines(ignore_file.readlines())]
    for entry in os.scandir(root):
        # entry.path is a str; GitIgnoreSpec.match_file accepts str paths.
        if not match_path_to_specs(entry.path, specs):
            path = Path(entry.path)
            if entry.is_dir():
                yield from walk_with_gitignore(path, specs)
            else:
                yield path


def get_files(root: Path) -> Generator[Path, None, None]:
    """
    Yield non-skipped files, that is, anything not matching git ls-files and not
    in the "to skip" files that are in git but are machine generated, so we don't
    want to validate them.
    """
    for candidate in walk_with_gitignore(root):
        # Path.suffix includes the leading dot; strip it to match EXT_LOOKUP keys.
        extension = candidate.suffix.lstrip(".").lower()
        if extension in EXT_LOOKUP and candidate.name not in IGNORE_FILES:
            yield candidate


def check_files(root: Path) -> int:
    """
    Walk a folder system, scanning all files with specified extensions.
    Errors are logged and counted and the count of errors is returned.

    :param root: The root folder to start the walk.
    :return: The number of errors found in the scanned files.
    """
    file_count = 0
    error_count = 0
    for file_path in get_files(root):
        file_count += 1
        logger.info("\nChecking File: %s", file_path)

        with open(file_path, encoding="utf-8") as f:
            file_contents = f.read()

        error_count += verify_no_deny_list_words(file_contents, file_path)
        error_count += verify_no_secret_keys(file_contents, file_path)
        # Scan the file *name* as well as the contents — the second call was
        # accidentally duplicated with file_contents, dropping this check.
        error_count += verify_no_secret_keys(file_path.name, file_path)
        error_count += verify_snippet_start_end(file_contents, file_path)

    print(f"{file_count} files scanned in {root}.\n")
    return error_count


def verify_no_deny_list_words(file_contents: str, file_location: Path) -> int:
    """
    Verify no words in the file are in the list of denied words.

    :param file_contents: The full text of the file being scanned.
    :param file_location: The file's path, used only for error reporting.
    :return: The number of denied words found.
    """
    error_count = 0
    for word in file_contents.split():
        if word.lower() in DENY_LIST:
            # Plain %-style lazy formatting — the spurious f-prefix on a
            # %-placeholder string is removed.
            logger.error("Word '%s' in %s is not allowed.", word, file_location)
            error_count += 1
    return error_count


def verify_sample_files(root_path):
def verify_sample_files(root_path: Path) -> int:
"""Verify sample files meet the requirements and have not moved."""
sample_files_folder = os.path.join(root_path, "resources/sample_files")
media_folder = ".sample_media"
Expand All @@ -269,23 +292,23 @@ def verify_sample_files(root_path):
ext = os.path.splitext(file_name)[1].lstrip(".")
if file_name not in EXPECTED_SAMPLE_FILES:
logger.error(
f"File '%s' in %s was not found in the list of expected sample files. If this is a new sample file, add it to the EXPECTED_SAMPLE_FILES list in pre_validate.py.",
"File '%s' in %s was not found in the list of expected sample files. If this is a new sample file, add it to the EXPECTED_SAMPLE_FILES list in pre_validate.py.",
file_name,
sample_files_folder,
)
error_count += 1
if ext.lower() in MEDIA_FILE_TYPES:
if media_folder not in file_path:
logger.error(
f"File '%s' in %s must be in the %s directory.",
"File '%s' in %s must be in the %s directory.",
file_name,
sample_files_folder,
media_folder,
)
error_count += 1
if (os.path.getsize(file_path) / ONE_MB_AS_BYTES) > MAX_FILE_SIZE_MB:
logger.error(
f"File '%s' in %s is larger than the allowed size for a sample file.",
"File '%s' in %s is larger than the allowed size for a sample file.",
file_name,
sample_files_folder,
)
Expand All @@ -294,15 +317,16 @@ def verify_sample_files(root_path):
for sample_file in EXPECTED_SAMPLE_FILES:
if sample_file not in file_list:
logger.error(
f"Expected sample file '%s' was not found in '%s'. If this file was intentionally removed, remove it from the EXPECTED_SAMPLE_FILES list in pre_validate.py.",
"Expected sample file '%s' was not found in '%s'. If this file was intentionally removed, remove it from the EXPECTED_SAMPLE_FILES list in pre_validate.py.",
sample_file,
sample_files_folder,
)
error_count += 1

return error_count


def verify_no_secret_keys(file_contents, file_location):
def verify_no_secret_keys(file_contents: str, file_location: Path) -> int:
"""Verify the file does not contain 20- or 40- length character strings,
which might be secret keys. Allow strings in the allowlist in
https://github.com/awsdocs/aws-doc-sdk-examples/blob/main/.github/pre_validate/pre_validate.py.
Expand Down Expand Up @@ -339,7 +363,7 @@ def verify_no_secret_keys(file_contents, file_location):
return error_count


def verify_snippet_start_end(file_contents, file_location):
def verify_snippet_start_end(file_contents: str, file_location: Path) -> int:
"""Scan the file contents for snippet-start and snippet-end tags and verify
that they are in matched pairs. Log errors and return the count of errors."""
error_count = 0
Expand All @@ -350,7 +374,7 @@ def verify_snippet_start_end(file_contents, file_location):
if snippet_start in word:
tag = word.split("[")[1]
if tag in snippet_tags:
logger.error(f"Duplicate tag {tag[:-1]} found in {file_location}.")
logger.error("Duplicate tag %s found in %s.", tag[:-1], file_location)
error_count += 1
else:
snippet_tags.add(tag)
Expand All @@ -360,8 +384,9 @@ def verify_snippet_start_end(file_contents, file_location):
snippet_tags.remove(tag)
else:
logger.error(
f"End tag {tag[:-1]} with no matching start tag "
f"found in {file_location}."
"End tag %s with no matching start tag " "found in %s.",
tag[:-1],
file_location,
)
error_count += 1

Expand Down Expand Up @@ -391,7 +416,9 @@ def main():
)
args = parser.parse_args()

root_path = os.path.abspath(".") if not args.root else os.path.abspath(args.root)
root_path = Path(
os.path.abspath(".") if not args.root else os.path.abspath(args.root)
)

print("----------\n\nRun Tests\n")
error_count = check_files(root_path, args.quiet)
Expand Down
File renamed without changes.
Loading

0 comments on commit a7331a7

Please sign in to comment.