Skip to content

Commit

Permalink
Consolidated validators to .tools/validation, and reworked file walker
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidSouther committed Oct 31, 2023
1 parent 1dfecb3 commit 264c286
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 85 deletions.
27 changes: 0 additions & 27 deletions .github/workflows/validate-doc-metadata.yml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: 3.8 #install the python needed
- name: Run Python script to vet code examples # Runs a single command using the runners shell
run: |
python3 .tools/pre_validate/pre_validate.py --q
- name: Install dependencies
run: >-
python3 -m pip install -r .tools/base_requirements.txt
- name: Lint with yamllint
run: >-
yamllint --format standard -c .tools/validation/.yamllint.yaml .doc_gen/metadata .doc_gen/metadata/curated
- name: Validate metadata with yamale
run: >-
python3 .tools/validation/validate.py --q --doc-gen .doc_gen
1 change: 1 addition & 0 deletions .tools/base_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
black==23.9.1
flake8==6.1.0
mypy-extensions==1.0.0
pathspec==0.11.2
PyYAML==6.0.1
requests==2.31.0
types-PyYAML==6.0.12.12
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def _is_valid(self, value):

def validate_files(schema_name: Path, meta_names: Iterable[Path], validators):
"""Iterate a list of files and validate each one against a schema."""
success = True
success = 0

schema = yamale.make_schema(schema_name, validators=validators)
for meta_name in meta_names:
Expand All @@ -163,7 +163,7 @@ def validate_files(schema_name: Path, meta_names: Iterable[Path], validators):
print(f"{meta_name.resolve()} validation success! 👍")
except YamaleError as e:
print(e.message)
success = False
success += 1
return success


Expand Down Expand Up @@ -203,9 +203,9 @@ def validate_all(doc_gen: Path):
("curated_sources_schema.yaml", "curated/sources.yaml"),
("curated_example_schema.yaml", "curated/*_metadata.yaml"),
]
success = True
success = 0
for schema, metadata in to_validate:
success &= validate_files(
success += validate_files(
schema_root / schema, (doc_gen / "metadata").glob(metadata), validators
)

Expand All @@ -224,7 +224,7 @@ def main():

success = validate_all(Path(args.doc_gen))

if success:
if success == 0:
print("Validation succeeded! 👍👍👍")
else:
print("\n********************************************")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python3
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Expand All @@ -22,6 +23,9 @@
import argparse
import logging
import sys
from pathlib import Path
from pathspec import GitIgnoreSpec
from typing import Generator
from words import WORDS

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -57,22 +61,6 @@
# action in a clean environment (aside from testing),
# exhaustive ignore lists shouldn't be necessary.

# Folders to skip.
IGNORE_FOLDERS = {
".doc_gen",
".pytest_cache",
".tools",
".venv",
"__pycache__",
"bin",
"cdk.out",
"dist",
"node_modules",
"obj",
"target",
"venv",
}

# Files to skip.
IGNORE_FILES = {
".moviedata.json",
Expand Down Expand Up @@ -208,7 +196,44 @@
}


def check_files(root, quiet):
def match_path_to_specs(path: Path, specs: "list[GitIgnoreSpec]") -> bool:
    """
    Return True if this path should be skipped, i.e. it is matched by any of
    the loaded .gitignore specs.

    :param path: the file or directory path to test.
    :param specs: gitignore specs accumulated from parent directories.
    :return: True when any spec matches the path, False otherwise.
    """
    # any() short-circuits on the first spec that matches, same as the
    # explicit loop-and-return it replaces.
    return any(spec.match_file(path) for spec in specs)


def walk_with_gitignore(
    root: Path, specs: "list[GitIgnoreSpec] | None" = None
) -> Generator[Path, None, None]:
    """
    Recursively yield files under `root`, honoring .gitignore files.

    Each directory's .gitignore (when present) is parsed and appended to the
    specs inherited from parent directories, so deeper rules accumulate on
    top of shallower ones. Files named in IGNORE_FILES are never yielded.

    :param root: the directory to walk.
    :param specs: gitignore specs inherited from ancestor directories.
    """
    # Use None instead of a mutable default argument; copy so the caller's
    # list is never aliased.
    specs = list(specs) if specs is not None else []
    gitignore_path = root / ".gitignore"
    if gitignore_path.exists():
        with open(gitignore_path, "r", encoding="utf-8") as ignore_file:
            specs = [*specs, GitIgnoreSpec.from_lines(ignore_file.readlines())]
    for entry in os.scandir(root):
        if match_path_to_specs(entry.path, specs):
            continue  # ignored by some .gitignore rule
        path = Path(entry.path)
        if entry.is_dir():
            yield from walk_with_gitignore(path, specs)
        elif path.parts[-1] not in IGNORE_FILES:
            yield path


def get_files(root: Path):
    """
    Yield files under `root` whose extension appears in EXT_LOOKUP, skipping
    any file named in IGNORE_FILES.

    :param root: the directory to walk (via walk_with_gitignore).
    """
    for path in walk_with_gitignore(root):
        filename = path.parts[-1]
        ext = os.path.splitext(filename)[1].lstrip(".")
        if ext.lower() in EXT_LOOKUP:
            if filename in IGNORE_FILES:
                # Log and skip: previously this branch logged the skip but
                # still yielded the file, defeating the ignore list.
                logger.info("\nSkipped File: %s", path)
                continue
            yield path


def check_files(root: Path):
    """
    Walk a folder tree and scan every file with a recognized extension.

    Each file's contents are checked for denied words, potential secret
    keys, and mismatched snippet start/end tags; its name is also checked
    for secret-like strings. Errors are logged and counted.

    :param root: the folder to walk.
    :return: the total number of errors found across all files.
    """
    file_count = 0
    error_count = 0
    for file_path in get_files(root):
        file_count += 1
        logger.info("\nChecking File: %s", file_path)

        with open(file_path, encoding="utf-8") as f:
            file_contents = f.read()

        error_count += verify_no_deny_list_words(file_contents, file_path)
        # Check the contents once and the file name once for secret-like
        # strings; the second call previously re-checked the contents by
        # mistake, double-counting errors and missing bad file names.
        error_count += verify_no_secret_keys(file_contents, file_path)
        error_count += verify_no_secret_keys(file_path.name, file_path)
        error_count += verify_snippet_start_end(file_contents, file_path)

    print(f"{file_count} files scanned in {root}.\n")
    return error_count


def verify_no_deny_list_words(file_contents: str, file_location: Path):
    """
    Verify no words in the file are in the list of denied words.

    Words are whitespace-delimited tokens, lowercased before the DENY_LIST
    membership test.

    :param file_contents: the text to scan.
    :param file_location: the file's path, used only in error messages.
    :return: the number of denied words found (each is logged as an error).
    """
    error_count = 0
    for word in file_contents.split():
        if word.lower() in DENY_LIST:
            # %-style lazy formatting: only rendered when the record is emitted.
            logger.error("Word '%s' in %s is not allowed.", word, file_location)
            error_count += 1
    return error_count


def verify_sample_files(root_path):
def verify_sample_files(root_path: Path):
"""Verify sample files meet the requirements and have not moved."""
sample_files_folder = os.path.join(root_path, "resources/sample_files")
media_folder = ".sample_media"
Expand All @@ -269,23 +285,23 @@ def verify_sample_files(root_path):
ext = os.path.splitext(file_name)[1].lstrip(".")
if file_name not in EXPECTED_SAMPLE_FILES:
logger.error(
f"File '%s' in %s was not found in the list of expected sample files. If this is a new sample file, add it to the EXPECTED_SAMPLE_FILES list in pre_validate.py.",
"File '%s' in %s was not found in the list of expected sample files. If this is a new sample file, add it to the EXPECTED_SAMPLE_FILES list in pre_validate.py.",
file_name,
sample_files_folder,
)
error_count += 1
if ext.lower() in MEDIA_FILE_TYPES:
if media_folder not in file_path:
logger.error(
f"File '%s' in %s must be in the %s directory.",
"File '%s' in %s must be in the %s directory.",
file_name,
sample_files_folder,
media_folder,
)
error_count += 1
if (os.path.getsize(file_path) / ONE_MB_AS_BYTES) > MAX_FILE_SIZE_MB:
logger.error(
f"File '%s' in %s is larger than the allowed size for a sample file.",
"File '%s' in %s is larger than the allowed size for a sample file.",
file_name,
sample_files_folder,
)
Expand All @@ -294,15 +310,16 @@ def verify_sample_files(root_path):
for sample_file in EXPECTED_SAMPLE_FILES:
if sample_file not in file_list:
logger.error(
f"Expected sample file '%s' was not found in '%s'. If this file was intentionally removed, remove it from the EXPECTED_SAMPLE_FILES list in pre_validate.py.",
"Expected sample file '%s' was not found in '%s'. If this file was intentionally removed, remove it from the EXPECTED_SAMPLE_FILES list in pre_validate.py.",
sample_file,
sample_files_folder,
)
error_count += 1

return error_count


def verify_no_secret_keys(file_contents, file_location):
def verify_no_secret_keys(file_contents: str, file_location: Path):
"""Verify the file does not contain 20- or 40- length character strings,
which might be secret keys. Allow strings in the allowlist in
https://github.com/awsdocs/aws-doc-sdk-examples/blob/main/.github/pre_validate/pre_validate.py.
Expand Down Expand Up @@ -339,7 +356,7 @@ def verify_no_secret_keys(file_contents, file_location):
return error_count


def verify_snippet_start_end(file_contents, file_location):
def verify_snippet_start_end(file_contents: str, file_location: Path):
"""Scan the file contents for snippet-start and snippet-end tags and verify
that they are in matched pairs. Log errors and return the count of errors."""
error_count = 0
Expand All @@ -350,7 +367,7 @@ def verify_snippet_start_end(file_contents, file_location):
if snippet_start in word:
tag = word.split("[")[1]
if tag in snippet_tags:
logger.error(f"Duplicate tag {tag[:-1]} found in {file_location}.")
logger.error("Duplicate tag %s found in %s.", tag[:-1], file_location)
error_count += 1
else:
snippet_tags.add(tag)
Expand All @@ -360,8 +377,9 @@ def verify_snippet_start_end(file_contents, file_location):
snippet_tags.remove(tag)
else:
logger.error(
f"End tag {tag[:-1]} with no matching start tag "
f"found in {file_location}."
"End tag %s with no matching start tag " "found in %s.",
tag[:-1],
file_location,
)
error_count += 1

Expand Down Expand Up @@ -391,7 +409,9 @@ def main():
)
args = parser.parse_args()

root_path = os.path.abspath(".") if not args.root else os.path.abspath(args.root)
root_path = Path(
os.path.abspath(".") if not args.root else os.path.abspath(args.root)
)

print("----------\n\nRun Tests\n")
error_count = check_files(root_path, args.quiet)
Expand Down
File renamed without changes.
44 changes: 44 additions & 0 deletions .tools/validation/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import argparse
from pathlib import Path
from sys import exit
from metadata_validator import validate_all
from project_validator import check_files, verify_sample_files


def main():
    """
    Parse command-line arguments and run every validation pass: file checks,
    sample-file checks, and metadata schema validation.

    :return: a process exit status — 0 on success, 1 when errors were found.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): --quiet is accepted for CLI compatibility but is not
    # currently read anywhere in this function — confirm whether the
    # validators should consume it.
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Suppresses output of filenames while parsing. " "The default is False.",
    )
    parser.add_argument(
        "--root",
        default=f"{Path(__file__).parent / '..' / '..'}",
        help="The root path from which to search for files "
        "to check. The default is the current working "
        "folder.",
    )
    parser.add_argument(
        "--doc-gen",
        default=f"{Path(__file__).parent / '..' / '..' / '.doc_gen'}",
        help="The folder that contains schema and metadata files.",
        required=False,
    )
    args = parser.parse_args()
    root_path = Path(args.root).resolve()

    error_count = check_files(root_path)
    error_count += verify_sample_files(root_path)
    error_count += validate_all(Path(args.doc_gen))

    if error_count > 0:
        print(f"{error_count} errors found, please fix them.")
    else:
        print("All checks passed, you are cleared to check in.")

    # Clamp to 0/1: POSIX truncates exit statuses to one byte, so returning
    # the raw count would report success for any multiple of 256 errors.
    return 1 if error_count else 0


# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    exit(main())
File renamed without changes.

0 comments on commit 264c286

Please sign in to comment.