Skip to content

Commit

Permalink
Diff/Dependency script fixes (#41)
Browse files Browse the repository at this point in the history
* Diff script: Restricted the checks to proto files in the proto/ directory. Added a check to ensure we don't introduce new files in a higher version than v1.
* Changed the create_c4t_file_listing.sh to directly output the c4t files to be used directly by the dependency resolution script.
* Updated the dependency_checker to automatically get the c4t files. Also updated an automatic update of the package name and all references in the new files.
* Only print the dependency graph if there are no errors in the prior dependency check.

---------

Co-authored-by: Noctunus <[email protected]>
  • Loading branch information
2 people authored and mo-c4t committed Sep 23, 2024
1 parent d0a4c18 commit 254ef55
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 29 deletions.
5 changes: 2 additions & 3 deletions scripts/create_c4t_file_listing.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/bin/bash

# Simple helper script to just create a file which contains a listing of all the cmp proto files in the c4t branch
# This can then be used by the dependency checker to determine whether a new version of a proto file can be reused
# This is used by the dependency checker to determine whether a new version of a proto file can be reused
# when it's not present in the c4t branch

OUTFILE=${1:-temp/c4tfiles.txt}
git ls-tree -r --name-only origin/c4t proto/cmp | grep -oP "cmp/.*\.proto" > $OUTFILE
git ls-tree -r --name-only origin/c4t proto/cmp | grep -oP "cmp/.*\.proto"
109 changes: 86 additions & 23 deletions scripts/dependency_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,64 @@ def ensure_directory_exists(directory):
if not os.path.exists(directory):
os.makedirs(directory)

def extract_proto_definitions(proto_file_path):
"""
Extracts message and enum names from a .proto file.
Args:
proto_file_path (str): Path to the .proto file.
Returns:
list: A list of message and enum names.
"""
definitions = []
# Regular expression to match 'message' or 'enum' followed by the name
pattern = re.compile(r'^\s*(message|enum)\s+(\w+)', re.MULTILINE)

with open(proto_file_path, 'r') as file:
content = file.read()
matches = pattern.findall(content)
# Extract the second group which is the name
definitions = [match[1] for match in matches]

return definitions

def search_replace_in_file(filename, search_replace):
print(f"πŸ“ Running search/replace on the file {filename}")

# Read in the file
with open(filename, 'r') as file:
filedata = file.read()

# Replace the target string
for search, replace in search_replace:
filedata = filedata.replace(search, replace)
num = 0
while search in filedata:
filedata = filedata.replace(search, replace, 1)
num += 1
print(f" πŸ” Replaced '{search}' with '{replace}' : #{num}")

# Write the file out again
with open(filename, 'w') as file:
file.write(filedata)

def getC4TFiles():
print("πŸ” Getting the proto files in the c4t branch for reference")

# Get the proto files in the c4t branch by calling an external script
c4t_files = []
try:
c4t_files = os.popen("scripts/create_c4t_file_listing.sh").read().splitlines()
except:
print("β›” [FATAL] Unable to get c4t files. Exiting")
sys.exit(1)

if len(c4t_files) == 0:
print("β›” [FATAL] No files found in c4t branch. Exiting")
sys.exit(1)

return c4t_files

def extract_command_line_args():
# Create an ArgumentParser object
parser = argparse.ArgumentParser(description="Extract command line arguments")
Expand All @@ -34,23 +79,14 @@ def extract_command_line_args():
parser.add_argument('--print-graph', action='store_true', help="Enable graph printing ")
parser.add_argument('--fix', action='store_true', help="Enable fix mode which will try to fix the dependencies")

# Add the --c4t-files argument, which takes a file input
parser.add_argument('--c4t-files', type=argparse.FileType('r'), help="File containing filenames of existing files in the c4t branch to decide the latest version for the --fix mode. The file can be generated by using the create_c4t_file_listing.sh script.")

# Parse the arguments
args = parser.parse_args()

# Extract the values of the flags and file content
print_graph = args.print_graph
fix = args.fix
c4t_files = []

# If the --c4t-files argument is provided, read and store the filenames into an array
if args.c4t_files:
with args.c4t_files as file:
c4t_files = [line.strip() for line in file] # Strip out newline characters

return print_graph, fix, c4t_files
return print_graph, fix

def find_latest_version(old_file, recent_files):
match = file_pattern.match(old_file)
Expand All @@ -62,7 +98,7 @@ def find_latest_version(old_file, recent_files):
if match:
new_prefix, new_version, new_proto_filename = match.groups()
if prefix == new_prefix and proto_filename == new_proto_filename:
return recent_file
return prefix, version, new_version, recent_file
return False

# Function to return all the latest protobuf files
Expand Down Expand Up @@ -119,7 +155,7 @@ def extract_proto_includes(proto_file_path):

return includes

print_graph, fix, c4tfiles = extract_command_line_args()
print_graph, fix = extract_command_line_args()
fixed_new_version_files = []
directory_path = "proto/"

Expand Down Expand Up @@ -194,43 +230,64 @@ def print_dependency_graph(dep_dict):
print() # Add an empty line for better readability

if print_graph:
print_dependency_graph(include_graph)
if global_error == True:
print("❌ [FAIL] Won't print the graph as there were errors found while doing the dependency check!")
else:
print_dependency_graph(include_graph)

## Fix the dependencies if --fix is passed:
if fix:
# Get the proto files in the c4t branch by calling an external script
print()
print("πŸ”§ Trying to fix the dependencies...")

c4tfiles = getC4TFiles()

max_iterations=20

for iteration in range(max_iterations): # if it's not fixable in 20 iterations we're cooked anyways so break then
print
print(f"πŸ”„ ITERATION #{iteration+1}/{max_iterations}")

for file, wrong_includes in fix_needed.items():
include_fixes = []
search_replace_fixes = []

print()
print(f"πŸ”¨ The file '{file}' needs a fix because the following includes are wrong:")
for wrong_include in wrong_includes:
correct_include = find_latest_version(wrong_include, latest_proto_files)
include_prefix, old_include_version, new_include_version, correct_include = find_latest_version(wrong_include, latest_proto_files)
if correct_include == False:
print(f"β›” [FATAL] Unable to find the latest version of {wrong_include}. Exiting")
sys.exit(2)

print(f" ➑ {wrong_include} ▢️ {correct_include}")
include_fixes.append( (wrong_include, correct_include) )

# Add the search and replace pair to the list for the include
search_replace_fixes.append( (wrong_include, correct_include) )

# Lastly we also need to update the usages of the protobuf message types in the new/existing file
# e.g. cmp.types.v1.Message -> cmp.types.v2.Message
# This is done by replacing the old version number with the new version number when messages from the include is used
# So we first need to extract the message names from the include file and then replace the old version number with the new version number
# in the search/replace step
prefix_dots = include_prefix.replace('/','.')
include_file = directory_path + correct_include
messages = extract_proto_definitions(include_file)
for message in messages:
message_search = f"{prefix_dots}.{old_include_version}.{message}"
message_replace = f"{prefix_dots}.{new_include_version}.{message}"
search_replace_fixes.append( (message_search, message_replace) )

# First we need to create a new file with version+1 where we can make the changes
# But first let's check some things:
# * If the file is not present in the c4t branch we can just apply the changes directly, as no new version is needed in that case
# * If the file was created in a previous iteration we can also apply the changes directly
if c4tfiles.count != 0 and file not in c4tfiles:
if len(c4tfiles) != 0 and file not in c4tfiles:
print(f"πŸ’‘ The file {file} is not present in the c4t branch, therefore apply the changes directly")
search_replace_in_file(directory_path + file, include_fixes)
search_replace_in_file(directory_path + file, search_replace_fixes)
elif file in fixed_new_version_files:
print(f"♻️ The file {file} was created in a previous iteration, therefore apply the changes directly")
search_replace_in_file(directory_path + file, include_fixes)
search_replace_in_file(directory_path + file, search_replace_fixes)
else:
# This is a new file popping up so we need to create a new version and apply the include changes there

Expand All @@ -245,9 +302,15 @@ def print_dependency_graph(dep_dict):
ensure_directory_exists(directory_path + new_path)
shutil.copyfile(directory_path + file, directory_path + new_filename)

print(f"πŸ“ Applying include fixes to the file")
# now that we have a new version (1:1 copy) of the wrong file let's fix the includes:
search_replace_in_file(directory_path + new_filename, include_fixes)
# now that we have a new version (1:1 copy) of the wrong file let's fix values in the file:
# Additionally we need to adjust the package name in the new file - this is only done
# if the file is newly created in this iteration as everything else would be wrong
prefix_dots = prefix.replace('/','.')
package_search = f"package {prefix_dots}.{version}"
package_replace = f"package {prefix_dots}.v{version_number}"
search_replace_fixes.append( (package_search, package_replace) )

search_replace_in_file(directory_path + new_filename, search_replace_fixes)
fixed_new_version_files.append(new_filename)


Expand Down
15 changes: 12 additions & 3 deletions scripts/diff_against_branch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ function check_added_file {
echo -e "πŸ”’ Version: ${PURPLE}$FILE_VERSION${NC}"
echo -e "πŸ”ƒ Comparing against ${PURPLE}$ORIGIN/$OTHER_FILE${NC}"

# Check if the file actually exists in the origin branch
# Otherwise we can't do a diff and it's an error in the versioning
git show origin/$ORIGIN:$OTHER_FILE > /dev/null 2>&1
if [[ "$?" != "0" ]] ; then
echo -e "❌ ${RED}[FAIL] ERROR${NC}: File with version $(( FILE_VERSION - 1 )) not found in the origin branch: ${PURPLE}$ORIGIN/$OTHER_FILE${NC}"
ERROR_FILES+=("$FILE")
return
fi

GIT_PAGER=cat git diff --exit-code origin/$ORIGIN:$OTHER_FILE $FILE
if [[ "$?" == "0" ]] ; then
echo "❓ No change detected! (weird?)"
Expand Down Expand Up @@ -87,17 +96,17 @@ git fetch origin $ORIGIN > /dev/null 2>&1
echo -e "πŸ” Checking for illegal filesystem changes like removing/moving existing files"
while read FILE ; do
check_filesystem_changes $FILE
done < <(git diff --diff-filter=am --name-status origin/$ORIGIN | grep -oP "proto/.*")
done < <(git diff --diff-filter=am --name-status origin/$ORIGIN | grep -oP "proto/.*\.proto")

echo -e "πŸ” Checking for added files to print out the diff of the new version against the previous version"
while read FILE ; do
check_added_file $FILE
done < <(git diff --diff-filter=A --name-status origin/$ORIGIN | grep -oP "proto/.*")
done < <(git diff --diff-filter=A --name-status origin/$ORIGIN | grep -oP "proto/.*\.proto")

echo -e "πŸ” Checking for modifications in existing files to catch unwanted structural changes"
while read FILE ; do
check_modified_file $FILE
done < <(git diff --diff-filter=M --name-status origin/$ORIGIN | grep -oP "proto/.*")
done < <(git diff --diff-filter=M --name-status origin/$ORIGIN | grep -oP "proto/.*\.proto")

if [ ${#ERROR_FILES[@]} -gt 0 ] ; then
echo
Expand Down

0 comments on commit 254ef55

Please sign in to comment.