Skip to content

Commit

Permalink
better file filtering (#3983)
Browse files Browse the repository at this point in the history
better file filtering

Co-authored-by: Martin Ye <[email protected]>
  • Loading branch information
MartinYe1234 and Martin Ye authored Jun 6, 2024
1 parent df1905d commit be8e28d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
10 changes: 10 additions & 0 deletions sweepai/config/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import os
import re
import traceback
from functools import lru_cache

Expand Down Expand Up @@ -313,6 +314,15 @@ def is_file_suitable(self, file_contents: str) -> tuple[bool, str]:
if len(file_contents)/line_count > 200:
return False, "This file was determined to be non human readable due to the average line length."
return True, ""

def is_file_auto_generated(self, file_name: str) -> tuple[bool, str]:
    """Guess from its name whether a file is likely auto-generated.

    The heuristic flags any name containing a run of four or more
    consecutive digits (for example a timestamp or a hash-like counter).
    The search covers the whole ``file_name`` string, so digits anywhere
    in it count.

    Args:
        file_name: The file name to inspect.
            NOTE(review): if callers pass a full path, digit runs in
            directory names will also trigger the heuristic - confirm
            this is intended.

    Returns:
        A ``(is_auto_generated, reason)`` tuple. ``reason`` is a short
        explanation when the file is flagged and an empty string otherwise.
    """
    # ``\d{4,}`` means "four or more digits", i.e. a run of exactly four
    # digits is already enough to flag the file.
    if re.search(r"\d{4,}", file_name):
        return True, "The filename means that this file is likely auto generated."
    return False, ""



Expand Down
7 changes: 7 additions & 0 deletions sweepai/core/review_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ def get_pr_changes(repo: Repository, pr: PullRequest) -> tuple[dict[str, PRChang
errored = True
e = UnsuitableFileException(reason)
unsuitable_files.append((file_name, e))
else:
# drop likely autogenerated files based on file name
auto_generated, reason = sweep_config.is_file_auto_generated(file_name)
if auto_generated:
errored = True
e = UnsuitableFileException(reason)
unsuitable_files.append((file_name, e))

if errored:
posthog.capture(
Expand Down

0 comments on commit be8e28d

Please sign in to comment.