Skip to content

Commit

Permalink
Merge pull request #2692 from mdklab/fix-issue-1802-unicode-error
Browse files Browse the repository at this point in the history
Fix issue #1802: Handle UnicodeDecodeError in repo file paths
  • Loading branch information
paul-gauthier authored Dec 26, 2024
2 parents be6811b + 5c92491 commit 9befec5
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
6 changes: 6 additions & 0 deletions aider/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,12 @@ def sanity_check_repo(repo, io):
if not repo.git_repo_error:
return True
error_msg = str(repo.git_repo_error)
except UnicodeDecodeError as exc:
error_msg = (
f"Failed to read the Git repository. This issue is likely caused by a path encoded "
f"in a format different from the expected encoding \"{sys.getfilesystemencoding()}\".\n"
f"Internal error: {str(exc)}"
)
except ANY_GIT_ERROR as exc:
error_msg = str(exc)
bad_ver = "version in (1, 2)" in error_msg
Expand Down
40 changes: 40 additions & 0 deletions tests/basic/test_sanity_check_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from aider import urls
from aider.main import sanity_check_repo
from aider.repo import GitRepo
from aider.io import InputOutput


@pytest.fixture
Expand Down Expand Up @@ -182,3 +184,41 @@ def test_sanity_check_repo_with_no_repo(mock_io):
# Assert that no errors or outputs were logged
mock_io.tool_error.assert_not_called()
mock_io.tool_output.assert_not_called()


def corrupt_git_index(repo_path):
    """Damage the ``.git/index`` file of the repository at *repo_path* in place.

    The index is opened for binary read/write, its first four bytes are
    checked against the ``DIRC`` signature, and then five ``0xF5`` bytes are
    written starting at byte offset 77.

    Args:
        repo_path: Directory that contains the ``.git`` folder.

    Raises:
        ValueError: If the index file does not start with ``b"DIRC"``.
    """
    with open(os.path.join(repo_path, ".git", "index"), "r+b") as index_file:
        # Refuse to touch anything that does not look like a git index.
        if index_file.read(4) != b"DIRC":
            raise ValueError("Invalid git index file signature.")

        # Offset 77 is intended to land inside the entry data (presumably the
        # first path name -- depends on the index layout); the injected bytes
        # are meant to provoke a decoding error when the index is read back.
        index_file.seek(77)
        index_file.write(b"\xF5" * 5)


def test_sanity_check_repo_with_corrupt_index(create_repo, mock_io):
    """sanity_check_repo reports a decoding problem when the git index is corrupted."""
    # Only the path from the fixture is needed here.
    repo_path, _ = create_repo

    # Inject invalid bytes into .git/index before the repo wrapper is built.
    corrupt_git_index(repo_path)
    git_repo = GitRepo(InputOutput(), None, repo_path)

    expected_output = (
        "Failed to read the Git repository. This issue is likely caused by a path encoded "
        "in a format different from the expected encoding \"utf-8\".\n"
        "Internal error: 'utf-8' codec can't decode byte 0xf5 in position 3: invalid start byte"
    )

    # The check must fail ...
    assert sanity_check_repo(git_repo, mock_io) is False

    # ... and explain why through both the error and the output channel.
    mock_io.tool_error.assert_called_with("Unable to read git repository, it may be corrupt?")
    mock_io.tool_output.assert_called_with(expected_output)

0 comments on commit 9befec5

Please sign in to comment.