Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue #1802: Handle UnicodeDecodeError in repo file paths #2692

Merged
merged 1 commit into from
Dec 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions aider/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,12 @@ def sanity_check_repo(repo, io):
if not repo.git_repo_error:
return True
error_msg = str(repo.git_repo_error)
except UnicodeDecodeError as exc:
error_msg = (
f"Failed to read the Git repository. This issue is likely caused by a path encoded "
f"in a format different from the expected encoding \"{sys.getfilesystemencoding()}\".\n"
f"Internal error: {str(exc)}"
)
except ANY_GIT_ERROR as exc:
error_msg = str(exc)
bad_ver = "version in (1, 2)" in error_msg
Expand Down
40 changes: 40 additions & 0 deletions tests/basic/test_sanity_check_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from aider import urls
from aider.main import sanity_check_repo
from aider.repo import GitRepo
from aider.io import InputOutput


@pytest.fixture
Expand Down Expand Up @@ -182,3 +184,41 @@ def test_sanity_check_repo_with_no_repo(mock_io):
# Assert that no errors or outputs were logged
mock_io.tool_error.assert_not_called()
mock_io.tool_output.assert_not_called()


def corrupt_git_index(repo_path, *, offset=77, payload=b"\xF5" * 5):
    """Overwrite bytes in a repository's Git index to simulate a badly encoded path.

    Args:
        repo_path: Root of the working tree; the index file is expected at
            ``<repo_path>/.git/index``.
        offset: Absolute byte position in the index file at which ``payload``
            is written. NOTE(review): the default of 77 presumably lands three
            bytes into the first entry's path name (12-byte header plus 62
            bytes of fixed per-entry fields in the v2 layout) -- confirm
            against the Git index format if the fixture repo changes.
        payload: Bytes written at ``offset``. The default repeats ``0xF5``, a
            byte that is never valid in UTF-8, so decoding the path fails.

    Raises:
        ValueError: If the file does not start with the ``DIRC`` signature.
    """
    index_path = os.path.join(repo_path, ".git", "index")
    with open(index_path, "r+b") as index_file:
        # Refuse to modify anything that does not look like a Git index.
        if index_file.read(4) != b"DIRC":
            raise ValueError("Invalid git index file signature.")

        # Overwrite in place; the bytes before ``offset`` stay untouched.
        index_file.seek(offset)
        index_file.write(payload)


def test_sanity_check_repo_with_corrupt_index(create_repo, mock_io):
    """Check that an undecodable path in the Git index is reported, not raised.

    The index of a freshly created repository is corrupted with bytes that
    cannot be decoded, then ``sanity_check_repo`` is expected to return
    ``False`` and to report the problem through ``mock_io``.
    """
    # Local import: the module's top-level imports are not all visible here.
    import sys

    repo_path, _ = create_repo
    # Corrupt the Git index file
    corrupt_git_index(repo_path)

    # Create GitRepo instance
    git_repo = GitRepo(InputOutput(), None, repo_path)

    # Call the function
    result = sanity_check_repo(git_repo, mock_io)

    # Assert that the function returns False
    assert result is False

    # The code under test interpolates sys.getfilesystemencoding() into its
    # message, so derive the expectation the same way instead of hard-coding
    # "utf-8". The codec named after "Internal error:" is part of the
    # exception text and is kept literal.
    fs_encoding = sys.getfilesystemencoding()
    expected_output = (
        "Failed to read the Git repository. This issue is likely caused by a path encoded "
        f'in a format different from the expected encoding "{fs_encoding}".\n'
        "Internal error: 'utf-8' codec can't decode byte 0xf5 in position 3: invalid start byte"
    )

    # Assert that the appropriate error messages were logged
    mock_io.tool_error.assert_called_with("Unable to read git repository, it may be corrupt?")
    mock_io.tool_output.assert_called_with(expected_output)
Loading