add circleci support; bugfixes #3985

Merged (7 commits, Jun 8, 2024)

Changes from all commits
2 changes: 2 additions & 0 deletions sweepai/config/server.py
@@ -209,3 +209,5 @@

assert OPENAI_API_KEY, "OPENAI_API_KEY is required."
assert COHERE_API_KEY, "COHERE_API_KEY is required."

CIRCLE_CI_PAT = os.environ.get("CIRCLE_CI_PAT", None) # if this is present, we will poll CircleCI and fetch logs from it
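
As a usage sketch (not part of the diff), downstream code can gate CircleCI polling on this setting; get_failing_circleci_logs is defined in sweepai/handlers/on_check_suite.py below, while the wrapper name here is hypothetical:

from sweepai.config.server import CIRCLE_CI_PAT
from sweepai.handlers.on_check_suite import get_failing_circleci_logs

def maybe_fetch_circleci_logs(repo, commit_sha: str) -> str:
    # Hypothetical helper: skip CircleCI polling entirely when no token is configured.
    if not CIRCLE_CI_PAT:
        return ""
    return get_failing_circleci_logs(repo=repo, current_commit=commit_sha)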
34 changes: 5 additions & 29 deletions sweepai/core/sweep_bot.py
@@ -98,7 +98,7 @@ def to_raw_string(s):
{github_actions_logs}
</github_actions_logs>

You have previously already made the following changes:
You have previously made the following changes. The diffs represent the current state of the file/project:
<changes_made>
{changes_made}
</changes_made>
@@ -122,7 +122,7 @@ def to_raw_string(s):
{current_github_actions_logs}
</current_github_actions_logs>

You have previously already made the following changes:
You have previously made the following changes. The diffs represent the current state of the file/project:
<changes_made>
{changes_made}
</changes_made>
@@ -1136,7 +1136,7 @@ def context_get_files_to_change(
content=snippet.expand(300).get_snippet(add_lines=False) if snippet.type_name == "source" else snippet.get_snippet(add_lines=False),
) for i, snippet in enumerate(relevant_snippets)
)
relevant_snippets_message = f"# Relevant codebase files:\nHere are the relevant files from the codebase. We previously summarized each of the files to help you solve the GitHub issue. These will be your primary reference to solve the problem:\n\n<relevant_files>\n{joined_relevant_snippets}\n</relevant_files>"
relevant_snippets_message = f"# Relevant codebase files:\nHere are the relevant files from the codebase. These will be your primary reference to solve the problem:\n\n<relevant_files>\n{joined_relevant_snippets}\n</relevant_files>"
messages.append(
Message(
role="user",
@@ -1450,7 +1450,7 @@ def get_files_to_change_for_gha(
messages.append(
Message(role="system", content=issue_sub_request_system_prompt, key="system")
)

# update the state of the snippets to be current
for relevant_snippet in relevant_snippets:
if relevant_snippet.file_path in updated_files:
relevant_snippet.content = updated_files[relevant_snippet.file_path]["contents"]
@@ -1459,18 +1459,6 @@
if read_only_snippet.file_path in updated_files:
Contributor
The removal of the block of code that reclassified snippets based on the presence of "test" in their file paths could lead to incorrect classification of snippets.

read_only_snippet.content = updated_files[read_only_snippet.file_path]["contents"]

new_relevant_snippets = []
new_read_only_snippets = []
for snippet in relevant_snippets + read_only_snippets:
if snippet in new_relevant_snippets or snippet in new_read_only_snippets:
continue
if "test" not in snippet.file_path:
new_read_only_snippets.append(snippet)
else:
new_relevant_snippets.append(snippet)
relevant_snippets = new_relevant_snippets
read_only_snippets = new_read_only_snippets

interleaved_snippets = []
for i in range(max(len(relevant_snippets), len(read_only_snippets))):
if i < len(relevant_snippets):
@@ -1499,7 +1487,6 @@ def get_files_to_change_for_gha(
key="relevant_snippets",
)
)

relevant_snippet_template = '<relevant_file index="{i}">\n<file_path>\n{file_path}\n</file_path>\n<source>\n{content}\n</source>\n</relevant_file>'
joined_relevant_snippets = "\n".join(
relevant_snippet_template.format(
@@ -1508,25 +1495,14 @@
content=snippet.expand(300).get_snippet(add_lines=False) if snippet.type_name == "source" else snippet.get_snippet(add_lines=False),
) for i, snippet in enumerate(relevant_snippets)
)
relevant_snippets_message = f"# Relevant codebase files:\nHere are the relevant files from the codebase. We previously summarized each of the files to help you solve the GitHub issue. These will be your primary reference to solve the problem:\n\n<relevant_files>\n{joined_relevant_snippets}\n</relevant_files>"
relevant_snippets_message = f"# Relevant codebase files:\nHere are the relevant files from the codebase. These will be your primary reference to solve the problem:\n\n<relevant_files>\n{joined_relevant_snippets}\n</relevant_files>"
messages.append(
Message(
role="user",
content=relevant_snippets_message,
key="relevant_snippets",
)
)
# previous_diffs = get_previous_diffs(
# problem_statement,
# cloned_repo=cloned_repo,
# relevant_file_paths=[snippet.file_path for snippet in relevant_snippets],
# )
# messages.append( # temporarily disable in main
# Message(
# role="user",
# content=previous_diffs,
# )
# )
messages.append(
Message(
role="user",
123 changes: 116 additions & 7 deletions sweepai/handlers/on_check_suite.py
@@ -3,13 +3,22 @@
"""
import io
import re
from time import sleep
import zipfile

from loguru import logger
import requests

from github.Repository import Repository
from github.CommitStatus import CommitStatus
from sweepai.config.server import CIRCLE_CI_PAT
from sweepai.logn.cache import file_cache
from sweepai.utils.github_utils import get_token

MAX_LINES = 500
LINES_TO_KEEP = 100
CIRCLECI_SLEEP_DURATION_SECONDS = 15

log_message = """GitHub actions yielded the following error.

{error_logs}
@@ -28,6 +37,8 @@ def get_files_in_dir(zipfile: zipfile.ZipFile, dir: str):
if file.startswith(dir) and not file.endswith("/")
]

def remove_ansi_tags(logs: str) -> str:
Contributor
Sweep has identified a redundant function: The new remove_ansi_tags function is useless because its exact functionality is already implemented by the existing strip_ansi_codes function in code_validators.py.

return re.sub(r'\x1b\[[0-9;]*[a-zA-Z]', "", logs, flags=re.MULTILINE)
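
A quick sketch of what remove_ansi_tags strips from colored CI output (the sample string is hypothetical):

raw = "\x1b[31mFAIL\x1b[0m tests/test_app.py"  # hypothetical colored log line
assert remove_ansi_tags(raw) == "FAIL tests/test_app.py"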

@file_cache()
def download_logs(repo_full_name: str, run_id: int, installation_id: int, get_errors_only=True):
@@ -71,8 +82,6 @@ def download_logs(repo_full_name: str, run_id: int, installation_id: int, get_errors_only=True):

def clean_gh_logs(logs_str: str):
# Extraction process could be better
MAX_LINES = 500
LINES_TO_KEEP = 100
log_list = logs_str.split("\n")
truncated_logs = [log[log.find(" ") + 1 :] for log in log_list]
logs_str = "\n".join(truncated_logs)
@@ -84,6 +93,13 @@ def clean_gh_logs(logs_str: str):
command_line = match.group(1).strip()
log_content = match.group(2).strip()
error_line = match.group(3).strip() # can be super long
return clean_cicd_logs(
command=command_line,
error=error_line,
logs=log_content,
)

def clean_cicd_logs(command: str, error: str, logs: str):
patterns = [
# for docker
"Already exists",
@@ -111,21 +127,114 @@ def clean_gh_logs(logs_str: str):
]
cleaned_logs = [
log.strip()
for log in log_content.split("\n")
for log in logs.split("\n")
if not any(log.strip().startswith(pattern) for pattern in patterns)
]
if len(cleaned_logs) > MAX_LINES:
# return the first LINES_TO_KEEP and the last LINES_TO_KEEP
cleaned_logs = cleaned_logs[:LINES_TO_KEEP] + ["..."] + cleaned_logs[-LINES_TO_KEEP:]
cleaned_logs_str = "\n".join(cleaned_logs)
error_content = ""
if len(error_line) < 200000:
if len(error) < 200000:
error_content = f"""<errors>
{error_line}
{error}
</errors>"""
cleaned_response = gha_prompt.format(
command_line=command_line,
command_line=command,
error_content=error_content,
cleaned_logs_str=cleaned_logs_str,
)
return cleaned_response
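
With the extraction refactored this way, clean_cicd_logs can be driven directly with provider-agnostic inputs; a minimal sketch, with hypothetical values:

formatted = clean_cicd_logs(
    command="pytest -x",  # hypothetical failing command
    error="AssertionError: expected 200, got 500",
    logs="collected 12 items\ntests/test_api.py ...F",
)
# formatted is the gha_prompt-rendered block with boilerplate lines filtered out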

def get_circleci_job_details(job_number, project_slug, vcs_type='github'):
# project_slug is the repo full name
headers = {'Circle-Token': CIRCLE_CI_PAT}
url = f"https://circleci.com/api/v1.1/project/{vcs_type}/{project_slug}/{job_number}"
response = requests.get(url, headers=headers)
return response.json()
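
For context, a hypothetical call; the job number and slug are made up, and the response is assumed to be a dict whose "steps" list carries "actions" entries with exit codes, as consumed below:

details = get_circleci_job_details(job_number="1234", project_slug="sweepai/sweep")
for step in details.get("steps", []):
    print(step["actions"][0].get("exit_code"))  # non-zero exit codes mark failing steps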

# take a CircleCI run URL and return all failing logs as a single string
def get_failing_circleci_log_from_url(circleci_run_url: str, repo_full_name: str):
if not CIRCLE_CI_PAT:
logger.warning("CIRCLE_CI_PAT not set")
return ""
headers = {'Circle-Token': CIRCLE_CI_PAT}
job_number = circleci_run_url.split("/")[-1]
circleci_run_details = get_circleci_job_details(job_number, repo_full_name)
steps = circleci_run_details['steps']
failing_steps = []
failed_commands_and_logs = ""
for step in steps:
if step['actions'][0]['exit_code'] != 0:
failing_steps.append(step)
for step in failing_steps:
actions = step['actions']
for action in actions:
if action.get("status") != "failed":
continue
if 'output_url' in action:
log_url = action['output_url']
log_response = requests.get(log_url, headers=headers)
log_response = log_response.json()
# these might return in a different order; watch out
log_message = log_response[0]["message"] if len(log_response) > 0 else ""
error_message = log_response[1].get("message", "") if len(log_response) > 1 else ""
log_message = remove_ansi_tags(log_message)
error_message = remove_ansi_tags(error_message)
command = action.get("bash_command", "No command found.") # seems like this is the only command
circle_ci_failing_logs = clean_cicd_logs(
command=command,
error=error_message,
logs=log_message,
)
if circle_ci_failing_logs:
failed_commands_and_logs += circle_ci_failing_logs + "\n"
return failed_commands_and_logs
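
Called in isolation, with a hypothetical run URL (the job number is parsed from the final path segment):

logs = get_failing_circleci_log_from_url(
    circleci_run_url="https://circleci.com/gh/sweepai/sweep/5678",  # hypothetical
    repo_full_name="sweepai/sweep",
)
# returns "" (or the cleaned failures) once the run has finished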

def get_failing_circleci_logs(
repo: Repository,
current_commit: str,
):
# poll the combined status of the pygithub commit object until CircleCI finishes or fails
all_logs = ""
failing_statuses = []
total_poll_attempts = 0
# hacky workaround: CircleCI setups can take a long time, so the combined status may report "success" once setup finishes even though the actual CI is still running
logger.debug("Waiting for 60 seconds before polling for CircleCI status.")
sleep(60)
while True:
commit = repo.get_commit(current_commit)
status = commit.get_combined_status()
# https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#get-the-combined-status-for-a-specific-reference
all_statuses: list[CommitStatus] = status.statuses
# if all are success, break
if all(status.state == "success" for status in all_statuses):
failing_statuses = []
logger.debug(f"Exiting polling for CircleCI as all statuses are success. Statuses were: {all_statuses}")
break
# if any of the statuses are failure, return those statuses
failing_statuses = [status for status in all_statuses if status.state == "failure"]
if failing_statuses:
logger.debug(f"Exiting polling for CircleCI as some statuses are failing. Statuses were: {all_statuses}")
break
# if any of the statuses are pending, sleep and try again
if any(status.state == "pending" for status in all_statuses):
if total_poll_attempts * CIRCLECI_SLEEP_DURATION_SECONDS // 60 >= 60: # give up after roughly 60 minutes of polling
logger.debug("Polling for CircleCI has taken too long, giving up.")
break
# wait between check attempts
total_poll_attempts += 1
logger.debug(f"Polling to see if CircleCI has finished... {total_poll_attempts}.")
sleep(CIRCLECI_SLEEP_DURATION_SECONDS)
continue
# done polling
for status_detail in failing_statuses:
# CircleCI run detected
if 'circleci' in status_detail.context.lower():
failing_circle_ci_log = get_failing_circleci_log_from_url(
circleci_run_url=status_detail.target_url,
repo_full_name=repo.full_name
) # may be empty string
if failing_circle_ci_log:
all_logs += failing_circle_ci_log + "\n"
return all_logs
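
End to end, the handler might be exercised like this (token, repo, and SHA are all hypothetical); note the initial 60-second sleep plus polling means this call can block for several minutes:

from github import Github
from sweepai.handlers.on_check_suite import get_failing_circleci_logs

g = Github("<github-token>")  # hypothetical token
repo = g.get_repo("sweepai/sweep")
failing_logs = get_failing_circleci_logs(repo=repo, current_commit="0123abc")  # hypothetical SHA
if failing_logs:
    print(failing_logs)  # cleaned, prompt-formatted failures ready for the LLM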