-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
67 lines (54 loc) · 2.71 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from syntactic_metric import rate_repository_syntactic
from utils import delete_repo
import pandas as pd
import os
from openai_prompts import prompt_langchain
# Function to evaluate a single repository
def evaluate_repo(index, row, dataframe, is_improved=False):
    """Rate one repository and store both ratings in ``dataframe``.

    Args:
        index: Row label in ``dataframe`` whose rating cells are written.
        row: Row object providing the ``"Repository URL"`` entry.
        dataframe: DataFrame updated in place through ``.at``; it receives the
            ``"Semantic Rating"`` and ``"Syntactic Rating"`` values.
        is_improved: When true, the repository is rated with source
            ``'improved'`` and the semantic rating is requested for the local
            path ``./improved_repos/{repo_name}`` instead of the URL. In that
            case ``delete_repo`` is not called.

    Returns:
        The same ``dataframe`` object that was passed in.
    """
    repo_url = row["Repository URL"]
    print(f"Evaluating repository: {repo_url}")

    # The repository name is the last two path segments of the URL
    # (presumably "owner/repo" -- confirm against repositories.csv).
    # Trailing slashes are dropped first; otherwise ".../owner/repo/" would
    # yield "repo/" because the final split segment is empty.
    repo_name = "/".join(repo_url.rstrip("/").split("/")[-2:])
    repo_source = 'improved' if is_improved else 'github'

    # Improved repositories are rated from their local copy, originals by URL.
    semantic_target = f"./improved_repos/{repo_name}" if is_improved else repo_url

    syntactic_score = rate_repository_syntactic(repo_name, repo_source)
    semantic_score = prompt_langchain(semantic_target, 'rate')

    # Both results are unpacked as mappings; on duplicate keys the semantic
    # result wins. The merged dict must contain "semantic_score" and
    # "syntactic_score" or the lookups below raise KeyError.
    score = {**syntactic_score, **semantic_score}
    print(f"der Score für das das Repo: {repo_url} ist: {score}")

    dataframe.at[index, "Semantic Rating"] = score["semantic_score"]
    dataframe.at[index, "Syntactic Rating"] = score["syntactic_score"]

    # Only the original (non-improved) repository is handed to delete_repo
    # (presumably removing a local clone -- verify in utils).
    if not is_improved:
        delete_repo(repo_url)
    return dataframe
def main():
    """Rate every repository in ``repositories.csv`` before and after improvement.

    Reads ``repositories.csv`` (it must provide a ``"Repository URL"``
    column), runs a baseline rating pass that writes ``rates.csv`` unless that
    file already exists, then runs the ``'improve'`` prompt on each repository,
    rates the improved version and writes ``rates_improved.csv``.
    """
    repositories_df = pd.read_csv("repositories.csv")
    repositories_df["Semantic Rating"] = None
    repositories_df["Syntactic Rating"] = None

    # Baseline pass, skipped when rates.csv is already present.
    # NOTE(review): rates.csv is written after every repository, so an
    # interrupted run leaves a partial file that makes later runs skip this
    # pass; delete rates.csv to redo the baseline.
    if not os.path.exists("rates.csv"):
        for index, row in repositories_df.iterrows():
            repositories_df = evaluate_repo(index, row, repositories_df)
            # Save after each repository so finished ratings survive a crash.
            repositories_df.to_csv("rates.csv", index=False)
            print('\n\n\n')

        # Clear the baseline ratings so that intermediate saves of
        # rates_improved.csv never mix baseline and improved scores.
        repositories_df["Semantic Rating"] = None
        repositories_df["Syntactic Rating"] = None

    # Improvement pass: improve each repository, then rate the improved copy.
    for index, row in repositories_df.iterrows():
        prompt_langchain(row["Repository URL"], 'improve')
        repositories_df = evaluate_repo(index, row, repositories_df, is_improved=True)
        repositories_df.to_csv("rates_improved.csv", index=False)
        print('\n\n\n')


if __name__ == "__main__":
    main()