From c1f4cf1d7363afe83f63584c1654e1433a85caef Mon Sep 17 00:00:00 2001 From: doinaoki Date: Sun, 6 Oct 2024 03:51:31 +0900 Subject: [PATCH] fix: threshold renamings --- evaluation-lightweight.sh | 2 +- renas/preliminaryResearch.sh | 2 +- renas/relationship/analyzer.py | 11 +++++------ renas/repository_analyzer.py | 8 ++++---- renas/researchQuestion.sh | 2 +- renas/researchQuestionManually.sh | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/evaluation-lightweight.sh b/evaluation-lightweight.sh index 5a592a79..3c4e9046 100644 --- a/evaluation-lightweight.sh +++ b/evaluation-lightweight.sh @@ -3,6 +3,6 @@ args="" projects="projects/baasbox" -python3 -m renas.repository_analyzer $projects +python3 -m renas.repository_analyzer $projects -threshold 1 python3 -m renas.recommendation $projects python3 -m renas.evaluator -sim -pre $projects diff --git a/renas/preliminaryResearch.sh b/renas/preliminaryResearch.sh index 47cdf044..4fb8653e 100644 --- a/renas/preliminaryResearch.sh +++ b/renas/preliminaryResearch.sh @@ -5,7 +5,7 @@ projects=("baasbox" "cordova-plugin-local-notifications" "morphia" "spring-integ for line in "${projects[@]}" do -python3 -m renas.repository_analyzer "projects/${line}" +python3 -m renas.repository_analyzer "projects/${line}" -threshold 1 python3 -m renas.recommendation "projects/${line}" args="${args} projects/${line}" done diff --git a/renas/relationship/analyzer.py b/renas/relationship/analyzer.py index bd76d071..708fe7fb 100644 --- a/renas/relationship/analyzer.py +++ b/renas/relationship/analyzer.py @@ -38,13 +38,12 @@ def set_logger(level): return root_logger -def filter_data(data, threshold): +def filter_data(data, threshold: int): LOGGER.info("filter data") commits = data.groupby("commit").size() - if threshold: - commits = commits[commits > 3] - else: - commits = commits[commits > 1] + + commits = commits[commits > threshold] + LOGGER.info(f"threshold = more than {threshold} renames") LOGGER.info(f"total {commits.sum()} renames") LOGGER.info(f"pick {len(commits)} commits") return data[data["commit"].isin(commits.index)] @@ -86,7 +85,7 @@ def git_archive_wrapper(arg): return git_archive(*arg) -def main(root: pathlib.Path, rename_data: pd.DataFrame, threshold: bool): +def main(root: pathlib.Path, rename_data: pd.DataFrame, threshold: int): set_logger(INFO) try: rename_data = filter_data(rename_data, threshold) diff --git a/renas/repository_analyzer.py b/renas/repository_analyzer.py index d4b8da99..ee3c3eb4 100644 --- a/renas/repository_analyzer.py +++ b/renas/repository_analyzer.py @@ -15,9 +15,9 @@ def set_argument(): ) parser.add_argument( "-threshold", - help="use commit which has more than 3 renames", - action="store_true", - default=False, + help="use commit which has more than specifying renames", + action="store", + default=0, ) parser.add_argument( "-f", @@ -38,7 +38,7 @@ def main(root, args): dump(root, rename_data) else: rename_data = pd.read_json(rename_path, orient="records") - analyzer.main(root, rename_data, args.threshold) + analyzer.main(root, rename_data, int(args.threshold)) def dump(root, data: pd.DataFrame): diff --git a/renas/researchQuestion.sh b/renas/researchQuestion.sh index 4dc0d2b9..dac44c2b 100644 --- a/renas/researchQuestion.sh +++ b/renas/researchQuestion.sh @@ -5,7 +5,7 @@ projects=("testng" "jackson-databind" "rest.li" "Activiti" "k-9" "genie" "eucaly for line in "${projects[@]}" do -python3 -m renas.repository_analyzer "projects/${line}" +python3 -m renas.repository_analyzer "projects/${line}" -threshold 3 python3 -m renas.recommendation "projects/${line}" args="${args} projects/${line}" done diff --git a/renas/researchQuestionManually.sh b/renas/researchQuestionManually.sh index 2009fcfd..1830e544 100644 --- a/renas/researchQuestionManually.sh +++ b/renas/researchQuestionManually.sh @@ -5,7 +5,7 @@ projects=("ratpack" "argouml") for line in "${projects[@]}" do -python3 -m renas.repository_analyzer "projects/${line}" +python3 -m renas.repository_analyzer "projects/${line}" -threshold 1 python3 -m renas.recommendation "projects/${line}" args="${args} projects/${line}" done