Skip to content

Commit

Permalink
refactor: optimizes featurebased algorithms.
Browse files
  • Loading branch information
Artanias committed Sep 28, 2024
1 parent 3eef1bc commit c44361a
Show file tree
Hide file tree
Showing 7 changed files with 18 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
UTIL_VERSION := 0.5.4
UTIL_VERSION := 0.5.5
UTIL_NAME := codeplag
PWD := $(shell pwd)

Expand Down
2 changes: 1 addition & 1 deletion src/codeplag/algorithms/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def compare_works(
if threshold and (fast_metrics.weighted_average * 100.0) < threshold:
return CompareInfo(fast=fast_metrics)

compliance_matrix = np.zeros(
compliance_matrix = np.empty(
(len(features1.head_nodes), len(features2.head_nodes), 2), dtype=np.int64
)
struct_res = struct_compare(features1.structure, features2.structure, compliance_matrix)
Expand Down
21 changes: 11 additions & 10 deletions src/codeplag/algorithms/featurebased.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,23 @@ def counter_metric(counter1: Mapping[str, int], counter2: Mapping[str, int]) ->
if len(counter1) == 0 and len(counter2) == 0:
return 1.0

percent_of_same = [0, 0]
percent_of_same_numerator = 0
percent_of_same_denominator = 0
for key in counter1:
if key not in counter2:
percent_of_same[1] += counter1[key]
percent_of_same_denominator += counter1[key]
continue
percent_of_same[0] += min(counter1[key], counter2[key])
percent_of_same[1] += max(counter1[key], counter2[key])
percent_of_same_numerator += min(counter1[key], counter2[key])
percent_of_same_denominator += max(counter1[key], counter2[key])
for key in counter2:
if key not in counter1:
percent_of_same[1] += counter2[key]
percent_of_same_denominator += counter2[key]
continue

if percent_of_same[1] == 0:
if percent_of_same_denominator == 0:
return 0.0

return percent_of_same[0] / percent_of_same[1]
return percent_of_same_numerator / percent_of_same_denominator


def op_shift_metric(ops1: list[str], ops2: list[str]) -> tuple[int, float]:
Expand All @@ -54,7 +55,7 @@ def op_shift_metric(ops1: list[str], ops2: list[str]) -> tuple[int, float]:
ops1, ops2 = ops2, ops1
count_el_f, count_el_s = count_el_s, count_el_f

y = np.zeros(count_el_s, dtype=np.float32)
y = np.empty(count_el_s, dtype=np.float32)

shift = 0
while shift < count_el_s:
Expand Down Expand Up @@ -234,11 +235,11 @@ def struct_compare(
key_indexes1.append(count_of_nodes1)
key_indexes2.append(count_of_nodes2)

array = np.zeros((count_of_children1, count_of_children2, 2), dtype=np.int64)
array = np.empty((count_of_children1, count_of_children2, 2), dtype=np.int64)

for i in np.arange(0, count_of_children1, 1):
section1 = tree1[key_indexes1[i] + 1 : key_indexes1[i + 1]]
for j in np.arange(0, count_of_children2, 1):
section1 = tree1[key_indexes1[i] + 1 : key_indexes1[i + 1]]
section2 = tree2[key_indexes2[j] + 1 : key_indexes2[j + 1]]
array[i][j] = struct_compare(section1, section2)

Expand Down
2 changes: 1 addition & 1 deletion src/codeplag/algorithms/stringbased.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self: Self, sequence1: Sequence, sequence2: Sequence) -> None:
self.s1_length = len(sequence1)
self.s2_length = len(sequence2)
self.distance = -1
self.distance_matrix = np.zeros((self.s1_length + 1, self.s2_length + 1), dtype=np.int64)
self.distance_matrix = np.empty((self.s1_length + 1, self.s2_length + 1), dtype=np.int64)

@staticmethod
def m(symbol1: str, symbol2: str) -> Literal[0, 1]:
Expand Down
2 changes: 1 addition & 1 deletion src/codeplag/handlers/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ def compliance_matrix_to_df(
head_nodes1: list[str],
head_nodes2: list[str],
) -> pd.DataFrame:
data = np.zeros(
data = np.empty(
(
compliance_matrix.shape[0],
compliance_matrix.shape[1],
Expand Down
2 changes: 1 addition & 1 deletion src/codeplag/handlers/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def html_report_create(report_path: Path, report_type: ReportType) -> Literal[0,

def _convert_similarity_matrix_to_percent_matrix(matrix: NDArray) -> NDArray:
"""Convert compliance matrix of size N x M x 2 to percent 2 dimensional matrix."""
percent_matrix = np.zeros((matrix.shape[0], matrix.shape[1]), dtype=np.float64)
percent_matrix = np.empty((matrix.shape[0], matrix.shape[1]), dtype=np.float64)
for i in range(matrix.shape[0]):
for j in range(matrix.shape[1]):
percent_matrix[i][j] = round(matrix[i][j][0] / matrix[i][j][1] * 100, 2)
Expand Down
4 changes: 2 additions & 2 deletions test/unit/codeplag/algorithms/test_featurebased.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_struct_compare_normal(self: Self) -> None:
(3, 8), (3, 8), (2, 9)]
count_ch1 = (get_children_indexes(structure1, len(structure1)))[1]
count_ch2 = (get_children_indexes(structure2, len(structure2)))[1]
compliance_matrix = np.zeros((count_ch1, count_ch2, 2),
compliance_matrix = np.empty((count_ch1, count_ch2, 2),
dtype=np.int64)
res = struct_compare(structure1, structure2,
compliance_matrix)
Expand All @@ -160,7 +160,7 @@ def test_struct_compare_normal(self: Self) -> None:
(4, 4), (5, 8), (4, 10), (5, 4)]
count_ch1 = (get_children_indexes(structure1, len(structure1)))[1]
count_ch2 = (get_children_indexes(structure2, len(structure2)))[1]
compliance_matrix = np.zeros((count_ch1, count_ch2, 2),
compliance_matrix = np.empty((count_ch1, count_ch2, 2),
dtype=np.int64)
res = struct_compare(structure1, structure2,
compliance_matrix)
Expand Down

0 comments on commit c44361a

Please sign in to comment.