-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathhasher_test.py
137 lines (117 loc) · 5.12 KB
/
hasher_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Copyright 2023 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd
"""Tests for hasher."""
import functools
from absl import logging
import mmh3
from vanir import hasher
from absl.testing import absltest
from absl.testing import parameterized
# Shared fixture for the line-chunk tests: maps a line number to the
# normalized text of that line. The line numbers are deliberately
# non-contiguous (gaps after 11, 20 and 24).
_TEST_LINE_CHUNK_NORMALIZED_CODE = {
    5: 'void __init testdev_init ( void )',
    6: '{',
    7: 'memset ( cdev , 0 , sizeof * cdev ) ;',
    8: 'init_list_head ( & cdev -> list ) ;',
    9: 'kobject_init ( & cdev -> kobj , & ktype_cdev_default ) ;',
    10: 'cdev -> ops = fops ;',
    11: '}',
    19: 'cdev_map = kobj_map_init ( base_probe , & testdevs_lock ) ;',
    20: '}',
    24: 'export_symbol ( register_testdev_region ) ;',
    34: 'export_symbol ( __register_testdev ) ;',
    35: 'export_symbol ( __unregister_testdev ) ;',
}
class HasherTest(parameterized.TestCase):
  """Tests for the function-chunk and line-chunk hashing in `hasher`.

  Expected hashes are computed inside the tests with `mmh3.hash128`
  (seed 0, x64 arch, unsigned) and compared with what `hasher` returns.
  """

  def setUp(self):
    """Builds the reference hash callable used to derive expected values."""
    # Let the base class set up its fixtures before adding our own state.
    super().setUp()
    self._hash = functools.partial(
        mmh3.hash128, seed=0, x64arch=True, signed=False)

  def test_function_chunk_hash(self):
    """Checks that a function chunk hash equals the reference hash."""
    test_normalized_code = (
        'DTYPE FUNCNAME ( const unsigned DTYPE PARAM ) { const DTYPE '
        '* VAR = ( DTYPE * ) globalvar -> data '
        '; FUNCCALL ( PARAM , VAR ) ; 0xe8 ( ) ; return 0 ; }')
    function_hash = hasher.hash_function_chunk(test_normalized_code)
    expected_function_hash = self._hash(test_normalized_code)
    self.assertEqual(function_hash, expected_function_hash)

  @parameterized.named_parameters(
      dict(
          testcase_name='with_no_affected_ranges',
          normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE,
          affected_ranges=[],
          expected_used_lines=[5, 6, 7, 8, 9, 10, 11, 19, 20, 24, 34, 35]),
      dict(
          testcase_name='with_affected_ranges_in_middle',
          normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE,
          affected_ranges=[(9, 10)],
          expected_used_lines=[6, 7, 8, 9, 10, 11, 19, 20]),
      dict(
          testcase_name='with_affected_ranges_at_file_start',
          normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE,
          affected_ranges=[(0, 0)],
          expected_used_lines=[5, 6, 7, 8]),
      dict(
          testcase_name='with_affected_ranges_at_file_end',
          normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE,
          affected_ranges=[(50, 50)],
          expected_used_lines=[20, 24, 34, 35]),
      dict(
          testcase_name='with_short_normalized_code',
          normalized_code={
              3: '#define AUDIT_NAMES 5',
              4: '#define auditsc_get_stamp ( c , t , s ) 0'
          },
          affected_ranges=[],
          expected_used_lines=[3, 4]))
  def test_line_chunk_hash(self, normalized_code, affected_ranges,
                           expected_used_lines):
    """Tests various successful cases of line chunk hash generation.

    Args:
      normalized_code: the normalized code to test.
      affected_ranges: the affected ranges to test with for the normalized
        code.
      expected_used_lines: expected lines to be used for signature hash
        generation. This value varies depending on |affected_ranges| but this
        test explicitly requires this arg because we want to test the
        corresponding logic in the main code rather than to run the identical
        logic again in the test.
    """
    # Expected n-grams are every window of four consecutive entries of
    # |expected_used_lines|; fewer than four entries yields no window.
    window_size = 4
    expected_line_number_ngrams = [
        expected_used_lines[start:start + window_size]
        for start in range(len(expected_used_lines) - window_size + 1)
    ]
    if not expected_line_number_ngrams:
      # Too short for a full window: a single n-gram made of every line of
      # the normalized code is expected instead.
      expected_line_number_ngrams.append(list(normalized_code))

    expected_hashes = [
        self._hash(
            ' '.join(normalized_code[line_number]
                     for line_number in line_numbers))
        for line_numbers in expected_line_number_ngrams
    ]

    line_hashes, used_lines = hasher.hash_line_chunk(normalized_code,
                                                     affected_ranges)

    self.assertCountEqual(expected_used_lines, used_lines)
    self.assertEqual(expected_hashes, line_hashes)

  def test_line_chunk_hash_with_empty_normalized_code_is_warned(self):
    """Checks that empty input logs a warning and returns empty results."""
    test_normalized_code = {}
    # `logging` here is absl's module, whose WARNING constant is on absl's own
    # severity scale rather than the standard-library scale that assertLogs
    # expects. Convert it so only WARNING-or-higher records are captured.
    warning_level = logging.converter.absl_to_standard(logging.WARNING)
    with self.assertLogs(level=warning_level) as logs:
      line_hashes, used_lines = hasher.hash_line_chunk(test_normalized_code, [])
    self.assertIn(
        'No valid line found from the normalized code. Returning empty lists.',
        logs.output[0])
    self.assertEmpty(line_hashes)
    self.assertEmpty(used_lines)

  def test_line_ngram_overlap_check_fails_with_reversed_line_range(self):
    """Checks that a range with start > end is rejected with ValueError."""
    # Case unable to be triggered by public class; directly test private class.
    test_ngram_line_numbers = [7, 8, 9, 10]
    test_affected_line_range = (10, 7)
    ngram = hasher._LineNgram(_TEST_LINE_CHUNK_NORMALIZED_CODE,
                              test_ngram_line_numbers)
    expected_error_msg = (
        r'line_range: start \(10\) cannot be greater than end \(7\)')
    with self.assertRaisesRegex(ValueError, expected_error_msg):
      ngram.is_overlapping(test_affected_line_range)
if __name__ == '__main__':
  # Run the tests through absl's test runner when executed as a script.
  absltest.main()