forked from google/bindiff
-
Notifications
You must be signed in to change notification settings - Fork 0
/
instruction.cc
218 lines (190 loc) · 7.25 KB
/
instruction.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
// Copyright 2011-2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "third_party/zynamics/bindiff/instruction.h"
#include <algorithm>
#include <boost/iterator/reverse_iterator.hpp> // NOLINT
#include <boost/iterator/transform_iterator.hpp> // NOLINT
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iterator>
#include <list>
#include <string>
#include <vector>
#include "third_party/absl/log/check.h"
#include "third_party/absl/log/log.h"
namespace security::bindiff {
namespace {
using Lengths = std::vector<int>;
// Calculate LCS row lengths given iterator ranges into two sequences.
// On completion, `lens` holds LCS lengths in the final row.
template <typename Iterator>
void LcsLens(Iterator xlo, Iterator xhi, Iterator ylo, Iterator yhi,
Lengths& lens) {
// Two rows of workspace.
// Careful! We need the 1 for the leftmost column.
Lengths curr(1 + distance(ylo, yhi), 0);
Lengths prev(curr);
for (Iterator x = xlo; x != xhi; ++x) {
swap(prev, curr);
int i = 0;
for (Iterator y = ylo; y != yhi; ++y, ++i) {
curr[i + 1] = *x == *y ? prev[i] + 1 : std::max(curr[i], prev[i + 1]);
}
}
swap(lens, curr);
}
// Recursive LCS calculation. See Hirschberg for the theory!
// This is a divide and conquer algorithm. In the recursive case, we split the
// xrange in two. Then, by calculating lengths of LCSes from the start and end
// corners of the [xlo, xhi] x [ylo, yhi] grid, we determine where the yrange
// should be split.
// xo is the origin (element 0) of the xs sequence
// xlo, xhi is the range of xs being processed
// ylo, yhi is the range of ys being processed
// Parameter xs_in_lcs holds the members of xs in the LCS.
template <typename Iterator, typename OutIterator>
void ComputeLcs(Iterator xo, Iterator xlo, Iterator xhi, Iterator yo,
Iterator ylo, Iterator yhi, OutIterator xout,
OutIterator yout) {
using ReverseIterator = boost::reverse_iterator<Iterator>;
const auto nx = distance(xlo, xhi);
if (nx == 0) { // all done
} else if (nx == 1) { // single item in x range.
// If it's in the yrange, mark its position in the LCS.
const Iterator pos = std::find(ylo, yhi, *xlo);
if (pos != yhi) {
*xout++ = distance(xo, xlo);
*yout++ = distance(yo, pos);
}
} else { // split the xrange
Iterator xmid = xlo + nx / 2;
// Find LCS lengths at xmid, working from both ends of the range
Lengths ll_b;
Lengths ll_e;
ReverseIterator hix(xhi);
ReverseIterator midx(xmid);
ReverseIterator hiy(yhi);
ReverseIterator loy(ylo);
LcsLens(xlo, xmid, ylo, yhi, ll_b);
LcsLens(hix, midx, hiy, loy, ll_e);
// Find the optimal place to split the y range
auto e = ll_e.rbegin();
int lmax = -1;
Iterator y = ylo;
Iterator ymid = ylo;
for (auto b = ll_b.cbegin(); b != ll_b.cend(); ++b, ++e) {
if (*b + *e > lmax) {
lmax = *b + *e;
ymid = y;
}
if (y != yhi) {
++y;
}
}
ComputeLcs(xo, xlo, xmid, yo, ylo, ymid, xout, yout);
ComputeLcs(xo, xmid, xhi, yo, ymid, yhi, xout, yout);
}
}
template <typename Iterator, typename OutIterator>
void ComputeLcs(Iterator xbegin, Iterator xend, Iterator ybegin, Iterator yend,
OutIterator xout, OutIterator yout) {
if (xbegin == xend || ybegin == yend) {
return;
}
// Optimize by eliminating common prefixes.
Iterator start1 = xbegin;
Iterator start2 = ybegin;
for (; start1 != xend && start2 != yend && *start1 == *start2;
++start1, ++start2) {
*xout++ = distance(xbegin, start1);
*yout++ = distance(ybegin, start2);
}
// Early exit if one string is completely contained in the other.
if (start1 == xend || start2 == yend) {
return;
}
// Optimize by eliminating common suffixes.
Iterator rstart1 = xend;
--rstart1;
Iterator rstart2 = yend;
--rstart2;
for (; rstart1 != start1 && rstart2 != start2 && *rstart1 == *rstart2;
--rstart1, --rstart2) {
// intentionally left blank
}
rstart1++;
rstart2++;
ComputeLcs(xbegin, start1, rstart1, ybegin, start2, rstart2, xout, yout);
for (; rstart1 != xend && rstart2 != yend; ++rstart1, ++rstart2) {
*xout++ = distance(xbegin, rstart1);
*yout++ = distance(ybegin, rstart2);
}
}
struct ProjectPrime {
size_t operator()(const Instruction& instruction) const {
return instruction.GetPrime();
}
};
} // namespace
void ComputeLcs(const Instructions::const_iterator& instructions1_begin,
const Instructions::const_iterator& instructions1_end,
const Instructions::const_iterator& instructions2_begin,
const Instructions::const_iterator& instructions2_end,
InstructionMatches& matches) {
std::list<size_t> matches1;
std::list<size_t> matches2;
using Iterator =
boost::transform_iterator<ProjectPrime, Instructions::const_iterator>;
ComputeLcs(Iterator(instructions1_begin, ProjectPrime()),
Iterator(instructions1_end, ProjectPrime()),
Iterator(instructions2_begin, ProjectPrime()),
Iterator(instructions2_end, ProjectPrime()),
std::back_inserter(matches1), std::back_inserter(matches2));
for (auto i = matches1.begin(), j = matches2.begin();
i != matches1.end() && j != matches2.end(); ++i, ++j) {
matches.emplace_back(&*(instructions1_begin + *i),
&*(instructions2_begin + *j));
}
matches.shrink_to_fit();
}
// Cache must have a lifetime longer than instruction, otherwise cache_entry_
// will point to invalid memory.
Instruction::Instruction(Cache* cache, Address address,
const std::string& mnemonic, uint32_t prime)
: address_(address), prime_(prime) {
// The differ standalone version is multi-threaded so be careful with static
// variables and the like.
CHECK(cache != nullptr);
auto prime_to_mnemonic = cache->find(prime);
if (prime_to_mnemonic != cache->end()) {
if (!prime_to_mnemonic->second.empty() && !mnemonic.empty() &&
prime_to_mnemonic->second != mnemonic) {
// Test for empty mnemonics as one of the VxClass space optimizations is
// to omit the actual mnemonic strings. If you then diff a file containing
// strings against one that doesn't, you'll get spurious warnings.
LOG(INFO) << "Hash collision detected! Mnemonics '"
<< prime_to_mnemonic->second << "' and '" << mnemonic
<< "', hash: " << prime;
}
} else {
(*cache)[prime] = mnemonic;
}
}
Address Instruction::GetAddress() const { return address_; }
uint32_t Instruction::GetPrime() const { return prime_; }
std::string Instruction::GetMnemonic(const Cache* cache) const {
return cache->find(GetPrime())->second;
}
} // namespace security::bindiff