-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoptimize.c
249 lines (235 loc) · 7.55 KB
/
optimize.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
/* SPDX-FileCopyrightText: 2024 - 2025 Eli Array Minkoff
*
* SPDX-License-Identifier: GPL-3.0-only
*
* implements the filter_dead function, which filters non-bf bytes and dead code
* and replaces `[-]` and `[+]` sequences with `@`. */
/* C99 */
#include <string.h>
/* internal */
#include "attributes.h"
#include "err.h"
#include "types.h"
#include "util.h"
/* filter out the non-bf characters from code->buf */
static nonnull_args void filter_non_bf(sized_buf *code) {
sized_buf tmp = newbuf(code->sz);
char instr;
for (size_t i = 0; i < code->sz; i++) {
switch (instr = code->buf[i]) {
case '[':
case '-':
case '.':
case '<':
case '>':
case ',':
case '+':
case ']':
append_obj(&tmp, &instr, 1);
break;
}
}
instr = '\0';
/* null terminate it */
append_obj(&tmp, &instr, 1);
code->sz = 0;
append_obj(code, tmp.buf, tmp.sz);
free(tmp.buf);
}
static void instr_err(
bf_err_id id, const char *msg, char instr, const char *in_name
) {
display_err((bf_comp_err){
.id = id,
.msg.ref = msg,
.instr = instr,
.file = in_name,
.has_instr = true,
});
}
/* A function that skips past a matching ].
* loop_start is a pointer to the [ at the start of the loop */
static const nonnull_args char *find_loop_end(
const char *loop_start, const char *in_name
) {
uint nest_level = 1;
const char *p = loop_start;
while (*(++p)) {
if (*p == '[') {
nest_level++;
} else if (*p == ']') {
if (--nest_level == 0) return p + 1;
}
}
/* The above loop only terminates if an '[' was unmatched. */
instr_err(
BF_ERR_UNMATCHED_OPEN,
"Could not optimize due to unmatched '['",
'[',
in_name
);
return NULL;
}
/* return true if the loops are balanced, false otherwise */
static nonnull_args bool loops_match(const char *code, const char *in_name) {
const char *open_p = strchr(code, '[');
const char *close_p = strchr(code, ']');
/* if none are found, it's fine. */
if ((open_p == NULL) && (close_p == NULL)) return true;
/* if only one is found, that's a mismatch */
if ((open_p == NULL) && !(close_p == NULL)) {
instr_err(
BF_ERR_UNMATCHED_CLOSE,
"Could not optimize due to unmatched ']'",
']',
in_name
);
return false;
}
if ((close_p == NULL) && !(open_p == NULL)) {
instr_err(
BF_ERR_UNMATCHED_OPEN,
"Could not optimize due to unmatched '['",
'[',
in_name
);
return false;
}
/* ensure that it opens before it closes */
if (open_p > close_p) return false;
/* if this point is reached, both are found. Ensure they are balanced. */
return (find_loop_end(open_p, in_name) != NULL);
}
#define REPSTR16(s) s s s s s s s s s s s s s s s s
#define REPSTR64(s) REPSTR16(s) REPSTR16(s) REPSTR16(s) REPSTR16(s)
#define REPSTR256(s) REPSTR64(s) REPSTR64(s) REPSTR64(s) REPSTR64(s)
/* remove redundant instruction sequences like `<>` */
static nonnull_args bool remove_dead(sized_buf *ir, const char *in_name) {
/* code constructs that do nothing - either 2 adjacent instructions that
* cancel each other out, or 256 consecutive `+` or `-` instructions that
* loop the current cell back to its current value */
char *str = ir->buf;
const char *simple_patterns[] = {
"<>",
"><",
"-+",
"+-",
REPSTR256("+"),
REPSTR256("-"),
};
/* don't want to compute these every loop, or even every run, so hard-code
* the known sizes of the simple patterns here. */
size_t simple_pattern_sizes[] = {2, 2, 2, 2, 256, 256};
bool matched = false;
char *match_start;
const char *loop_end;
do {
matched = false;
/* if str opens with a loop, that loop won't run, so remove it */
if (*str == '[') {
matched = true;
loop_end = find_loop_end(str, in_name);
if (loop_end == NULL) return false;
memmove(str, loop_end, strlen(loop_end) + 1);
}
/* next, remove any matches for simple_patterns[*] */
for (ufast_8 i = 0; i < 6 /* there are 6 patterns */; i++) {
while (((match_start = strstr(str, simple_patterns[i])) != NULL)) {
matched = true;
memmove(
match_start,
match_start + simple_pattern_sizes[i],
strlen(match_start) - simple_pattern_sizes[i] + 1
);
}
}
/* finally, remove any loops that begin right after other loops end */
while (((match_start = strstr(str, "][")) != NULL)) {
matched = true;
/* skip past the closing `]` */
loop_end = find_loop_end(++match_start, in_name);
if (loop_end == NULL) return false;
memmove(match_start, loop_end, strlen(loop_end) + 1);
}
} while (matched);
ir->sz = strlen(str) + 1;
return true;
}
/* Merge `[-]` and `[+]` into `@` */
static nonnull_args void merge_set_zero(sized_buf *ir) {
char *p;
/* handle [-] */
while ((p = strstr(ir->buf, "[-]"))) {
*p = '@';
memmove(p + 1, p + 3, strlen(p + 3) + 1);
ir->sz -= 2;
}
/* handle [+] */
while ((p = strstr(ir->buf, "[+]"))) {
*p = '@';
memmove(p + 1, p + 3, strlen(p + 3) + 1);
ir->sz -= 2;
}
}
/* filter out all non-BF bytes, and anything that is trivially determined to be
* dead code, or code with no effect (e.g. "+-" or "<>"), and replace "[-]" and
* "[+]" with "@".
*
* "in_name" is the source filename, and is used only to generate error
* messages. */
nonnull_args bool filter_dead(sized_buf *bf_code, const char *in_name) {
filter_non_bf(bf_code);
if (bf_code->buf == NULL) return false;
if (!loops_match(bf_code->buf, in_name)) return false;
remove_dead(bf_code, in_name);
if (bf_code->buf == NULL) return false;
merge_set_zero(bf_code);
return true;
}
#ifdef BFC_TEST
/* internal */
#include "unit_test.h"
static void strip_dead_code_test(void) {
/* fill filterable with have a bunch of valid brainfuck code which does the
* equivalent of ">[->+<]", and ensure that it is optimized down to just
* that sequence. */
sized_buf filterable = newbuf(574);
append_obj(
&filterable, "[+++++]><+---+++-[-][,[-][+>-<]]-+[-+]-+[]+-[]", 46
);
char *tgt = sb_reserve(&filterable, 256);
memset(tgt, '+', 256);
append_obj(&filterable, "[+-]>", 5);
tgt = sb_reserve(&filterable, 256);
memset(tgt, '-', 256);
append_obj(&filterable, "[->+<][,.]", 10);
for (size_t i = 0; i < filterable.sz; ++i) {
switch (filterable.buf[i]) {
case '<':
case '>':
case '+':
case '-':
case '[':
case ']':
case '.':
case ',':
continue;
default:
CU_FAIL_FATAL("filterable contains non-bf bytes");
return;
}
}
filter_dead(&filterable, "test");
/* null-terminate before string comparison */
tgt = sb_reserve(&filterable, 1);
*tgt = '\0';
CU_ASSERT_STRING_EQUAL(filterable.buf, ">[->+<]");
free(filterable.buf);
}
CU_pSuite register_optimize_tests(void) {
CU_pSuite suite;
INIT_SUITE(suite);
ADD_TEST(suite, strip_dead_code_test);
return suite;
}
#endif