Skip to content

Commit

Permalink
bump
Browse files Browse the repository at this point in the history
  • Loading branch information
ShawSumma committed Oct 25, 2023
1 parent 6779efe commit 4832ea9
Show file tree
Hide file tree
Showing 15 changed files with 415 additions and 204 deletions.
1 change: 1 addition & 0 deletions tb/include/tb_x64.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,6 @@ typedef struct {

bool tb_x86_disasm(TB_X86_Inst* restrict inst, size_t length, const uint8_t* data);
const char* tb_x86_reg_name(int8_t reg, TB_X86_DataType dt);
const char* tb_x86_type_name(TB_X86_DataType dt);

#endif /* TB_X64_H */
4 changes: 0 additions & 4 deletions tb/src/codegen/emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ typedef struct {
TB_Function* f;
TB_FunctionOutput* output;

bool emit_asm;
TB_Assembly *head_asm, *tail_asm;

// this is mapped to a giant buffer and is technically
Expand Down Expand Up @@ -51,9 +50,6 @@ static void tb_reloc4(TB_CGEmitter* restrict e, uint32_t p, uint32_t b) {
}

static void tb_asm_print(TB_CGEmitter* restrict e, const char* fmt, ...) {
// let's hope the optimizer can hoist this early-out outside of the call
if (!e->emit_asm) { return; }

// make sure we have enough bytes for the operation
TB_Assembly* new_head = e->tail_asm;
if (new_head == NULL || new_head->length + 100 >= TB_ASSEMBLY_CHUNK_CAP) {
Expand Down
24 changes: 22 additions & 2 deletions tb/src/codegen/generic_cg.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ static bool is_terminator(int type);
static bool wont_spill_around(int type);
static int classify_reg_class(TB_DataType dt);
static void isel(Ctx* restrict ctx, TB_Node* n, int dst);
static void disassemble(TB_CGEmitter* e, int id, size_t pos, size_t end);
static bool should_rematerialize(TB_Node* n);

static void emit_code(Ctx* restrict ctx, TB_FunctionOutput* restrict func_out, int end);
Expand Down Expand Up @@ -902,7 +903,6 @@ static void compile_function(TB_Passes* restrict p, TB_FunctionOutput* restrict
.target_abi = f->super.module->target_abi,
.emit = {
.f = f,
.emit_asm = emit_asm,
.output = func_out,
.data = out,
.capacity = out_capacity,
Expand Down Expand Up @@ -991,7 +991,6 @@ static void compile_function(TB_Passes* restrict p, TB_FunctionOutput* restrict
}
p->worklist = ctx.worklist;

EMITA(&ctx.emit, "%s:\n", f->super.name);
{
int end;
CUIK_TIMED_BLOCK("data flow") {
Expand All @@ -1017,6 +1016,27 @@ static void compile_function(TB_Passes* restrict p, TB_FunctionOutput* restrict
}
}

if (emit_asm) {
printf("%s:\n", f->super.name);

// dump prologue
disassemble(&ctx.emit, -1, 0, func_out->prologue_length);

TB_Node** bbs = ctx.worklist.items;
FOREACH_N(i, 0, ctx.bb_count) {
TB_Node* bb = bbs[bb_order[i]];

uint32_t start = nl_map_get_checked(ctx.emit.labels, bb) & ~0x80000000;
uint32_t end = ctx.emit.count;
if (i + 1 < ctx.bb_count) {
TB_Node* next = bbs[bb_order[i + 1]];
end = nl_map_get_checked(ctx.emit.labels, next) & ~0x80000000;
}

disassemble(&ctx.emit, i, start, end);
}
}

tb_free_cfg(&ctx.cfg);
nl_map_free(ctx.emit.labels);
nl_map_free(ctx.machine_bbs);
Expand Down
2 changes: 1 addition & 1 deletion tb/src/opt/branches.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ static TB_Node* ideal_phi(TB_Passes* restrict opt, TB_Function* f, TB_Node* n) {
// "switch" logic for data.
TB_DataType dt = n->dt;
TB_Node* region = n->inputs[0];
if (region->input_count == 2) {
if (n->dt.type != TB_MEMORY && region->input_count == 2) {
// for now we'll leave multi-phi scenarios alone, we need
// to come up with a cost-model around this stuff.
for (User* use = find_users(opt, region); use; use = use->next) {
Expand Down
34 changes: 22 additions & 12 deletions tb/src/opt/cfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ TB_CFG tb_compute_rpo2(TB_Function* f, Worklist* ws, DynArray(TB_Node*)* tmp_sta
// we've spotted a BB entry
if (cfg_is_bb_entry(n)) {
// proj BB's will prefer to be REGION BB's
if (n->inputs[0]->type != TB_START && n->type == TB_PROJ && n->users->n->type == TB_REGION) {
if (n->inputs[0]->type != TB_START && n->type == TB_PROJ && n->users->next == NULL && n->users->n->type == TB_REGION) {
// we've already seen this BB, let's skip it
if (worklist_test_n_set(ws, n->users->n)) {
continue;
Expand All @@ -70,12 +70,12 @@ TB_CFG tb_compute_rpo2(TB_Function* f, Worklist* ws, DynArray(TB_Node*)* tmp_sta

// the start node always has its dom depth filled
if (bb.id == 0) {
bb.dom = entry;
bb.dom_depth = 0;
} else {
bb.dom_depth = -1;
}

bb.start = entry;
bb.end = n;
dyn_array_put(ws->items, entry);
nl_map_put(cfg.node_to_block, entry, bb);
Expand Down Expand Up @@ -131,6 +131,11 @@ static int resolve_dom_depth(TB_CFG* cfg, TB_Node* bb) {
return parent + 1;
}

// Looks up the basic block recorded in cfg->node_to_block for the node
// returned by get_pred(n, i), and returns a pointer to that map entry.
//
// NOTE(review): presumably get_pred yields the i-th control predecessor of n;
// confirm against its definition, which is not visible here.
static TB_BasicBlock* get_pred_bb(TB_CFG* cfg, TB_Node* n, int i) {
n = get_pred(n, i);
// address-of the lookup result: the pointer refers to the map's own storage,
// so it presumably must not outlive a resize of node_to_block (TODO confirm).
return &nl_map_get_checked(cfg->node_to_block, n);
}

TB_DominanceFrontiers* tb_get_dominance_frontiers(TB_Function* f, TB_Passes* restrict p, TB_CFG cfg, TB_Node** blocks) {
size_t stride = (cfg.block_count + 63) / 64;
size_t elems = stride * cfg.block_count;
Expand Down Expand Up @@ -173,6 +178,10 @@ void tb_compute_dominators(TB_Function* f, TB_Passes* restrict p, TB_CFG cfg) {

void tb_compute_dominators2(TB_Function* f, Worklist* ws, TB_CFG cfg) {
TB_Node** blocks = ws->items;

TB_BasicBlock* entry = &nl_map_get_checked(cfg.node_to_block, blocks[0]);
entry->dom = entry;

bool changed = true;
while (changed) {
changed = false;
Expand Down Expand Up @@ -216,9 +225,9 @@ void tb_compute_dominators2(TB_Function* f, Worklist* ws, TB_CFG cfg) {
}

assert(new_idom != NULL);
TB_Node** dom_ptr = &nl_map_get_checked(cfg.node_to_block, b).dom;
if (*dom_ptr != new_idom) {
*dom_ptr = new_idom;
TB_BasicBlock* b_bb = &nl_map_get_checked(cfg.node_to_block, b);
if (b_bb->dom == NULL || b_bb->dom->start != new_idom) {
b_bb->dom = &nl_map_get_checked(cfg.node_to_block, new_idom);
changed = true;
}
}
Expand All @@ -234,21 +243,22 @@ void tb_compute_dominators2(TB_Function* f, Worklist* ws, TB_CFG cfg) {

// Follows inputs[0] from n until it reaches a node whose type is TB_REGION
// or TB_START, and returns that node. If n already has one of those types it
// is returned unchanged.
TB_Node* tb_get_parent_region(TB_Node* n) {
while (n->type != TB_REGION && n->type != TB_START) {
// NOTE(review): these two checks test the same condition and differ only in
// message text; they look like the before/after lines of the diff, so only
// the second is presumably meant to remain.
tb_assert(n->inputs[0], "node has no have a control edge");
tb_assert(n->inputs[0], "node doesn't have a control edge");
// step to the node's first input
n = n->inputs[0];
}

return n;
}

bool tb_is_dominated_by(TB_CFG cfg, TB_Node* expected_dom, TB_Node* bb) {
while (expected_dom != bb) {
TB_Node* new_bb = idom(&cfg, bb);
if (bb == new_bb) {
bool tb_is_dominated_by(TB_CFG cfg, TB_Node* expected_dom, TB_Node* n) {
TB_BasicBlock* expected = &nl_map_get_checked(cfg.node_to_block, expected_dom);
TB_BasicBlock* bb = &nl_map_get_checked(cfg.node_to_block, n);

while (bb != expected) {
if (bb->dom == bb) {
return false;
}

bb = new_bb;
bb = bb->dom;
}

return true;
Expand Down
8 changes: 4 additions & 4 deletions tb/src/opt/gcm.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ static TB_BasicBlock* find_lca(TB_Passes* p, TB_BasicBlock* a, TB_BasicBlock* b)
if (a == NULL) return b;

// line both up
while (a->dom_depth > b->dom_depth) a = nl_map_get_checked(p->scheduled, a->dom);
while (b->dom_depth > a->dom_depth) b = nl_map_get_checked(p->scheduled, b->dom);
while (a->dom_depth > b->dom_depth) a = a->dom;
while (b->dom_depth > a->dom_depth) b = b->dom;

while (a != b) {
b = idom_bb(p, b);
a = idom_bb(p, a);
b = b->dom;
a = a->dom;
}

return a;
Expand Down
10 changes: 4 additions & 6 deletions tb/src/opt/mem2reg.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ static void add_phi_operand(Mem2Reg_Ctx* restrict c, TB_Function* f, TB_Node* ph

assert(phi_node->type == TB_PHI);
TB_Node* phi_region = phi_node->inputs[0];
DO_IF(TB_OPTDEBUG_MEM2REG)(log_debug("v%u: adding v%u to PHI", phi_node->gvn, node->gvn));
DO_IF(TB_OPTDEBUG_MEM2REG)(printf("v%u: adding v%u to PHI\n", phi_node->gvn, node->gvn));

// the slot to fill is based on the predecessor list of the region
FOREACH_N(i, 0, phi_region->input_count) {
Expand Down Expand Up @@ -121,7 +121,8 @@ static void ssa_replace_phi_arg(Mem2Reg_Ctx* c, TB_Function* f, TB_Node* bb, TB_

bool found = false;
FOREACH_N(j, 0, dst->input_count) {
if (dst->inputs[j] == bb) {
TB_Node* pred = get_pred(dst, j);
if (pred == bb) {
// try to replace
set_input(c->p, phi_reg, top, j + 1);
found = true;
Expand Down Expand Up @@ -244,7 +245,7 @@ static void ssa_rename(Mem2Reg_Ctx* c, TB_Function* f, TB_Node* bb, DynArray(TB_
// fill successors
for (User* u = end->users; u; u = u->next) {
if (cfg_is_control(u->n)) {
TB_Node* succ = cfg_next_region_control(u->n);
TB_Node* succ = cfg_get_fallthru(u->n);
ssa_replace_phi_arg(c, f, bb, succ, stack);
}
}
Expand Down Expand Up @@ -416,8 +417,6 @@ bool tb_pass_mem2reg(TB_Passes* p) {
// for each global name we'll insert phi nodes
TB_Node** phi_p = tb_tls_push(tls, c.cfg.block_count * sizeof(TB_Node*));

// tb_pass_print_dot(p, tb_default_print_callback, stdout);

NL_HashSet ever_worked = nl_hashset_alloc(c.cfg.block_count);
NL_HashSet has_already = nl_hashset_alloc(c.cfg.block_count);
FOREACH_N(var, 0, c.to_promote_count) {
Expand Down Expand Up @@ -489,7 +488,6 @@ bool tb_pass_mem2reg(TB_Passes* p) {
}

ssa_rename(&c, f, c.blocks[0], stack);
// tb_function_print(f, tb_default_print_callback, stdout);

// don't need these anymore
FOREACH_N(var, 0, c.to_promote_count) {
Expand Down
4 changes: 2 additions & 2 deletions tb/src/opt/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,7 @@ static bool peephole(TB_Passes* restrict p, TB_Function* f, TB_Node* n, TB_Peeph
if (n->type != TB_END && n->type != TB_UNREACHABLE && n->users == NULL) {
DO_IF(TB_OPTDEBUG_PEEP)(printf(" => \x1b[196mKILL\x1b[0m"));
tb_pass_kill_node(p, n);
return false;
return true;
}

// idealize node (in a loop of course)
Expand Down Expand Up @@ -1007,7 +1007,7 @@ void tb_pass_peephole(TB_Passes* p, TB_PeepholeFlags flags) {
// to end the iteration of a dom chain.
TB_Node* dom = NULL;
if (i != 0) {
dom = nl_map_get_checked(cfg.node_to_block, tmp_ws.items[i]).dom;
dom = nl_map_get_checked(cfg.node_to_block, tmp_ws.items[i]).dom->start;
}

Lattice* l = lattice_ctrl(&p->universe, dom);
Expand Down
20 changes: 14 additions & 6 deletions tb/src/passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,17 @@ typedef struct {
int dst, src;
} PhiVal;

typedef struct TB_BasicBlock {
TB_Node* dom;
typedef struct TB_BasicBlock TB_BasicBlock;
struct TB_BasicBlock {
TB_BasicBlock* dom;

TB_Node* start;
TB_Node* end;
int id, dom_depth;

TB_Node* mem_in;
NL_HashSet items;
} TB_BasicBlock;
};

typedef struct TB_CFG {
size_t block_count;
Expand Down Expand Up @@ -324,15 +327,20 @@ static TB_Node* get_block_begin(TB_Node* n) {
}

static TB_BasicBlock* idom_bb(TB_Passes* p, TB_BasicBlock* bb) {
ptrdiff_t search = nl_map_get(p->scheduled, bb->dom);
return search >= 0 ? p->scheduled[search].v : NULL;
return bb->dom;
}

// shorthand because we use it a lot
static TB_Node* idom(TB_CFG* cfg, TB_Node* n) {
if (cfg->node_to_block == NULL) return NULL;

ptrdiff_t search = nl_map_get(cfg->node_to_block, n);
return search >= 0 ? cfg->node_to_block[search].v.dom : NULL;
if (search < 0) {
return NULL;
}

TB_BasicBlock* dom = cfg->node_to_block[search].v.dom;
return dom ? dom->start : NULL;
}

static int dom_depth(TB_CFG* cfg, TB_Node* n) {
Expand Down
Loading

0 comments on commit 4832ea9

Please sign in to comment.