diff --git a/.auto_run_cmd.file b/.auto_run_cmd.file new file mode 100644 index 0000000..92cdf4e --- /dev/null +++ b/.auto_run_cmd.file @@ -0,0 +1 @@ +zig test --main-pkg-path ./ ./src/ir/phi.zig diff --git a/.github/workflows/phi-test-suite.yml b/.github/workflows/phi-test-suite.yml new file mode 100644 index 0000000..8e5ef81 --- /dev/null +++ b/.github/workflows/phi-test-suite.yml @@ -0,0 +1,167 @@ +name: PHI TEST SUITE +on: + push: + branches: + - 'main' + pull_request: + branches: + - '*' +# cancel in progress checks when a push occurs +concurrency: + group: ${{ github.workflow }}-${{ github.ref_name }} + cancel-in-progress: true +jobs: + Fibonacci: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test Fibonacci -phi + shell: bash + array_sort: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test array_sort -phi + shell: bash + array_sum: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test array_sum -phi + shell: bash + BenchMarkishTopics: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test BenchMarkishTopics -phi + shell: bash + bert: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test bert -phi + shell: bash + biggest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test biggest -phi + shell: bash + binaryConverter: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test binaryConverter -phi + shell: bash + brett: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 
+ - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test brett -phi + shell: bash + creativeBenchMarkName: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test creativeBenchMarkName -phi + shell: bash + fact_sum: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test fact_sum -phi + shell: bash + GeneralFunctAndOptimize: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test GeneralFunctAndOptimize -phi + shell: bash + hailstone: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test hailstone -phi + shell: bash + hanoi_benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test hanoi_benchmark -phi + shell: bash + killerBubbles: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test killerBubbles -phi + shell: bash + mile1: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test mile1 -phi + shell: bash + mixed: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test mixed -phi + shell: bash + OptimizationBenchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test OptimizationBenchmark -phi + shell: bash + primes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test primes -phi + shell: bash + programBreaker: + runs-on: 
ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test programBreaker -phi + shell: bash + stats: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test stats -phi + shell: bash + TicTac: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test TicTac -phi + shell: bash + wasteOfCycles: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/test-suite-setup + - run: just run-suite-test wasteOfCycles -phi + shell: bash diff --git a/.github/workflows/test-suite.yml b/.github/workflows/stack-test-suite.yml similarity index 99% rename from .github/workflows/test-suite.yml rename to .github/workflows/stack-test-suite.yml index 6aabfae..ec70d5a 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/stack-test-suite.yml @@ -1,4 +1,4 @@ -name: Test Suite +name: STACK TEST SUITE on: push: branches: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 952f4d0..8be4dc4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: Tests (zig files) +name: ZIG TEST on: push: branches: diff --git a/.gitignore b/.gitignore index 3fc90cb..addd532 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,10 @@ zig-cache zig-out /kcov-output/ TODO.dylan +lazygit +lazygit.tar.gz +.auto_run_cmd.file +clang+llvm-7.0.1-x86_64-linux-gnu-ubuntu-18.04.tar.xz +.zig_debug_last_executable.txt +tmp.ll +tmp.s diff --git a/.idea/vcs.xml b/.idea/vcs.xml index 6a392ba..a969ea2 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -3,5 +3,6 @@ + \ No newline at end of file diff --git a/Justfile b/Justfile index a35b483..109cc69 100644 --- a/Justfile +++ b/Justfile @@ -17,7 +17,7 @@ make path exe="a.out": build ensure-test-suite: git submodule update --init 
--recursive -run-suite: ensure-test-suite +run-suite *BUILD_ARGS: ensure-test-suite #!/usr/bin/env bash set -uo pipefail @@ -26,7 +26,7 @@ run-suite: ensure-test-suite NC='\033[0m' for test in $(ls {{TEST_SUITE}}); do - just run-suite-test $test > /dev/null 2>&1 + just run-suite-test $test {{BUILD_ARGS}} > /dev/null 2>&1 if [ $? -eq 0 ]; then echo -e "${GREEN}SUCCESS${NC} - ${test}" else @@ -34,7 +34,7 @@ run-suite: ensure-test-suite fi done -run-suite-test name: ensure-test-suite +run-suite-test name *BUILD_ARGS: ensure-test-suite #!/usr/bin/env bash set -euo pipefail @@ -46,7 +46,7 @@ run-suite-test name: ensure-test-suite echo -e "Running Test Suite Test: ${YELLOW}{{name}}${NC}" echo -e "${BLUE}Building Test Suite Test...${NC}" - just build-suite-test {{name}} + just build-suite-test {{name}} {{BUILD_ARGS}} echo -e "${GREEN}BUILD SUCCESS${NC}" dir="{{TEST_SUITE}}/{{name}}" @@ -54,7 +54,8 @@ run-suite-test name: ensure-test-suite echo "Checking Normal Input..." - diff <($bin < "$dir/input") "$dir/output.expected" || true + $bin < "$dir/input" > "$dir/output" + diff "$dir/output" "$dir/output.expected" if [ $? -eq 0 ]; then echo -e "${GREEN}SUCCESS${NC}" else @@ -62,18 +63,28 @@ run-suite-test name: ensure-test-suite fi echo "Checking Longer Input..." - diff <($bin < "$dir/input.longer") "$dir/output.longer.expected" || true - if [ $? -eq 0 ]; then - echo -e "${GREEN}SUCCESS${NC}" + longer="$dir/input.longer" + if [ -f "$longer" ]; then + $bin < "$longer" > "$dir/output.longer" + diff "$dir/output.longer" "$dir/output.longer.expected" + if [ $? 
-eq 0 ]; then + echo -e "${GREEN}SUCCESS${NC}" + else + echo -e "${RED}FAIL${NC}" + fi else - echo -e "${RED}FAIL${NC}" + echo "Longer Input Not Found" + echo -e "${GREEN}SUCCESS${NC}" fi -build-suite-test name: build +build-suite-test name *BUILD_ARGS: build #!/usr/bin/env bash set -euxo pipefail name="{{name}}" name="${name#array_}" dir="{{TEST_SUITE}}/{{name}}" - {{minipp}} -i "$dir/${name}.mini" -o "$dir/{{name}}.ll" + {{minipp}} -i "$dir/${name}.mini" -o "$dir/{{name}}.ll" {{BUILD_ARGS}} clang "$dir/{{name}}.ll" -o "$dir/{{name}}" + +nix: + sudo nix develop --extra-experimental-features nix-command --extra-experimental-features flakes diff --git a/build.zig b/build.zig index 58ae487..08d8e3d 100644 --- a/build.zig +++ b/build.zig @@ -10,8 +10,10 @@ const files = [_]SourceFile{ .{ .path = "src/parser.zig", .name = "parser" }, .{ .path = "src/sema.zig", .name = "sema" }, .{ .path = "src/utils.zig", .name = "utils" }, - .{ .path = "src/ir/ir.zig", .name = "ir" }, - .{ .path = "src/ir/stack.zig", .name = "ir-stack" }, + .{ .path = "src/ir/ir.zig", .name = "stack-ir" }, + .{ .path = "src/ir/stack.zig", .name = "stack-ir-gen" }, + .{ .path = "src/ir/phi.zig", .name = "phi-ir-gen" }, + .{ .path = "src/ir/ir_phi.zig", .name = "phi-ir" }, }; pub fn build(b: *std.Build) void { diff --git a/mini.ebnf b/mini.ebnf index 2040aaa..b4836e4 100644 --- a/mini.ebnf +++ b/mini.ebnf @@ -29,7 +29,7 @@ relterm -> simple {{ '<' | '>' | '<=' | '>=' } simple}∗ simple -> term {{ '+' | '−' } term}∗ term -> unary {{ '∗' | '/' } unary}∗ unary -> { '!' | '−' }∗ selector -selector -> factor {'.' id}∗ +selector -> factor {{'.' 
id} | '[' number ']'}∗ factor -> '(' expression ')' | id {arguments}opt | number | 'true' | | 'false' | 'new' id | 'null' | 'new' 'int_array' '[' number ']' | arguments -> '(' {expression { ',' expression}∗}opt ')' diff --git a/src/array_hash_set.zig b/src/array_hash_set.zig new file mode 100644 index 0000000..545ed77 --- /dev/null +++ b/src/array_hash_set.zig @@ -0,0 +1,462 @@ +/// Based of the work of Ralph Caraveo { +/// Open Source Initiative OSI - The MIT License (MIT):Licensing +/// The MIT License (MIT) +/// Copyright (c) 2024 Ralph Caraveo (deckarep@gmail.com) +/// Permission is hereby granted, free of charge, to any person obtaining a copy of +/// this software and associated documentation files (the "Software"), to deal in +/// the Software without restriction, including without limitation the rights to +/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +/// of the Software, and to permit persons to whom the Software is furnished to do +/// so, subject to the following conditions: +/// The above copyright notice and this permission notice shall be included in all +/// copies or substantial portions of the Software. +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +/// SOFTWARE. 
+///} +/// +/// +const std = @import("std"); +const mem = std.mem; +const Allocator = mem.Allocator; + +pub fn Set(comptime E: type) type { + return struct { + pub const Map = std.AutoArrayHashMapUnmanaged(E, void); + + pub const Self = @This(); + unmanaged: Map, + pub const Size = usize; + + pub const Entry = struct { + key_ptr: *E, + }; + + pub const Iterator = struct { + keys: [*]E, + len: usize, + index: usize = 0, + + pub fn next(self: *Iterator) ?Entry { + if (self.index >= self.len) return null; + const result = Entry{ .key_ptr = &self.keys[self.index] }; + self.index += 1; + return result; + } + + pub fn reset(self: *Iterator) void { + self.index = 0; + } + }; + + pub fn print(self: Self) void { + std.debug.print("Set (", .{}); + var iter = self.iterator(); + while (iter.next()) |entry| { + std.debug.print("{any}", .{entry.key_ptr.*}); + std.debug.print(", ", .{}); + } + std.debug.print(")", .{}); + } + + pub fn init() Self { + return .{ + .unmanaged = Map{}, + }; + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + self.unmanaged.deinit(allocator); + self.* = undefined; + } + + pub fn add(self: *Self, allocator: Allocator, element: E) Allocator.Error!bool { + const prevCount = self.unmanaged.count(); + try self.unmanaged.put(allocator, element, {}); + return prevCount != self.unmanaged.count(); + } + + /// Appends all elements from the provided slice, and may allocate. + /// appendSlice returns an Allocator.Error or Size which represents how + /// many elements added and not previously in the slice. + pub fn appendSlice(self: *Self, allocator: Allocator, elements: []const E) Allocator.Error!Size { + const prevCount = self.unmanaged.count(); + for (elements) |el| { + try self.unmanaged.put(allocator, el, {}); + } + return self.unmanaged.count() - prevCount; + } + + /// Returns the number of total elements which may be present before + /// it is no longer guaranteed that no allocations will be performed. 
+        pub fn capacity(self: *Self) Size {
+            // Note: map.capacity() requires mutable access, probably an oversight.
+            return self.unmanaged.capacity();
+        }
+
+        /// Cardinality effectively returns the size of the set.
+        pub fn cardinality(self: Self) Size {
+            return self.unmanaged.count();
+        }
+
+        /// Removes every element and releases the backing memory through `allocator`.
+        /// Invalidates all element pointers.
+        pub fn clearAndFree(self: *Self, allocator: Allocator) void {
+            self.unmanaged.clearAndFree(allocator);
+        }
+
+        /// Removes every element but keeps the already allocated storage.
+        /// Invalidates all element pointers.
+        pub fn clearRetainingCapacity(self: *Self) void {
+            self.unmanaged.clearRetainingCapacity();
+        }
+
+        /// Creates a copy of this set whose storage is allocated with `allocator`.
+        /// clone may return an Allocator.Error or the cloned Set.
+        /// The caller owns the returned Set and must `deinit` it.
+        pub fn clone(self: *Self, allocator: Allocator) Allocator.Error!Self {
+            // The internal map is the only field, so cloning it clones the set.
+            return .{
+                .unmanaged = try self.unmanaged.clone(allocator),
+            };
+        }
+
+        /// Returns true when the provided element exists within the Set otherwise false.
+        pub fn contains(self: Self, element: E) bool {
+            return self.unmanaged.contains(element);
+        }
+
+        /// Returns true when all elements in the other Set are present in this Set
+        /// otherwise false. An empty `other` yields true.
+        pub fn containsAll(self: Self, other: Self) bool {
+            var iter = other.iterator();
+            while (iter.next()) |entry| {
+                // `entry` is an `Entry`, not a pointer: the element is behind `key_ptr`.
+                if (!self.unmanaged.contains(entry.key_ptr.*)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// Returns true when all elements in the provided slice are present otherwise false.
+        /// An empty slice yields true.
+        pub fn containsAllSlice(self: Self, elements: []const E) bool {
+            for (elements) |el| {
+                if (!self.unmanaged.contains(el)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// Returns true when at least one or more elements from the other Set exist within
+        /// this Set otherwise false.
+        pub fn containsAny(self: Self, other: Self) bool {
+            var iter = other.iterator();
+            while (iter.next()) |entry| {
+                // `entry` is an `Entry`, not a pointer: the element is behind `key_ptr`.
+                if (self.unmanaged.contains(entry.key_ptr.*)) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /// Reserves room for at least `num` elements in total so that later
+        /// insertions up to that count do not need to allocate.
+        pub fn ensureTotalCapacity(self: *Self, allocator: Allocator, num: Size) Allocator.Error!void {
+            return self.unmanaged.ensureTotalCapacity(allocator, num);
+        }
+
+        /// differenceOf returns the difference between this set
+        /// and other. The returned set will contain
+        /// all elements of this set that are not also
+        /// elements of the other.
+        ///
+        /// Caller owns the newly allocated/returned set.
+        pub fn differenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self {
+            var diffSet = Self.init();
+            // Do not leak the partially built set when an insertion fails.
+            errdefer diffSet.deinit(allocator);
+
+            var iter = self.unmanaged.iterator();
+            while (iter.next()) |entry| {
+                if (!other.unmanaged.contains(entry.key_ptr.*)) {
+                    _ = try diffSet.add(allocator, entry.key_ptr.*);
+                }
+            }
+            return diffSet;
+        }
+
+        /// differenceUpdate does an in-place mutation of this set only;
+        /// `other` is left untouched. Afterwards this set contains all of its
+        /// former elements that are not also elements of other.
+        /// On allocation failure this set is left unchanged.
+        pub fn differenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void {
+            // In-place mutation invalidates iterators therefore a temp set is needed.
+            // So instead of a temp set, just invoke the regular full function which
+            // allocates and returns a set then swap out the map internally.
+
+            // Also, this saves a step of not having to possibly discard many elements
+            // from the self set.
+
+            // Just get a new set with the normal method.
+            const diffSet = try self.differenceOf(allocator, other);
+
+            // Destroy the internal map.
+            self.unmanaged.deinit(allocator);
+
+            // Swap it out with the new set.
+            self.unmanaged = diffSet.unmanaged;
+        }
+
+        /// Returns true when at least one or more elements from the slice exist within
+        /// this Set otherwise false.
+        pub fn containsAnySlice(self: Self, elements: []const E) bool {
+            for (elements) |el| {
+                if (self.unmanaged.contains(el)) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /// eql determines if two sets are equal to each
+        /// other. If they have the same cardinality
+        /// and contain the same elements, they are
+        /// considered equal. The order in which
+        /// the elements were added is irrelevant.
+        pub fn eql(self: Self, other: Self) bool {
+            // First discriminate on cardinalities of both sets.
+            if (self.unmanaged.count() != other.unmanaged.count()) {
+                return false;
+            }
+
+            // Now check for each element one for one and exit early
+            // on the first non-match.
+            var iter = self.unmanaged.iterator();
+            while (iter.next()) |entry| {
+                if (!other.unmanaged.contains(entry.key_ptr.*)) {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        /// intersectionOf returns a new set containing only the elements
+        /// that exist in both sets.
+        ///
+        /// Caller owns the newly allocated/returned set.
+        pub fn intersectionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self {
+            var interSet = Self.init();
+            // Do not leak the partially built set when an insertion fails.
+            errdefer interSet.deinit(allocator);
+
+            // Optimization: iterate over whichever set is smaller.
+            // Matters when disparity in cardinality is large.
+            // On equal cardinality `other` is the one iterated.
+            const selfIsSmaller = self.unmanaged.count() < other.unmanaged.count();
+            const s = if (selfIsSmaller) self else other;
+            const o = if (selfIsSmaller) other else self;
+
+            var iter = s.unmanaged.iterator();
+            while (iter.next()) |entry| {
+                if (o.unmanaged.contains(entry.key_ptr.*)) {
+                    _ = try interSet.add(allocator, entry.key_ptr.*);
+                }
+            }
+
+            return interSet;
+        }
+
+        /// intersectionUpdate does an in-place intersecting update
+        /// to the current set from the other set keeping only
+        /// elements found in this Set and the other Set.
+        /// On allocation failure this set is left unchanged.
+        pub fn intersectionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void {
+            // In-place mutation invalidates iterators therefore a temp set is needed.
+            // So instead of a temp set, just invoke the regular full function which
+            // allocates and returns a set then swap out the map internally.
+
+            // Also, this saves a step of not having to possibly discard many elements
+            // from the self set.
+
+            // Just get a new set with the normal method.
+            const interSet = try self.intersectionOf(allocator, other);
+
+            // Destroy the internal map.
+            self.unmanaged.deinit(allocator);
+
+            // Swap it out with the new set.
+            self.unmanaged = interSet.unmanaged;
+        }
+
+        /// Returns true when the set holds no elements.
+        pub fn isEmpty(self: Self) bool {
+            return self.unmanaged.count() == 0;
+        }
+
+        /// Create an iterator over the elements in the set.
+        /// The iterator is invalidated if the set is modified during iteration.
+        pub fn iterator(self: Self) Iterator {
+            const slice = self.unmanaged.entries.slice();
+            return .{
+                .keys = slice.items(.key).ptr,
+                // `Iterator.len` is a usize, so no narrowing cast is needed.
+                .len = slice.len,
+            };
+        }
+
+        /// properSubsetOf determines if every element in this set is in
+        /// the other set but the two sets are not equal.
+        pub fn properSubsetOf(self: Self, other: Self) bool {
+            return self.unmanaged.count() < other.unmanaged.count() and self.subsetOf(other);
+        }
+
+        /// properSupersetOf determines if every element in the other set
+        /// is in this set but the two sets are not equal.
+        pub fn properSupersetOf(self: Self, other: Self) bool {
+            return self.unmanaged.count() > other.unmanaged.count() and self.supersetOf(other);
+        }
+
+        /// subsetOf determines if every element in this set is in
+        /// the other set.
+        pub fn subsetOf(self: Self, other: Self) bool {
+            // First discriminate on cardinalities of both sets.
+            if (self.unmanaged.count() > other.unmanaged.count()) {
+                return false;
+            }
+
+            // Now check that every element of self is also present in other.
+            var iter = self.unmanaged.iterator();
+            while (iter.next()) |entry| {
+                if (!other.unmanaged.contains(entry.key_ptr.*)) {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        /// supersetOf determines if every element in the other Set is in
+        /// this Set.
+        pub fn supersetOf(self: Self, other: Self) bool {
+            // This is just the converse of subsetOf.
+            return other.subsetOf(self);
+        }
+
+        /// pop removes and returns an arbitrary ?E from the set.
+        /// Order is not guaranteed.
+        /// This safely returns null if the Set is empty.
+        pub fn pop(self: *Self) ?E {
+            if (self.unmanaged.count() == 0) return null;
+
+            // Copy the first stored key out before removing it; the removal
+            // moves elements around and would invalidate a pointer into storage.
+            const capturedElement: E = self.unmanaged.keys()[0];
+            _ = self.unmanaged.swapRemove(capturedElement);
+            return capturedElement;
+        }
+
+        /// remove discards a single element from the Set.
+        /// Returns true when the element was present and has been removed.
+        pub fn remove(self: *Self, element: E) bool {
+            return self.unmanaged.swapRemove(element);
+        }
+
+        /// removeAll discards all elements passed from the other Set from
+        /// this Set
+        pub fn removeAll(self: *Self, other: Self) void {
+            var iter = other.iterator();
+            while (iter.next()) |el| {
+                _ = self.unmanaged.swapRemove(el.key_ptr.*);
+            }
+        }
+
+        /// removeAllSlice discards all elements passed as a slice from the Set
+        pub fn removeAllSlice(self: *Self, elements: []const E) void {
+            for (elements) |el| {
+                _ = self.unmanaged.swapRemove(el);
+            }
+        }
+
+        /// symmetricDifferenceOf returns a new set with all elements which are
+        /// in either this set or the other set but not in both.
+        ///
+        /// The caller owns the newly allocated/returned Set.
+ pub fn symmetricDifferenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { + var sdSet = Self.init(); + + var iter = self.unmanaged.iterator(); + while (iter.next()) |entry| { + if (!other.unmanaged.contains(entry.key_ptr.*)) { + _ = try sdSet.add(allocator, entry.key_ptr.*); + } + } + + iter = other.unmanaged.iterator(); + while (iter.next()) |entry| { + if (!self.unmanaged.contains(entry.key_ptr.*)) { + _ = try sdSet.add(allocator, entry.key_ptr.*); + } + } + + return sdSet; + } + + /// symmetricDifferenceUpdate does an in-place mutation with all elements + /// which are in either this set or the other set but not in both. + pub fn symmetricDifferenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { + // In-place mutation invalidates iterators therefore a temp set is needed. + // So instead of a temp set, just invoke the regular full function which + // allocates and returns a set then swap out the map internally. + + // Also, this saves a step of not having to possibly discard many elements + // from the self set. + + // Just get a new set with the normal method. + const sd = try self.symmetricDifferenceOf(allocator, other); + + // Destroy the internal map. + self.unmanaged.deinit(allocator); + + // Swap it out with the new set. + self.unmanaged = sd.unmanaged; + } + + /// union returns a new set with all elements in both sets. + /// + /// The caller owns the newly allocated/returned Set. + pub fn unionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { + // Sniff out larger set for capacity hint. 
+ var n = self.unmanaged.count(); + if (other.unmanaged.count() > n) n = other.unmanaged.count(); + + var uSet = try Self.initCapacity( + allocator, + @intCast(n), + ); + + var iter = self.unmanaged.iterator(); + while (iter.next()) |entry| { + _ = try uSet.add(allocator, entry.key_ptr.*); + } + + iter = other.unmanaged.iterator(); + while (iter.next()) |entry| { + _ = try uSet.add(allocator, entry.key_ptr.*); + } + + return uSet; + } + + /// unionUpdate does an in-place union of the current Set and other Set. + /// + /// Allocations may occur. + pub fn unionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { + var iter = other.unmanaged.iterator(); + while (iter.next()) |entry| { + _ = try self.add(allocator, entry.key_ptr.*); + } + } + }; +} diff --git a/src/ast.zig b/src/ast.zig index e6625fe..fb92a12 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -34,6 +34,32 @@ pub fn mapStructs(ast: *Ast) !void { } } +pub fn debugPrintAst(self: *const Ast) void { + var i: usize = 0; + const nodes = self.nodes.items; + std.debug.print("AST PRINT START\n", .{}); + for (nodes) |node| { + const kind = node.kind; + const token = node.token; + std.debug.print("{d}: {s} {s}", .{ i, @tagName(kind), token._range.getSubStrFromStr(self.input) }); + switch (kind) { + .BinaryOperation => { + const binOp = node.kind.BinaryOperation; + std.debug.print(" lhs: {any}\n", .{binOp.lhs}); + std.debug.print(" rhs: {any}\n", .{binOp.rhs}); + }, + .Expression => { + const expr = node.kind.Expression; + const last = expr.last; + std.debug.print(" last: {d}", .{last}); + }, + else => {}, + } + std.debug.print("\n", .{}); + i += 1; + } + std.debug.print("AST PRINT END\n", .{}); +} pub fn printAst(self: *const Ast) void { var i: usize = 0; const nodes = self.nodes.items; @@ -55,6 +81,104 @@ pub fn printAst(self: *const Ast) void { log.trace("AST PRINT END\n", .{}); } +pub fn arrayStringsToString(self: *const Ast, arr: std.ArrayList(u8)) ![]u8 { + var strbuff = 
std.ArrayList(u8).init(self.allocator); + defer strbuff.deinit(); + for (arr.items) |str| { + try strbuff.append(str); + } + return try strbuff.toOwnedSlice(); +} + +pub fn selectorChainToString(self: *const Ast, chainID: ?usize) ![]u8 { + var cId = chainID; + var strbuff = std.ArrayList(u8).init(self.allocator); + defer strbuff.deinit(); + while (cId != null) { + var chainNode = self.get(cId.?).*; + var ident = chainNode.kind.SelectorChain.ident; + var identNode = self.get(ident).*; + switch (identNode.kind) { + .Identifier => { + // const identss = identNode.kind.Identifier; + const identName = self.getIdentValue(ident); + try strbuff.append('.'); + for (identName) |c| { + try strbuff.append(c); + } + cId = chainNode.kind.SelectorChain.next; + }, + else => { + return try self.arrayStringsToString(strbuff); + }, + } + } + return try self.arrayStringsToString(strbuff); +} + +pub fn lvalToString(self: *const Ast, lvalID: usize) ![]u8 { + const lvalNode = self.get(lvalID).*; + const lvalKind = lvalNode.kind; + var strbuff = std.ArrayList(u8).init(self.allocator); + defer strbuff.deinit(); + switch (lvalKind) { + .LValue => { + const lval = lvalKind.LValue; + const identName = self.getIdentValue(lval.ident); + for (identName) |c| { + try strbuff.append(c); + } + if (lval.chain != null) { + const chainStr = try self.selectorChainToString(lval.chain.?); + for (chainStr) |c| { + try strbuff.append(c); + } + } + }, + else => { + unreachable; + }, + } + return try self.arrayStringsToString(strbuff); +} + +pub fn selectorToString(self: *const Ast, selectorId: usize) ![]u8 { + const selectorNode = self.get(selectorId).*; + const selectorKind = selectorNode.kind; + var strbuff = std.ArrayList(u8).init(self.allocator); + defer strbuff.deinit(); + switch (selectorKind) { + .Selector => { + const selector = selectorKind.Selector; + const factor = selector.factor; + const factorNode = self.get(factor).*; + const factorFactor = factorNode.kind.Factor.factor; + const 
factorFactorNode = self.get(factorFactor).*; + switch (factorFactorNode.kind) { + .Identifier => { + const identName = self.getIdentValue(factorFactor); + for (identName) |c| { + try strbuff.append(c); + } + }, + else => { + return try self.arrayStringsToString(strbuff); + }, + } + if (selector.chain != null) { + const chainStr = try self.selectorChainToString(selector.chain.?); + for (chainStr) |c| { + try strbuff.append(c); + } + } + }, + else => { + unreachable; + }, + } + return try self.arrayStringsToString(strbuff); +} + pub fn mapFunctions(ast: *Ast) !void { const nodes = ast.nodes.items; var i: usize = 0; @@ -351,6 +475,72 @@ pub const Node = struct { /// Pointer to `Statement` /// null if only one statement lastStatement: ?Ref(.Statement) = null, + + pub const EmptyStatementIter = StatementsIter.init( + undefined, + 1, + 0, + ); + pub const StatementsIter = struct { + first: usize, + last: usize, + i: usize, + ast: *const Ast, + + pub fn init(ast: *const Ast, firstStmt: usize, lastStmt: ?usize) StatementsIter { + const last: usize = lastStmt orelse firstStmt + 1; + const i: usize = firstStmt; + + return .{ + .first = i, + .last = last, + .i = i, + .ast = ast, + }; + } + pub fn next(self: *StatementsIter) ?Ast.Node { + if (self.i > self.last) { + return null; + } + const stmt = self.ast.get(self.i).*; + // Move to the next argument, considering nested Arguments and ArgumentEnds + var cursor = self.i + 1; + while (cursor <= self.last) : (cursor += 1) { + const node = self.ast.get(cursor).*; + if (node.kind == .Statement) { + break; + } + if (node.kind == .StatementList) { + cursor = (node.kind.StatementList.lastStatement orelse node.kind.StatementList.firstStatement); + } + } + + self.i = cursor; + + return stmt; + } + + pub fn calculateLen(self: StatementsIter) usize { + // create a copy of the iterator with the initial state + // (i == first) so we do not mutate the original iterator + var copy = StatementsIter{ .ast = self.ast, .i = self.first, .first = 
self.first, .last = self.last }; + var length: usize = 0; + // the |_| is needed so zig realizes I want them to go until + // next is null, otherwise get `expected bool` compile error + while (copy.next()) |_| : (length += 1) { + // do nothing + } + return length; + } + }; + + pub fn iter(self: @This(), ast: *const Ast) StatementsIter { + return StatementsIter.init( + ast, + self.firstStatement, + self.lastStatement, + ); + } }, /// Statement holds only one field, the index of the actual statement /// it is still usefull, however, as the possible statements are vast, @@ -369,6 +559,14 @@ pub const Node = struct { .Invocation, }), finalIndex: usize, + + pub fn isControlFlow(self: @This(), ast: *const Ast) bool { + const node = ast.get(self.statement); + switch (node.kind) { + .ConditionalIf, .While, .Return, .Block => return true, + else => return false, + } + } }, Block: BlockType, @@ -409,8 +607,8 @@ pub const Node = struct { ident: Ref(.Identifier), /// Pointer to `SelectorChain` (`{'.'id}*`) /// null if no selectors - chain: ?Ref(.SelectorChain) = null, // TODO: for adding the int_array access this will need to be changed + chain: ?Ref(.SelectorChain) = null, }, Expression: ExpressionType, BinaryOperation: struct { @@ -810,8 +1008,7 @@ pub const Node = struct { .BoolType => return .Bool, .IntType => return .Int, .StructType => { - const nameToken = ast.get(tyNode.structIdentifier.?).token; - const name = nameToken._range.getSubStrFromStr(ast.input); + const name = ast.getIdentValue(tyNode.structIdentifier.?); return .{ .Struct = name }; }, .IntArrayType => return .IntArray, @@ -893,6 +1090,13 @@ pub const Node = struct { } }; }; + + pub fn isStatement(self: Node) bool { + return switch (self.kind) { + .Statement => true, + else => false, + }; + } }; pub const Type = union(enum) { @@ -924,7 +1128,10 @@ pub const Type = union(enum) { // hurt right? // // right? 
- .Null => other == .Struct or other == .Null, + .Null => switch (other) { + .Struct, .Null => true, + else => false, + }, else => @intFromEnum(self) == @intFromEnum(other), }; // Dylan I see what you were going for here I just don't like it ;) @@ -939,6 +1146,15 @@ pub const Type = union(enum) { // } // return tmp == 0; } + + pub fn isOneOf(self: Self, comptime others: anytype) bool { + inline for (others) |other| { + if (self.equals(other)) { + return true; + } + } + return false; + } }; pub fn generateTypeInt() Type { @@ -953,7 +1169,7 @@ pub fn generateTypeInt() Type { // '<,'>g/: struct/norm f:dt{da{ // '<,'>g://:d // '<,'>g:^\s*$:d -const KindTagDupe = enum { +pub const KindTagDupe = enum { Program, ProgramDeclarations, Types, @@ -977,8 +1193,8 @@ const KindTagDupe = enum { ReturnType, FunctionBody, LocalDeclarations, - ReturnTypedIdentifier, TypedIdentifier, + ReturnTypedIdentifier, StatementList, Statement, Block, @@ -1002,6 +1218,7 @@ const KindTagDupe = enum { True, False, New, + NewIntArray, Null, BackfillReserve, }; @@ -1025,6 +1242,10 @@ fn RefOneOf(comptime tags: anytype) type { const NodeKindTag = @typeInfo(Node.Kind).Union.tag_type.?; +pub fn NodeKindType(comptime tag: NodeKindTag) type { + return @typeInfo(Node.Kind).Union.fields[@intFromEnum(tag)].type; +} + fn cmpNodeKindAndTag(node: Node, nkTag: NodeKindTag) bool { return @intFromEnum(node.kind) == @intFromEnum(nkTag); } @@ -1077,7 +1298,7 @@ pub fn findIndexWithin(ast: *const Ast, nodeKind: NodeKindTag, start: usize, end if (start >= ast.nodes.items.len) { return null; } - for (ast.nodes.items[start..end], start..) |node, i| { + for (ast.nodes.items[start..@min(end, ast.nodes.items.len)], start..) 
|node, i| { if (cmpNodeKindAndTag(node, nodeKind)) { return i; } @@ -1159,7 +1380,21 @@ pub fn NodeIter(comptime tag: NodeKindTag) type { return null; } // PERF: use a hashmap to store the indexes of the functions - const nodeIndex = self.ast.findIndex(tag, self.i); + const nodeIndex = self.ast.findIndexWithin(tag, self.i, self.last + 1); + if (nodeIndex) |i| { + self.i = i + 1; + const n = self.ast.nodes.items[i]; + return n; + } + self.i = self.last + 1; + return null; + } + pub fn nextInc(self: *Self) ?Node { + if (self.i > self.last) { + return null; + } + // PERF: use a hashmap to store the indexes of the functions + const nodeIndex = self.ast.findIndexWithin(tag, self.i, self.last + 1); if (nodeIndex) |i| { self.i = i + 1; const n = self.ast.nodes.items[i]; @@ -1198,12 +1433,16 @@ pub fn iterFuncs(ast: *const Ast) FuncIter { } pub fn printNodeLine(ast: *const Ast, node: Node) void { + printNodeLineTo(ast, node, std.debug.print); +} + +pub fn printNodeLineTo(ast: *const Ast, node: Node, comptime printer: fn (comptime fmt: []const u8, args: anytype) void) void { const input = ast.input; const tok = node.token; const tok_start = tok._range.start; const tok_end = tok._range.end; var line_start: usize = tok_start; - while (line_start >= 0 and input[line_start] != '\n') : (line_start -= 1) {} + while (line_start > 0 and input[line_start] != '\n') : (line_start -= 1) {} line_start += 1; var line_end: usize = tok_end; while (line_end < input.len and input[line_end] != '\n') : (line_end += 1) {} @@ -1216,7 +1455,7 @@ pub fn printNodeLine(ast: *const Ast, node: Node) void { } } const col_no = tok_start - line_start; - std.debug.print("LINE {d}:{d} \"{s}\"\n", .{ line_no, col_no, line }); + @call(.auto, printer, .{ "LINE {d}:{d} \"{s}\"\n", .{ line_no, col_no, line } }); } const ting = std.testing; @@ -1379,3 +1618,24 @@ test "ast.int_array_access" { var ast = try testMe(input); _ = ast; } + +// test "parser.printlvalue" { +// const source = "struct S{struct S s;}; fun 
main() void {struct S s; int_array a; s.s.s.s.s.s.s.s.s.s.s.s = 22+500 + a[0] + s.s.s.s.s; a = new int_array[10]; a[0] = 1;}"; +// var ast = try testMe(source); +// var count: u32 = 0; +// ast.debugPrintAst(); +// for (ast.nodes.items) |node| { +// switch (node.kind) { +// .LValue => { +// const str = try ast.lvalToString(count); +// std.debug.print("{s}\n", .{str}); +// }, +// .Selector => { +// const str = try ast.selectorToString(count); +// std.debug.print("{s}\n", .{str}); +// }, +// else => {}, +// } +// count += 1; +// } +// } diff --git a/src/ast/stringify.zig b/src/ast/stringify.zig new file mode 100644 index 0000000..26bbe83 --- /dev/null +++ b/src/ast/stringify.zig @@ -0,0 +1,447 @@ +const std = @import("std"); +const Ast = @import("../ast.zig"); +const utils = @import("../utils.zig"); +const log = @import("../log.zig"); + +pub fn print_tree(ast: *const Ast) !void { + const tree = try into_tree(ast); + // for (tree.children.items, 0..) |child, i| { + // const is_last = i == tree.children.items.len - 1; + // _ = is_last; + // std.debug.print("{s}", .{try child.print()}); + // } + std.debug.print("\n{s}\n", .{try tree.print()}); +} + +pub fn into_tree(ast: *const Ast) !TreeNode { + // var baseAlloc = ast.allocator; + // var baseAlloc = std.heap.page_allocator; + // var arena = std.heap.ArenaAllocator.init(baseAlloc); + // defer arena.deinit(); + // var alloc = arena.allocator(); + var alloc = ast.allocator; + + const root = try expr_into_treenode(alloc, ast, ast.get(0).*); + + return root; +} + +pub const TreeNode = struct { + data: []const u8, + children: std.ArrayList(TreeNode), + alloc: std.mem.Allocator, + + const Self = @This(); + + // Create a new tree node + fn init(alloc: std.mem.Allocator, data: []const u8) Self { + return TreeNode{ + .data = data, + .children = std.ArrayList(TreeNode).init(alloc), + .alloc = alloc, + }; + } + + // Add a child node directly to this node + fn add_node(self: *Self, data: TreeNode) !void { + try 
self.children.append(data); + } + + const Str = std.ArrayList(u8); + + const Writer = std.ArrayList(u8).Writer; + // Print the tree + // TODO: take writer so we can swap between stdout and stderr + pub fn print(self: *const Self) ![]const u8 { + var str = Str.init(self.alloc); + var writer = str.writer(); + + _ = try writer.write(self.data); + _ = try writer.write("\n"); + + const children = self.children; + + var last_index: usize = children.items.len; + if (last_index > 0) { + last_index -= 1; + } + + for (self.children.items, 0..) |child, index| { + try child.print_child( + &writer, + "", + index == last_index, + ); + } + + return str.items; + } + + fn print_child(self: *const Self, str: *Writer, prefix: []const u8, is_last: bool) !void { + _ = try str.write(prefix); + _ = try str.write(if (is_last) "└─ " else "├─ "); + _ = try str.write(self.data); + _ = try str.write("\n"); + const new_prefix = try std.fmt.allocPrint(self.alloc, "{s}{s}", .{ prefix, if (is_last) " " else "│ " }); + defer self.alloc.free(new_prefix); + + const children = self.children; + + var last_index: usize = children.items.len; + if (last_index > 0) { + last_index -= 1; + } + + for (self.children.items, 0..) 
|child, index| { + try child.print_child( + str, + new_prefix, + index == last_index, + ); + } + } +}; + +fn expr_into_treenode(alloc: std.mem.Allocator, ast: *const Ast, node: Ast.Node) !TreeNode { + const data = try repr_node(alloc, ast, node); + var node_t = TreeNode.init(alloc, data); + + switch (node.kind) { + .Program => |prog| { + const progDecls_t = try expr_into_treenode(alloc, ast, ast.get(prog.declarations).*); + try node_t.add_node(progDecls_t); + const funcs_t = try expr_into_treenode(alloc, ast, ast.get(prog.functions).*); + try node_t.add_node(funcs_t); + }, + .ProgramDeclarations => |progDecls| { + if (progDecls.types) |types| { + const types_t = try expr_into_treenode(alloc, ast, ast.get(types).*); + try node_t.add_node(types_t); + } + if (progDecls.declarations) |decls| { + var decls_t = try expr_into_treenode(alloc, ast, ast.get(decls).*); + decls_t.data = "Globals"; + try node_t.add_node(decls_t); + } + }, + .Types => |types| { + var iter = Ast.NodeIter(.TypeDeclaration).init(ast, types.firstType, types.lastType); + while (iter.next()) |typeDecl| { + const typeDecl_t = try expr_into_treenode(alloc, ast, typeDecl); + try node_t.add_node(typeDecl_t); + } + }, + .TypeDeclaration => |tDecl| { + const tDecl_t = try expr_into_treenode(alloc, ast, ast.get(tDecl.declarations).*); + try node_t.add_node(tDecl_t); + }, + .StructFieldDeclarations => |fieldDecls| { + var iter = fieldDecls.iter(ast); + while (iter.next()) |fieldDecl| { + const fieldDecl_t = try expr_into_treenode(alloc, ast, fieldDecl); + try node_t.add_node(fieldDecl_t); + } + }, + .LocalDeclarations => |localDecls| { + var iter = localDecls.iter(ast); + log.trace("num locals - {d}\n", .{iter.calculateLen()}); + while (iter.next()) |tIdent| { + const tIdent_t = try expr_into_treenode(alloc, ast, tIdent); + try node_t.add_node(tIdent_t); + } + }, + .Functions => |funcs| { + var iter = Ast.NodeIter(.Function).init(ast, funcs.firstFunc, funcs.lastFunc); + while (iter.next()) |funDef| { + const 
funDef_t = try expr_into_treenode(alloc, ast, funDef); + try node_t.add_node(funDef_t); + } + }, + .Function => |func| { + const proto = ast.get(func.proto).*; + const proto_t = try expr_into_treenode(alloc, ast, proto); + try node_t.add_node(proto_t); + const body_t = try expr_into_treenode(alloc, ast, ast.get(func.body).*); + try node_t.add_node(body_t); + }, + .FunctionBody => |body| { + if (body.declarations) |decls| { + const decls_t = try expr_into_treenode(alloc, ast, ast.get(decls).*); + try node_t.add_node(decls_t); + } + if (body.statements) |stmts| { + const stmts_t = try expr_into_treenode(alloc, ast, ast.get(stmts).*); + try node_t.add_node(stmts_t); + } + }, + .FunctionProto => |proto| { + if (proto.parameters) |params| { + const params_t = try expr_into_treenode(alloc, ast, ast.get(params).*); + try node_t.add_node(params_t); + } + }, + .Parameters => |params| { + var iter = params.iter(ast); + while (iter.next()) |param| { + const param_t = try expr_into_treenode(alloc, ast, param); + try node_t.add_node(param_t); + } + }, + .Invocation => |funCall| { + if (funCall.args) |args| { + const arg_t = try expr_into_treenode(alloc, ast, ast.get(args).*); + try node_t.add_node(arg_t); + } + }, + .Arguments => |args| { + var argsIter = args.iter(ast); + while (argsIter.next()) |arg| { + const arg_t = try expr_into_treenode(alloc, ast, arg); + try node_t.add_node(arg_t); + } + }, + .StatementList => |stmtList| { + var iter = Ast.NodeIter(.Statement).init(ast, stmtList.firstStatement, stmtList.lastStatement); + while (iter.next()) |stmt| { + const stmt_t = try expr_into_treenode(alloc, ast, stmt); + iter.skipTo(stmt.kind.Statement.finalIndex); + try node_t.add_node(stmt_t); + } + }, + .Statement => |stmt| { + var stmt_t = try expr_into_treenode(alloc, ast, ast.get(stmt.statement).*); + try node_t.add_node(stmt_t); + }, + .Assignment => |assign| { + const lhs = ast.get(assign.lhs).*; + const lhs_t = try expr_into_treenode(alloc, ast, lhs); + try 
node_t.add_node(lhs_t); + const rhs = ast.get(assign.rhs).*; + const rhs_t = try expr_into_treenode(alloc, ast, rhs); + try node_t.add_node(rhs_t); + }, + .LValue => |lval| { + if (lval.chain) |chain| { + const chain_t = try expr_into_treenode(alloc, ast, ast.get(chain).*); + try node_t.add_node(chain_t); + } + }, + .Expression => |expr| { + const expr_t = try expr_into_treenode(alloc, ast, ast.get(expr.expr).*); + try node_t.add_node(expr_t); + }, + .BinaryOperation => |binop| { + const lhs = ast.get(binop.lhs).*; + const lhs_t = try expr_into_treenode(alloc, ast, lhs); + try node_t.add_node(lhs_t); + const rhs = ast.get(binop.rhs).*; + const rhs_t = try expr_into_treenode(alloc, ast, rhs); + try node_t.add_node(rhs_t); + }, + .UnaryOperation => |unop| { + const expr = ast.get(unop.on).*; + const expr_t = try expr_into_treenode(alloc, ast, expr); + try node_t.add_node(expr_t); + }, + .Selector => |sel| { + const expr = ast.get(sel.factor).*; + var expr_t = try expr_into_treenode(alloc, ast, expr); + if (sel.chain) |chain| { + const chain_t = try expr_into_treenode(alloc, ast, ast.get(chain).*); + try expr_t.add_node(chain_t); + } + try node_t.add_node(expr_t); + }, + .Factor => |factor| { + const expr = ast.get(factor.factor).*; + const expr_t = try expr_into_treenode(alloc, ast, expr); + try node_t.add_node(expr_t); + }, + .SelectorChain => |chain| { + if (chain.next) |next| { + const chain_t = try expr_into_treenode(alloc, ast, ast.get(next).*); + try node_t.add_node(chain_t); + } + }, + .While => |whileNode| { + const cond = ast.get(whileNode.cond).*; + const cond_t = try expr_into_treenode(alloc, ast, cond); + try node_t.add_node(cond_t); + const body = ast.get(whileNode.block).*; + const body_t = try expr_into_treenode(alloc, ast, body); + try node_t.add_node(body_t); + }, + .Block => |block| { + if (block.statements) |stmts| { + const stmts_t = try expr_into_treenode(alloc, ast, ast.get(stmts).*); + try node_t.add_node(stmts_t); + } + }, + .ConditionalIf => 
|ifNode| { + const cond = ast.get(ifNode.cond).*; + const cond_t = try expr_into_treenode(alloc, ast, cond); + try node_t.add_node(cond_t); + const body = ast.get(ifNode.block).*; + const body_t = try expr_into_treenode(alloc, ast, body); + try node_t.add_node(body_t); + }, + .ConditionalIfElse => |elseNode| { + const then_t = try expr_into_treenode(alloc, ast, ast.get(elseNode.ifBlock).*); + try node_t.add_node(then_t); + const else_t = try expr_into_treenode(alloc, ast, ast.get(elseNode.elseBlock).*); + try node_t.add_node(else_t); + }, + .Return => |ret| { + if (ret.expr) |expr| { + const expr_t = try expr_into_treenode(alloc, ast, ast.get(expr).*); + try node_t.add_node(expr_t); + } + }, + .Print => |print| { + const expr_t = try expr_into_treenode(alloc, ast, ast.get(print.expr).*); + try node_t.add_node(expr_t); + if (print.hasEndl) { + const endl_t = TreeNode.init(alloc, "endl"); + try node_t.add_node(endl_t); + } + }, + .New => |new| { + const expr_t = try expr_into_treenode(alloc, ast, ast.get(new.ident).*); + try node_t.add_node(expr_t); + }, + .NewIntArray => |newIntArray| { + const expr_t = try expr_into_treenode(alloc, ast, ast.get(newIntArray.length).*); + try node_t.add_node(expr_t); + }, + .Delete => |del| { + const expr_t = try expr_into_treenode(alloc, ast, ast.get(del.expr).*); + try node_t.add_node(expr_t); + }, + // base nodes with no children + // typed Identifier has children but we display it as `{type} {name}` + // for simplicity's sake + .TypedIdentifier, + .Number, + .Identifier, + .True, + .False, + .Read, + .Null, + .Type, + .BoolType, + .IntType, + .IntArrayType, + .Void, + .StructType, + .ArgumentEnd, + .ArgumentsEnd, + .ReturnTypedIdentifier, + .ReturnType, + => {}, + .FunctionEnd, .BackfillReserve => { + log.warn("unhandled expr_into_treenode: {s}\n", .{@tagName(node.kind)}); + }, + // else => utils.todo("expr_into_treenode: {s}", .{@tagName(node.kind)}), + } + return node_t; +} + +fn repr_node(alloc: std.mem.Allocator, ast: *const 
Ast, node: Ast.Node) ![]const u8 { + return switch (node.kind) { + // just print tag name + .LocalDeclarations, + .Types, + .Program, + .ProgramDeclarations, + .FunctionBody, + .Functions, + .StatementList, + .Statement, + .Assignment, + .Factor, + .Selector, + .Expression, + .Block, + .True, + .False, + .ConditionalIf, + .ConditionalIfElse, + .While, + .Return, + => @tagName(node.kind), + // print tag name and token tag name + .UnaryOperation, .BinaryOperation => std.fmt.allocPrint(alloc, "{s} {s}", .{ @tagName(node.kind), tok_name(node) }), + // print tag name and token value + .LValue, .Identifier, .Number => std.fmt.allocPrint(alloc, "{s} {s}", .{ @tagName(node.kind), tok_str(ast, node) }), + .SelectorChain => |chain| std.fmt.allocPrint(alloc, "{s} .{s}", .{ @tagName(node.kind), ast.getIdentValue(chain.ident) }), + .TypeDeclaration => |tDecls| std.fmt.allocPrint(alloc, "Struct {s}", .{ast.getIdentValue(tDecls.ident)}), + .TypedIdentifier => |tIdent| std.fmt.allocPrint(alloc, "{s} {s}", .{ @tagName(tIdent.getType(ast)), tIdent.getName(ast) }), + .Function => |funDef| std.fmt.allocPrint(alloc, "Fun {s}", .{ast.getIdentValue(ast.get(funDef.proto).*.kind.FunctionProto.name)}), + .FunctionProto => |proto| std.fmt.allocPrint(alloc, "{s} -> {s}", .{ ast.getIdentValue(proto.name), @tagName(if (proto.getReturnType(ast)) |ty| ty else .Void) }), + .Invocation => |funCall| std.fmt.allocPrint(alloc, "Call {s}", .{ast.getIdentValue(funCall.funcName)}), + else => { + log.warn("unhandled repr_node: {s}\n", .{@tagName(node.kind)}); + return @tagName(node.kind); + }, + }; +} + +fn dbg(label: []const u8, str: []const u8) []const u8 { + std.debug.print("{s}: {s}\n", .{ label, str }); + return str; +} + +fn tok_str(ast: *const Ast, node: Ast.Node) []const u8 { + return node.token._range.getSubStrFromStr(ast.input); +} + +fn tok_name(node: Ast.Node) []const u8 { + return @tagName(node.token.kind); +} + +test "ast/stringify.make-sure-this-shit-compiles" { + defer log.print(); + // 
TODO: comment out the `else` arms in the repr and treenode + // switch statements and handle unhandled nodes + // i.e. + // - Delete + // - New + // - Read + // - Print + // - Print endl + // - BackfillReserve -> unreachable + // FIXME: + // figure out why it is printing the local a in main + // as a global + const input = + \\ struct foo { + \\ int a; + \\ bool b; + \\ struct foo foo; + \\ }; + \\ + \\ int globalA; + \\ bool globalB; + \\ struct foo globalFoo; + \\ + \\ fun main() void { + \\ int a; + \\ bool b; + \\ struct foo foo; + \\ a = -1 / 2 * 3 + 4 - 5; + \\ b = !true == false && a < a && a > a || a <= 10 || a >= 15 || a != 12; + \\ foo.foo.foo.a = a; + \\ while (b) { + \\ if (false) { + \\ return; + \\ } else { + \\ a = -0; + \\ } + \\ } + \\ } + ; + const tokens = try @import("../lexer.zig").Lexer.tokenizeFromStr(input, std.heap.page_allocator); + const parser = try @import("../parser.zig").Parser.parseTokens(tokens, input, std.heap.page_allocator); + const ast = try Ast.initFromParser(parser); + try print_tree(&ast); +} diff --git a/src/hanoi_local.mini b/src/hanoi_local.mini new file mode 100644 index 0000000..fc06c53 --- /dev/null +++ b/src/hanoi_local.mini @@ -0,0 +1,143 @@ +# Towers of Hanoi + +struct plate +{ + int size; + struct plate plateUnder; +}; + +struct plate peg1; +struct plate peg2; +struct plate peg3; +int numMoves; + +fun move(int from, int to) void +{ + struct plate plateToMove; + + if (from == 1) { + plateToMove = peg1; + peg1 = peg1.plateUnder; + } + else + { + if (from == 2) { + plateToMove = peg2; + peg2 = peg2.plateUnder; + } + else { + plateToMove = peg3; + peg3 = peg3.plateUnder; + } + } + + if (to == 1) { + plateToMove.plateUnder = peg1; + peg1 = plateToMove; + } + else + { + if (to == 2) { + plateToMove.plateUnder = peg2; + peg2 = plateToMove; + } + else + { + plateToMove.plateUnder = peg3; + peg3 = plateToMove; + } + } + + numMoves = numMoves + 1; +} + +fun hanoi(int n, int from, int to, int other) void +{ + if (n == 1) { + 
move(from, to); + } + else + { + hanoi(n - 1, from, other, to); + move(from, to); + hanoi(n - 1, other, to, from); + } +} + +fun printPeg(struct plate peg) void +{ + struct plate aPlate; + + aPlate = peg; + + print 66 endl; + while (aPlate != null) + { + print 67 endl; + print aPlate.size endl; + print 68 endl; + aPlate = aPlate.plateUnder; + print 69 endl; + } +} + +fun main() int +{ + int count, numPlates; + struct plate aPlate; + + peg1 = null; + peg2 = null; + peg3 = null; + numMoves = 0; + + numPlates = read; + + if (numPlates >= 1) + { + count = numPlates; + + while (count != 0) + { + aPlate = new plate; + aPlate.size = count; + aPlate.plateUnder = peg1; + peg1 = aPlate; + count = count - 1; + } + + # Print the peg number followed by any plates + # it has starting from the top of the stack + # to the bottom. At this point, peg 1 + # should have all the plates. + print 1 endl; + printPeg(peg1); + print 2 endl; + printPeg(peg2); + print 3 endl; + printPeg(peg3); + + hanoi(numPlates, 1, 3, 2); + + # At this point, peg 3 + # should have all the plates. + print 1 endl; + printPeg(peg1); + print 2 endl; + printPeg(peg2); + print 3 endl; + printPeg(peg3); + + # Print the number of moves. 
+ print numMoves endl; + + while (peg3 != null) + { + aPlate = peg3; + peg3 = peg3.plateUnder; + delete aPlate; + } + } + + return 0; +} diff --git a/src/inter_fun_structs.mini b/src/inter_fun_structs.mini new file mode 100644 index 0000000..0dfd513 --- /dev/null +++ b/src/inter_fun_structs.mini @@ -0,0 +1,22 @@ + +struct Node { + struct Node n; + int a; +}; + +fun comparevalue(struct Node head) void { + struct Node currnode; + currnode = head; + if(head.a != 0){ + print currnode.n.a endl; + } +} + +fun main() void{ + struct Node head; + head = new Node; + head.n = new Node; + head.n.a = 2; + head.a = 1; + comparevalue(head); +} diff --git a/src/ir/ir.zig b/src/ir/ir.zig index 8bc05b7..e2090ba 100644 --- a/src/ir/ir.zig +++ b/src/ir/ir.zig @@ -79,7 +79,7 @@ pub fn astTypeToIRType(self: *IR, astType: Ast.Type) Type { .Bool => .bool, .Void => .void, .Null => std.debug.panic("FUCK WE HAVE TO HANDLE NULL TYPE\n", .{}), - .IntArray => utils.todo("Handle the array type", .{}), + .IntArray => .int_arr, .Struct => |name| blk: { const structID = self.internIdent(name); break :blk .{ .strct = structID }; @@ -924,29 +924,34 @@ pub const Type = union(enum) { void, int, bool, - // sawy dylan + /// sawy dylan strct: StructID, - // only used for malloc, free, printf, read decls and args - // will always be a pointer to i8 + /// only used for malloc, free, printf, read decls and args + /// will always be a pointer to i8 i8, - // only used for args to malloc and gep as shown in the - // examples beard gave us - // could just use int but I think it being wierd helps - // make it stand out and that is probably a good thing + /// only used for args to malloc and gep as shown in the + /// examples beard gave us + /// could just use int but I think it being wierd helps + /// make it stand out and that is probably a good thing i32, + /// Language int_array type. 
+ /// Arr is for known size arrays like the builtin printf format strings + /// This is for user arrays + int_arr, arr: struct { type: enum { i8, - // Same as Type.int, just has to be a separate thing - // for - // 1. semantics - we only have arrays of i8 (the printf inputs) - // and soon int (the user arrays) - // 2. to avoid having the type be recursively defined - // which zig likes to bitch and moan about (understandably) + /// Same as Type.int, just has to be a separate thing + /// for + /// 1. semantics - we only have arrays of i8 (the printf inputs) + /// and soon int (the user arrays) + /// 2. to avoid having the type be recursively defined + /// which zig likes to bitch and moan about (understandably) int, }, len: u32, }, + /// null type - note special null_, /// The type used instead of optionals pub const default = Type.void; @@ -972,6 +977,9 @@ pub const Type = union(enum) { pub fn sizeof(self: Type) u32 { return switch (self) { .strct, .int, .null_ => 8, + // int_arr is just a pointer to a dynamically allocated + // array so it is just the size of a pointer + .int_arr => 8, .i8, .bool => 1, .void => 0, .i32 => 4, diff --git a/src/ir/ir_phi.zig b/src/ir/ir_phi.zig new file mode 100644 index 0000000..b30fe5c --- /dev/null +++ b/src/ir/ir_phi.zig @@ -0,0 +1,3384 @@ +// STACK GEN +pub const std = @import("std"); + +const Ast = @import("../ast.zig"); +const utils = @import("../utils.zig"); +const log = @import("../log.zig"); +const Set = @import("../array_hash_set.zig"); + +pub const InternPool = @import("../intern-pool.zig"); +/// The ID of a string stored in the intern pool +/// Henceforth, all operations involving variable or struct names +/// shall utilize the power of this type, rather than `std.mem.eql(u8, a, b);` +pub const StrID = InternPool.StrID; + +pub const IR = @This(); + +types: TypeList, +globals: GlobalsList, +funcs: FunctionList, +intern_pool: InternPool, +alloc: std.mem.Allocator, + +// NOTE: could be made variable by making this a field 
in the IR struct +// SEE: https://releases.llvm.org/7.0.0/docs/LangRef.html#data-layout +// for defaults this is probably the safest byte alignment +pub const ALIGN = 8; + +pub fn reduceChainToFirstIdent(self: *IR, chain: StrID) StrID { + const chain_long = self.getIdent(chain); + var start: usize = 0; + var end: usize = 0; + for (chain_long) |c| { + if (c == '.') { + break; + } + end += 1; + } + var sliced = chain_long[start..end]; + return self.internIdent(sliced); +} + +pub fn isIdentChain(self: *IR, id: StrID) bool { + // just check if there is a . in the str + const str = self.getIdent(id); + for (str) |c| { + if (c == '.') { + return true; + } + } + return false; +} + +// something like s.a.ass.penis -> Id{s}, Id{a}, +pub fn chainToStrIdList(self: *IR, chain: StrID) !std.ArrayList(StrID) { + var list = std.ArrayList(StrID).init(self.alloc); + var chain_long = self.getIdent(chain); + var tokenizer = std.mem.tokenize(u8, chain_long, "."); + while (tokenizer.next()) |piece| { + try list.append(self.internIdent(piece)); + } + return list; +} + +pub fn init(alloc: std.mem.Allocator) IR { + return .{ + .types = TypeList.init(), + .globals = GlobalsList.init(), + .funcs = FunctionList.init(), + .intern_pool = InternPool.init(alloc) catch unreachable, + .alloc = alloc, + }; +} + +const Stringify = @import("./stringify_phi.zig"); + +/// Stringify the IR with default config options +/// NOTE: highly recommended to pass a std.heap.ArenaAllocator.allocator +pub fn stringify(self: *const IR, alloc: std.mem.Allocator) ![]const u8 { + return self.stringify_cfg(alloc, .{ + .header = false, + }); +} +pub fn stringifyWithHeader(self: *const IR, alloc: std.mem.Allocator) ![]const u8 { + return self.stringify_cfg(alloc, .{ + .header = true, + }); +} + +pub fn stringify_cfg(self: *const IR, alloc: std.mem.Allocator, cfg: Stringify.Config) ![]const u8 { + return Stringify.stringify(self, alloc, cfg); +} + +pub fn internIdent(self: *IR, ident: []const u8) StrID { + return 
self.intern_pool.intern(ident) catch |err| { + // The only way this can fail is if the intern pool is out of memory + // I'm not typing try all over the place just so it bubbles up further + // Im sawy mistew kewwey + std.debug.panic("Failed to intern ident: {any}\n", .{err}); + }; +} + +/// Puts the ident at the given index in the ast into the interning pool +pub fn internIdentNodeAt(self: *IR, ast: *const Ast, identIdx: usize) StrID { + const str = ast.getIdentValue(identIdx); + return self.internIdent(str); +} + +pub fn internToken(self: *IR, ast: *const Ast, token: Ast.Token) StrID { + const value = token._range.getSubStrFromStr(ast.input); + return self.internIdent(value); +} + +pub fn astTypeToIRType(self: *IR, astType: Ast.Type) Type { + return switch (astType) { + .Int => .int, + .Bool => .bool, + .Void => .void, + .Null => std.debug.panic("FUCK WE HAVE TO HANDLE NULL TYPE\n", .{}), + .IntArray => .int_arr, + .Struct => |name| blk: { + const structID = self.internIdent(name); + break :blk .{ .strct = structID }; + }, + }; +} + +pub fn safeGetIdent(self: *const IR, id: StrID) ![]const u8 { + // this is only supposed to be used for debugging, so just panic + return self.intern_pool.get(id); +} + +pub fn getIdent(self: *const IR, id: StrID) []const u8 { + // this is only supposed to be used for debugging, so just panic + return self.intern_pool.get(id) catch unreachable; +} + +pub fn getFun(self: *const IR, nameID: StrID) !Function { + for (self.funcs.items.items) |func| { + if (func.name == nameID) { + return func; + } + } + + return error.NotFound; +} + +pub fn getIdentID(self: *const IR, ident: []const u8) !StrID { + return self.intern_pool.getIDOf(ident); +} + +pub const GlobalsList = struct { + items: List, + + pub const List = StaticSizeLookupTable(StrID, Item, Item.getKey); + pub const Item = struct { + name: StrID, + type: Type, + + pub fn init(name: StrID, ty: Type) Item { + return .{ .name = name, .type = ty }; + } + + pub fn getKey(self: Item) StrID 
{ + return self.name; + } + }; + + pub fn init() GlobalsList { + return .{ .items = undefined }; + } + + pub fn fill(self: *GlobalsList, items: []Item) void { + const lut = List.init(items); + self.items = lut; + } + + pub fn index(self: *const GlobalsList, idx: usize) Item { + return self.items.items[idx]; + } + + pub fn len(self: *const GlobalsList) usize { + return self.items.len; + } + + pub fn contains(self: *const GlobalsList, name: StrID) bool { + return self.items.contains(name); + } +}; + +pub const FunctionList = struct { + items: List, + pub const List = StaticSizeLookupTable(StrID, Function, Function.getKey); + + pub fn init() FunctionList { + return .{ .items = undefined }; + } + /// Note the lack of a way to add one item at a time, + /// only many at once + pub fn fill(self: *FunctionList, items: []Function) void { + self.items = List.init(items); + } + + pub fn contains(self: *const FunctionList, name: StrID) bool { + return self.items.contains(name); + } +}; + +pub const Function = struct { + alloc: std.mem.Allocator, + name: StrID, + returnType: Type, + bbsToCFG: std.AutoHashMap(BasicBlock.ID, CfgBlock.ID_t), + cfgToBBs: std.AutoHashMap(CfgBlock.ID_t, BasicBlock.ID), + defBlocks: std.AutoHashMap(StrID, std.ArrayList(BasicBlock.ID)), + declaredVars: std.AutoHashMap(StrID, Type), + bbs: OrderedList(BasicBlock), + regs: LookupTable(Register.ID, Register, Register.getID), + cfg: CfgFunction, + exitBBID: BasicBlock.ID, + retRegUsed: bool = false, + + /// a list of the instructions that are within the fuction + /// the basic blocks have a list of instructions that they use, + /// those come out of this list. 
+ /// To remove or add instructions, either remove from the Basic Block list + /// or add to this ordered list and then referer to it in the Basic Block + insts: OrderedList(Inst), + returnReg: ?Register.ID = null, + paramRegs: std.AutoHashMap(StrID, Register.ID), + params: ParamsList, + typesMap: std.AutoHashMap(StrID, Type), + pub const entryBBID: usize = 0; + + pub fn init(alloc: std.mem.Allocator, name: StrID, returnType: Type, params: []Param) Function { + return .{ + .alloc = alloc, + .bbs = OrderedList(BasicBlock).init(alloc), + .name = name, + .returnType = returnType, + .regs = LookupTable(Register.ID, Register, Register.getID).init(alloc), + .params = ParamsList.init(params), + .insts = OrderedList(Inst).init(alloc), + .typesMap = std.AutoHashMap(StrID, Type).init(alloc), + .bbsToCFG = std.AutoHashMap(BasicBlock.ID, CfgBlock.ID_t).init(alloc), + .cfgToBBs = std.AutoHashMap(CfgBlock.ID_t, BasicBlock.ID).init(alloc), + .paramRegs = std.AutoHashMap(StrID, Register.ID).init(alloc), + .declaredVars = std.AutoHashMap(StrID, Type).init(alloc), + .exitBBID = 0, + .defBlocks = std.AutoHashMap(StrID, std.ArrayList(BasicBlock.ID)).init(alloc), + .cfg = CfgFunction.init(alloc), + }; + } + + pub fn identToType(self: *Function, ident: StrID) !Type { + const protoType = self.typesMap.get(ident); + if (protoType != null) { + return protoType; + } + } + + pub fn linkBBsFromCFG(self: *Function) !void { + for (self.cfg.postOrder.items) |cfgBlockID| { + const cfgBock = self.cfg.blocks.items[cfgBlockID]; + for (cfgBock.outgoers) |outgoer| { + if (outgoer == null) { + continue; + } + const edge = self.cfg.edges.items[outgoer.?]; + const bbID = self.cfgToBBs.get(edge.dest).?; + const bbInID = self.cfgToBBs.get(edge.src).?; + const bbOut = self.bbs.get(bbID); + const bbIn = self.bbs.get(bbInID); + try bbIn.addOutgoer(bbID); + try bbOut.addIncomer(bbInID); + } + } + } + + pub fn mapUsesBBFromCFG(self: *Function, ir: *IR) !void { + for (self.cfg.postOrder.items) |cfgBlockID| { + 
const cfgBock = self.cfg.blocks.items[cfgBlockID];
+            const bbID = self.cfgToBBs.get(cfgBlockID).?;
+            for (cfgBock.typedIdents.items) |ident| {
+                var redIdent = ir.reduceChainToFirstIdent(ident);
+                var bb = self.bbs.get(bbID);
+                if (!bb.uses.contains(redIdent)) {
+                    try bb.uses.put(redIdent, true);
+                }
+            }
+        }
+    }
+
+    /// Records that `ident` arrives in `bbDest` from `bbFrom` carrying `ref`.
+    /// If `bbDest` already has a phi instruction for `ident` in its phi map, the
+    /// (`bbFrom`, `ref`) pair is added to that instruction. Otherwise a new phi
+    /// instruction is added to `bbDest` and registered in its phi map.
+    /// `ident` must have an entry in `typesMap`.
+    pub fn addPhiEntry(self: *Function, bbDest: BasicBlock.ID, ident: IR.StrId, bbFrom: Label, ref: Ref) !void {
+        // check if an entry already exists
+        // if it does, then just update the ref
+        // The phi lives in the destination block, so that is the phi map to consult.
+        if (self.bbs.get(bbDest).phiMap.get(ident)) |phiInstID| {
+            try self.insts.get(phiInstID).phiAddRef(bbFrom, ref);
+            return;
+        }
+
+        var entries = std.ArrayList(PhiEntry).init(self.alloc);
+        // NOTE(review): this frees `entries` on exit even though it was handed to
+        // `Inst.phi`; that is only sound if `Inst.phi` copies the entries - confirm.
+        defer entries.deinit();
+        try entries.append(.{ .bb = bbFrom, .ref = ref });
+        // create a new phi inst
+        const _type = self.typesMap.get(ident).?;
+        const inst = Inst.phi(ref, _type, entries);
+        const phiInstReg = try self.addNamedInst(bbDest, inst, ident, _type);
+
+        // add the phi inst to the block's phi map
+        // The block is updated in place through a freshly fetched pointer: writing
+        // back a copy taken before `addNamedInst` would discard the instruction
+        // that call appended to the block.
+        try self.bbs.get(bbDest).addPhiInst(phiInstReg.inst, ident);
+    }
+
+    // index into the insts array
+    pub const InstID = u32;
+
+    pub const ParamsList = StaticSizeLookupTable(Param.ID, Param, Param.getKey);
+    pub const Param = struct {
+        name: StrID,
+        type: Type,
+
+        pub const ID = u32;
+        pub fn getKey(self: @This()) StrID {
+            return self.name;
+        }
+    };
+
+    pub fn renameRef(self: *Function, ir: *IR, ref: Ref, name: StrID) Ref {
+        // check the kind of the ref
+        switch (ref.kind) {
+            .local => {
+                return self.renameLocalRef(ref, name);
+            },
+            .param => {
+                utils.todo("use renameParamRef", .{});
+            },
+            .global => {
+                return self.renameGlobalRef(ir, ref, name);
+            },
+            else => {
+                std.debug.panic("Unknown ref kind: {any}\n", .{ref.kind});
+            },
+        }
+        unreachable;
+    }
+
+    pub fn renameRefAnon(
+        self: *Function,
+        ir: *IR,
+        ref: Ref,
+    ) Ref {
+        // check the kind of the ref
+        switch
(ref.kind) { + .local => { + return self.renameLocalRef(ref, IR.InternPool.NULL); + }, + .param => { + var ref_ = ref; + ref_.name = IR.InternPool.NULL; + return ref_; + }, + .global => { + return self.renameGlobalRef(ir, ref, IR.InternPool.NULL); + }, + else => { + std.debug.panic("Unknown ref kind: {any}\n", .{ref.kind}); + }, + } + unreachable; + } + + pub fn renameParamRef(self: *Function, ir: *IR, ref: Ref, name: StrID, inst: IR.Function.InstID) Ref { + utils.todo("This should be removed in refactoring to params as reg", .{}); + _ = self; + _ = ir; + if (inst == 0) {} + // ref.debugPrintWithName(ir); + // utils.todo("Tried to rename a param, this is not allowed", .{}); + // const param = self.params.contains(ref.name); + // param.name = name; + // self.params.set(ref.i, param); + // return Ref.param(ref.i, name, param.type); + var refCopy = ref; + refCopy.kind = .localedParam; + refCopy.name = name; + refCopy.extra = inst; + return refCopy; + } + + pub fn renameGlobalRef(self: *Function, ir: *IR, ref: Ref, name: StrID) Ref { + ref.debugPrintWithName(ir); + _ = name; + _ = self; + utils.todo("Tried to rename a gloabl ref, this is not implemented yet", .{}); + } + + pub fn renameLocalRef(self: *Function, ref: Ref, name: StrID) Ref { + // get the register + var reg = self.regs.get(ref.i); + var inst = self.insts.get(reg.inst); + reg.name = name; + inst.res = IR.Ref.fromRegLocal(reg); + self.regs.set(ref.i, reg); + self.insts.set(reg.inst, inst.*); + return inst.res; + } + + pub fn getKey(self: Function) StrID { + return self.name; + } + + /// Requires a name which will go to the name of the label in the strinify + pub fn newBB(self: *Function, name: []const u8) !BasicBlock.ID { + const bb = BasicBlock.init(self.alloc, name); + const id = try self.bbs.add(bb); + return id; + } + + /// + pub fn newBBWithParent(self: *Function, parent: BasicBlock.ID, name: []const u8) !BasicBlock.ID { + var bb = BasicBlock.init(self.alloc, name); + + // add the given parent as an 
incomer + try bb.addIncomer(parent); + const id = try self.bbs.add(bb); + + // add itself to the parent's outgoers list + _ = try self.bbs.get(parent).addOutgoer(id); + return id; + } + + pub fn addNamedInst(self: *Function, bb: BasicBlock.ID, basicInst: Inst, name: StrID, ty: Type) !Register { + // reserve + const regID = try self.regs.add(undefined); + const instID = try self.insts.add(undefined); + + // construct the register to be added, using the reserved IDs + const reg = Register{ .id = regID, .inst = instID, .name = name, .bb = bb, .type = ty }; + var inst = basicInst; + inst.res = Ref.local(regID, name, ty); // update the reference of the incoming instruction + + // save + self.regs.set(regID, reg); + self.insts.set(instID, inst); // in the inst array update the resulting instruction + try self.bbs.get(bb).insts.append(instID); + try self.bbs.get(bb).versionMap.put(name, inst.res); + return reg; + } + + /// Add an unnamed instruction, this is used for intermeidates, + /// This is preety much used for print and read as shown for the LLVM stuff + pub fn addInst(self: *Function, bb: BasicBlock.ID, inst: Inst, ty: Type) !Register { + return self.addNamedInst(bb, inst, InternPool.NULL, ty); + } + + pub fn addCtrlFlowInst(self: *Function, bb: BasicBlock.ID, inst: Inst) !void { + utils.assert(inst.isCtrlFlow(), "tried to add non control flow instruction:\n{any}\n", .{inst}); + // check if the block already ends in a control flow statement + // add inst if it doesnt, otherise assert that the two are the same + // for easier debugging purposes + // this makes it easy for a parent function to say "add this control flow instruction if not already there" + // ex. 
adding a jump to the `%exit` block from the last bb in a function body + const maybeLastBBInstID = self.bbs.get(bb).getLastInstID(); + if (maybeLastBBInstID) |lastBBInstID| { + const lastInst = self.insts.get(lastBBInstID).*; + if (lastInst.isCtrlFlow()) { + if (!ctrlFlowInstsEqual(lastInst, inst)) { + log.err("tried to add control flow instruction to block that already had different control flow instruction.\nexisting = {any}\nnew = {any}\n", .{ lastInst, inst }); + return error.ConflictingControlFlowInstructions; + } + // last inst is already correct, add edges if they dont exist already + try self.connectInstBBs(bb, inst); + return; + } + // otherwise we continue with the add + } + const instID = try self.insts.add(inst); + try self.connectInstBBs(bb, inst); + try self.bbs.get(bb).insts.append(instID); + } + + /// adds edges between basic blocks based on the given control flow instruction + /// within the given basic block + /// NOTE: should not mess things up if the blocks are already connected, + /// the addOutgoer|Incomer functions check if the edge is already defined + pub fn connectInstBBs(self: *Function, bb: BasicBlock.ID, inst: Inst) !void { + switch (inst.op) { + .Jmp => { + const jmp = Inst.Jmp.get(inst); + try self.bbs.get(bb).addOutgoer(jmp.dest); + try self.bbs.get(jmp.dest).addIncomer(bb); + }, + .Br => { + const br = Inst.Br.get(inst); + try self.bbs.get(bb).addOutgoer(br.iftrue); + try self.bbs.get(bb).addOutgoer(br.iffalse); + try self.bbs.get(br.iftrue).addIncomer(bb); + try self.bbs.get(br.iffalse).addIncomer(bb); + }, + else => { + std.debug.panic("addLoadAndStoreTo: Invalid control flow instruction: {any}\n", .{@tagName(inst.op)}); + }, + } + } + + /// Only realy useful for store - i.e. the only(?) 
instruction that does not have + /// a result register and is also noth control flow + /// This is functionally identical to addCtrlFlowInst but with a different name + /// for semantic clarity + pub fn addAnonInst(self: *Function, bb: BasicBlock.ID, inst: Inst) !void { + const instID = try self.insts.add(inst); + try self.bbs.get(bb).insts.append(instID); + } + + pub const NotFoundError = error{ OutOfMemory, UnboundIdentifier, AllocFailed }; + + pub fn getNamedRef(self: *Function, ir: *IR, name: StrID, bb: BasicBlock.ID, assignmentTOrAccessF: bool) NotFoundError!Ref { + const namedRef = try self.getNamedRefInner(ir, name, bb, assignmentTOrAccessF); + if (self.returnReg == null) return namedRef; + if (namedRef.i == self.returnReg.?) { + self.retRegUsed = true; + } + return namedRef; + } + + pub fn getNamedRefInner(self: *Function, ir: *IR, name: StrID, bb: IR.BasicBlock.ID, assignmentTOrAccessF: bool) NotFoundError!Ref { + var ref = try self.getNamedRefNoAdd(ir, name, bb); + if (ref != null) return ref.?; + + // at this point we know that it is a declared variable, but it has not been used yet + // we can create a new register for it based on the passed (desired) outcome + if (assignmentTOrAccessF) { + // if this is anot assigned over -><- we boned + const declType = self.typesMap.get(name).?; + const refAss = Ref.local(0, name, declType); + return refAss; + } else { + // we need to create a new register for this in the entry block using alloca + const declType = self.typesMap.get(name).?; + const alloca = Inst.alloca(declType); + const allocReg = try self.addNamedInst(Function.entryBBID, alloca, name, declType); + // add a load also for those quircky girls + const allocRef = IR.Ref.fromReg(allocReg, self, ir); + const load = Inst.load(declType, allocRef); + const loadReg = try self.addNamedInst(Function.entryBBID, load, name, declType); + const loadRef = IR.Ref.fromReg(loadReg, self, ir); + try self.bbs.get(Function.entryBBID).versionMap.put(name, loadRef); + 
return Ref.fromRegLocal(loadReg); + } + + return error.UnboundIdentifier; + } + + pub fn getNamedRefNoAdd(self: *Function, ir: *IR, name: StrID, bb: IR.BasicBlock.ID) NotFoundError!?Ref { + // if (name != IR.InternPool.NULL) { + // std.debug.print("getting ref for {s}\n", .{ir.getIdent(name)}); + // } else { + // std.debug.print("getting ref for NULL\n", .{}); + // } + // check if the register is in the current block + if (self.bbs.get(bb).versionMap.contains(name)) { + return self.bbs.get(bb).versionMap.get(name).?; + } + + // do bfs to find the in the incoming blocks + var queue = std.ArrayList(BasicBlock.ID).init(self.alloc); + defer queue.deinit(); + var visited = std.AutoHashMap(BasicBlock.ID, bool).init(self.alloc); + defer visited.deinit(); + try queue.append(bb); + try visited.put(bb, true); + while (queue.items.len > 0) { + const current = queue.orderedRemove(0); + // std.debug.print("visiting {s}\n", .{self.bbs.get(current).name}); + if (self.bbs.get(current).versionMap.contains(name)) { + // std.debug.print("found in block {d}\n", .{current}); + return self.bbs.get(current).versionMap.get(name).?; + } + for (self.bbs.get(current).incomers.items) |incomer| { + if (visited.contains(incomer)) { + continue; + } + try queue.append(incomer); + try visited.put(incomer, true); + } + } + + if (self.bbs.get(IR.Function.entryBBID).versionMap.contains(name)) { + return self.bbs.get(IR.Function.entryBBID).versionMap.get(name).?; + } + // we have not found it, we have traversed the tree all the way up! oh no! + + // checks the function's parameters + if (self.paramRegs.contains(name)) { + const paramRegID = self.paramRegs.get(name).?; + const paramReg = self.regs.get(paramRegID); + return Ref.fromReg(paramReg, self, ir); + } + // now we have to check if its in the typesMap, + + // okay she's nowhere... + // if it is declared in this function return null, otherwise search on + if (self.declaredVars.contains(name)) { + return null; + } + // check if its a function? 
+ + if (ir.funcs.items.safeIndexOf(name)) |funcID| { + const func = ir.funcs.items.entry(funcID); + return Ref.global(funcID, func.name, func.returnType); + } + + log.trace("fun.name not found := {s}\n", .{ + ir.getIdent(name), + }); + + for (ir.funcs.items.items) |func| { + log.trace("func := {s}\n", .{ir.getIdent(func.name)}); + } + // check if its a global + // TODO: add it so that global vars are loaded on use, will have to do the same on store + if (ir.globals.items.safeIndexOf(name)) |globalID| { + const global = ir.globals.items.entry(globalID); + return Ref.global(globalID, global.name, global.type); + } + + std.debug.print("name not found := {s}\n", .{ + ir.getIdent(name), + }); + return error.UnboundIdentifier; + } + + /// Gets the ID of a register created with an `alloca` in the entry + /// based on the name of the identifier in question + /// Returns `error.NotFound` + /// WARN: ONLY SUPPOSED TO BE USED IN STACK IR GEN + /// IN PHI NODES WE SHOULD SEARCH UP THE CFG + fn getNamedAllocaReg(self: *Function, name: StrID) NotFoundError!Register { + // 1 2 4 5 6 :( + for (self.bbs.get(Function.entryBBID).insts.items()) |instID| { + const inst = self.insts.get(instID); + const res = inst.res; + if (res.name == name) { + return self.regs.get(res.i); + } + } + return error.UnboundIdentifier; + } + + pub fn setReturnReg(self: *Function, reg: Register.ID) void { + self.returnReg = reg; + } + + pub const InstIter = struct { + func: *const Function, + bb: BasicBlock.ID, + instIndex: u32, + + pub fn init(func: *const Function) InstIter { + return .{ .func = func, .bb = Function.entryBBID, .instIndex = 0 }; + } + + pub const Item = struct { + bb: BasicBlock.ID, + inst: Inst, + }; + + pub fn next(self: *InstIter) ?Item { + if (self.bb >= self.func.bbs.len) { + return null; + } + var bb = self.func.bbs.get(self.bb); + if (self.instIndex >= bb.insts.len) { + if (self.bb == self.func.exitBBID) { + return null; + } + if (self.bb >= self.func.bbs.len - 1) { + self.bb = 
self.func.exitBBID;
+                } else {
+                    self.bb += 1;
+                    if (self.bb == self.func.exitBBID) {
+                        // skip the exit bb too
+                        self.bb += 1;
+                    }
+                }
+                bb = self.func.bbs.get(self.bb);
+                self.instIndex = 0;
+            }
+            // yeah... this one bit me
+            if (self.instIndex >= bb.insts.len) {
+                return null;
+            }
+            const instID = bb.insts.get(self.instIndex).*;
+            self.instIndex += 1;
+            return .{ .bb = self.bb, .inst = self.func.insts.get(instID).* };
+        }
+    };
+
+    /// Returns an iterator over this function's instructions, starting at the entry block.
+    pub fn instIter(self: *const Function) InstIter {
+        return InstIter.init(self);
+    }
+
+    /// Collects the instructions in the order `instIter` yields them.
+    /// The returned slice is allocated with `alloc` and owned by the caller. Its
+    /// length is the number of instructions actually yielded, so it never exposes
+    /// uninitialised slots when the iterator ends before `self.insts.len` items.
+    pub fn getOrderedInsts(self: *const Function, alloc: std.mem.Allocator) ![]Inst {
+        var ordered = try std.ArrayList(Inst).initCapacity(alloc, self.insts.len);
+        errdefer ordered.deinit();
+        var iter = self.instIter();
+        while (iter.next()) |item| {
+            // the iterator yields (bb, inst) pairs; only the instruction is kept
+            try ordered.append(item.inst);
+        }
+        return ordered.toOwnedSlice();
+    }
+};
+
+/// Reports whether two control flow instructions are equal.
+/// Both arguments are asserted to be control flow. `Br`, `Jmp` and `Ret` pairs are
+/// compared with their own `eq`; any other combination is logged and is not equal.
+fn ctrlFlowInstsEqual(a: Inst, b: Inst) bool {
+    utils.assert(a.isCtrlFlow(), "tried to compare non ctrl flow instruction:\n{any}\n", .{a});
+    utils.assert(b.isCtrlFlow(), "tried to compare non ctrl flow instruction:\n{any}\n", .{b});
+
+    if (a.op == .Br and b.op == .Br) {
+        const aBr = Inst.Br.get(a);
+        const bBr = Inst.Br.get(b);
+        return aBr.eq(bBr);
+    }
+    if (a.op == .Jmp and b.op == .Jmp) {
+        const aJmp = Inst.Jmp.get(a);
+        const bJmp = Inst.Jmp.get(b);
+        return aJmp.eq(bJmp);
+    }
+    if (a.op == .Ret and b.op == .Ret) {
+        const aRet = Inst.Ret.get(a);
+        const bRet = Inst.Ret.get(b);
+        return aRet.eq(bRet);
+    }
+    log.err("ctrl flow instructions are of different kinds: {s} != {s}\n", .{ @tagName(a.op), @tagName(b.op) });
+    return false;
+}
+
+/// The number for the register is based off the ID and the name
+/// so if there are two xs in phi, one with id 2 and one with id 20
+/// x = 5
+/// x = x +1;
+/// x2 = 5;
+/// x20 = x2 + 1;
+pub const Register = struct {
+    id: ID,
+    inst: Function.InstID,
+    name: StrID,
+    bb: BasicBlock.ID,
+    type: Type,
+
+    pub const ID = u32;
+
+    pub const default: Register = .{
+        .id = 0xdeadbeef,
+        .inst = 0xdeadbeef,
+        .name = InternPool.NULL,
+        .bb
= 0xdeadbeef, + .type = .void, + }; + + pub fn getID(self: Register) ID { + return self.id; + } +}; + +pub const Edge = struct { + src: CfgBlock.ID_t, + dest: CfgBlock.ID_t, + ID: usize, + pub const ID_t = usize; +}; + +pub const CfgBlock = struct { + alloc: std.mem.Allocator, + statements: std.ArrayList(Ast.Node), + typedIdents: std.ArrayList(StrID), + assignments: std.ArrayList(StrID), + incomers: std.ArrayList(Edge.ID_t), + outgoers: [2]?Edge.ID_t, + conditional: bool = false, + ID: usize, + name: []const u8, + pub const ID_t = usize; + + pub fn print(self: *CfgBlock) void { + std.debug.print("Block: {d}\n", .{self.ID}); + std.debug.print("Incomers: ", .{}); + for (self.incomers.items) |incomer| { + std.debug.print("{d} ", .{incomer}); + } + std.debug.print("\n", .{}); + std.debug.print("Outgoers: ", .{}); + for (self.outgoers) |outgoer| { + if (outgoer == null) { + continue; + } + std.debug.print("{d} ", .{outgoer.?}); + } + std.debug.print("\n", .{}); + } + + pub fn init(alloc: std.mem.Allocator, name: []const u8) CfgBlock { + return .{ + .alloc = alloc, + .incomers = std.ArrayList(Edge.ID_t).init(alloc), + .statements = std.ArrayList(Ast.Node).init(alloc), + .outgoers = [2]?Edge.ID_t{ null, null }, + .typedIdents = std.ArrayList(StrID).init(alloc), + .assignments = std.ArrayList(StrID).init(alloc), + .name = name, + .ID = 0, + }; + } + + pub fn addIdentsFromStatement(self: *CfgBlock, ir: *IR, ast: *const Ast, node: Ast.Node) !void { + const stat = node.kind.Statement; + const final = stat.finalIndex; + const start = stat.statement; + // from start to end find any typed identifiers + for (start..final) |idx| { + const c_node = ast.get(idx).*; + // check if it's a typed identifier + switch (c_node.kind) { + .TypedIdentifier => { + const typedIdent = c_node.kind.TypedIdentifier; + const ident = typedIdent.getName(ast); + if (ident.len == 0) { + continue; + } + const name = ir.internIdent(ident); + try self.typedIdents.append(name); + }, + .Selector => { + const 
ident = try ast.selectorToString(idx); + if (ident.len == 0) { + continue; + } + const name = ir.internIdent(ident); + try self.typedIdents.append(name); + }, + .LValue => { + const ident = try ast.lvalToString(idx); + if (ident.len == 0) { + continue; + } + const name = ir.internIdent(ident); + try self.typedIdents.append(name); + try self.assignments.append(name); + }, + else => {}, + } + } + } + + pub fn addIdentsFromExpression(self: *CfgBlock, ir: *IR, ast: *const Ast, node: Ast.Node) !void { + const expr = node.kind.Expression; + const final = expr.last - 1; + const start = expr.expr; + // from start to end find any typed identifiers + for (start..final) |idx| { + const c_node = ast.get(idx).*; + // check if it's a typed identifier + switch (c_node.kind) { + .TypedIdentifier => { + const typedIdent = c_node.kind.TypedIdentifier; + const ident = typedIdent.getName(ast); + if (ident.len == 0) { + continue; + } + const name = ir.internIdent(ident); + try self.typedIdents.append(name); + }, + .Selector => { + const ident = try ast.selectorToString(idx); + if (ident.len == 0) { + continue; + } + const name = ir.internIdent(ident); + try self.typedIdents.append(name); + }, + .LValue => { + const ident = try ast.lvalToString(idx); + if (ident.len == 0) { + continue; + } + const name = ir.internIdent(ident); + try self.typedIdents.append(name); + try self.assignments.append(name); + }, + else => {}, + } + } + } + + pub fn addIncomer(self: *CfgBlock, fun: *CfgFunction, incomer: CfgBlock.ID_t) !Edge { + // // see if the incommer already has an outgoer to this block + // for (fun.blocks.items[incomer].outgoers) |outgoer| { + // if (outgoer == null) continue; + // const edge1 = fun.edges.items[outgoer.?]; + // if (edge1.dest == self.ID) { + // return edge1; + // } + // } + // create a new edge + const edge = Edge{ .src = incomer, .dest = self.ID, .ID = fun.edges.items.len }; + try fun.edges.append(edge); + try fun.blocks.items[self.ID].incomers.append(edge.ID); + var 
edge_res = try fun.blocks.items[incomer].addOutgoerEdge(fun, edge.ID); + try fun.assertEdgeBothSides(edge_res.ID); + return edge_res; + } + + pub fn addOutgoer(self: *CfgBlock, fun: *CfgFunction, outgoer: CfgBlock.ID_t) !Edge { + // check if we already outgo to this block + // if we do, return + for (self.outgoers) |out| { + if (out == null) continue; + // get the edge from the lsit + const edge = fun.edges.items[out.?]; + if (edge.dest == outgoer) { + return edge; + } + } + // add ourselves as a incomer to the outgoer + var edge_res = try fun.blocks.items[outgoer].addIncomer(fun, self.ID); + try fun.assertEdgeBothSides(edge_res.ID); + return edge_res; + } + + pub fn addOutgoerEdge(self: *CfgBlock, fun: *CfgFunction, outgoer: Edge.ID_t) !Edge { + // see the comment in `addOutgoer` for why this is done + // alternative is to just ignore duplicates while actually + // using the cfg, but that seems kinda annoying ngl + if (self.outgoers[0] == null) { + fun.blocks.items[self.ID].outgoers[0] = outgoer; + // get the edge + const edge = fun.edges.items[outgoer]; + return edge; + } else if (self.outgoers[1] == null) { + fun.blocks.items[self.ID].outgoers[1] = outgoer; + + // get the edge + const edge = fun.edges.items[outgoer]; + return edge; + } else { + return error.TooManyOutgoers; + } + } + + // returns false if no edge was added(could not be found) + pub fn updateEdge(self: *CfgBlock, fun: *CfgFunction, old_edge: Edge.ID_t, new_edge: Edge.ID_t) !bool { + // check if its in the incomers + var flag: bool = false; + for (self.incomers.items, 0..) 
|incomer, i| { + if (incomer == old_edge) { + fun.blocks.items[self.ID].incomers.items[i] = new_edge; + flag = true; + } + } + // check if its in the outgoers + if (self.outgoers[0] != null) { + if (self.outgoers[0] == old_edge) { + fun.blocks.items[self.ID].outgoers[0] = new_edge; + flag = true; + } + } + if (self.outgoers[1] != null) { + if (self.outgoers[1] == old_edge) { + fun.blocks.items[self.ID].outgoers[1] = new_edge; + flag = true; + } + } + if (flag) { + return true; + } + + return error.CfgEdgeNotFound; + } +}; + +pub const CfgFunction = struct { + pub const ID = usize; + blocks: std.ArrayList(CfgBlock), + postOrder: std.ArrayList(CfgBlock.ID_t), + postOrderMap: std.AutoHashMap(CfgBlock.ID_t, usize), + edges: std.ArrayList(Edge), + alloc: std.mem.Allocator, + params: std.ArrayList(StrID), + decls: std.ArrayList(StrID), + declsUsed: std.AutoHashMap(StrID, bool), + assignments: std.AutoHashMap(StrID, std.AutoHashMap(CfgBlock.ID_t, bool)), + paramsUsed: std.ArrayList(StrID), + statements: std.ArrayList(Ast.Node), + funNode: Ast.Node.Kind.FunctionType, + dominators: std.ArrayList(Set.Set(CfgBlock.ID_t)), + idoms: std.AutoHashMap(CfgBlock.ID_t, CfgBlock.ID_t), + domChildren: std.AutoHashMap(CfgBlock.ID_t, std.ArrayList(CfgBlock.ID_t)), + domFront: std.AutoHashMap(CfgBlock.ID_t, std.ArrayList(CfgBlock.ID_t)), + exitID: CfgBlock.ID_t, + + pub const BSet = Set.Set(CfgBlock.ID_t); + + pub fn getBlockIncomerIDs(self: *CfgFunction, blockID: CfgBlock.ID_t) !std.ArrayList(CfgBlock.ID_t) { + const block = self.blocks.items[blockID]; + var result = std.ArrayList(CfgBlock.ID_t).init(self.alloc); + for (block.incomers.items) |incomer| { + try result.append(self.edges.items[incomer].src); + } + return result; + } + + pub fn getPostID(self: *CfgFunction, postID: usize) CfgBlock.ID_t { + return self.postOrder.items[postID]; + } + + // // dominator of the start node is the start itself + // Dom(n0) = {n0} + // // for all other nodes, set all nodes as dominators + // for each 
n in N - {n0}
+    // Dom(n) = N;
+    // // iteratively eliminate nodes that are not dominators
+    // while changes in any Dom(n)
+    // for each n in N - {n0}:
+    // Dom(n) = {n} union with intersection over Dom(p) for all p in pred(n)
+    // return Dom
+    /// Computes the dominator set of every block and stores the result in
+    /// `self.dominators`, indexed by block ID.
+    /// The first entry of `postOrder` is treated as the start node and is
+    /// dominated only by itself. Every other block starts with all of `postOrder`
+    /// as dominators and is narrowed against its predecessors until nothing changes.
+    /// On error nothing is stored and every set built so far is released.
+    pub fn generateDominators(self: *CfgFunction) !void {
+        var result = try std.ArrayList(Set.Set(CfgBlock.ID_t)).initCapacity(self.alloc, self.blocks.items.len);
+        errdefer {
+            for (result.items) |*set| set.deinit(self.alloc);
+            result.deinit();
+        }
+        // fill all the dominators with empty
+        for (self.blocks.items) |_| {
+            try result.append(BSet.init());
+        }
+
+        // // dominator of the start node is the start itself
+        // Dom(n0) = {n0}
+        // // for all other nodes, set all nodes as dominators
+        // for each n in N - {n0}
+        // Dom(n) = N;
+        // initialize the dominator sets
+        for (self.postOrder.items, 0..) |block, i| {
+            if (i == 0) {
+                _ = try result.items[block].add(self.alloc, block);
+                continue;
+            }
+
+            for (self.postOrder.items) |block2| {
+                _ = try result.items[block].add(self.alloc, block2);
+            }
+        }
+
+        // // std.debug.print("after init Dominators\n", .{});
+        // for (self.postOrder.items) |block| {
+        // std.debug.print("block = {any}, ", .{block});
+        // result.items[block].print();
+        // std.debug.print("\n", .{});
+        // }
+        // while changes in any Dom(n)
+        // for each n in N - {n0}:
+        // Dom(n) = {n} union with intersection over Dom(p) for all p in pred(n)
+        // return Dom
+        var changes = true;
+        while (changes) {
+            changes = false;
+            for (self.postOrder.items, 0..) |block, i| {
+                if (i == 0) continue;
+
+                // get the predecessors for this block
+                const preds = try self.getBlockIncomerIDs(block);
+                defer preds.deinit();
+                for (preds.items) |pred| {
+                    // get the intersection of the dominators of the predecessors
+                    // get Dom(p)
+                    var predDom = result.items[pred];
+                    var blockDom = result.items[block];
+                    var intersection = try blockDom.intersectionOf(self.alloc, predDom);
+                    defer intersection.deinit(self.alloc);
+                    _ = try intersection.add(self.alloc, block);
+                    // std.debug.print("\nblock = {any}, pred = {any}\n", .{ block, pred });
+                    // std.debug.print("predDom\n", .{});
+                    // predDom.print();
+                    // std.debug.print("blockDm\n", .{});
+                    // blockDom.print();
+                    // std.debug.print("intersection\n", .{});
+                    // intersection.print();
+                    // std.debug.print("\n", .{});
+                    const unchanged = intersection.eql(blockDom);
+                    if (!unchanged) {
+                        // clone before releasing the old set so a failed clone
+                        // cannot leave a freed set behind in `result`
+                        const replacement = try intersection.clone(self.alloc);
+                        result.items[block].deinit(self.alloc);
+                        result.items[block] = replacement;
+                        changes = true;
+                    }
+                }
+            }
+        }
+        self.dominators = result;
+        // // std.debug.print("Dominators\n", .{});
+        // for (self.postOrder.items) |block| {
+        // std.debug.print("block = {any}, ", .{block});
+        // self.dominators.items[block].print();
+        // std.debug.print("\n", .{});
+        // }
+    }
+
+    // // Initialize the immediate dominators map to be empty
+    // idom = {}
+
+    // // For each node n in the set of all nodes N
+    // for each n in N:
+    // // Exclude the node itself from its set of dominators to find possible idoms
+    // PossibleIdoms = Dom(n) - {n}
+
+    // // The idom of node n is the unique dominator d in PossibleIdoms such that
+    // // every other dominator in PossibleIdoms also dominates d
+    // for each d in PossibleIdoms:
+    // if ∀d' ∈ PossibleIdoms - {d} : d' ∈ Dom(d)
+    // idom[n] = d
+    // break
+    // // Return the map of immediate dominators
+    // return idom
+    pub fn computeIdoms(self: *CfgFunction) !void {
+        // for each n in N;
+        for (self.postOrder.items) |block| {
+            // Exclude the node itself from
its set of dominators to find possible idoms
+            var blockDom = self.dominators.items[block];
+            var possibleIdoms = try blockDom.clone(self.alloc);
+            // released at the end of every iteration, including when `put` fails
+            defer possibleIdoms.deinit(self.alloc);
+            _ = possibleIdoms.remove(block);
+
+            // The idom of node n is the unique dominator d in PossibleIdoms such that
+            // every other dominator in PossibleIdoms also dominates d
+            var posIter = possibleIdoms.iterator();
+            while (posIter.next()) |d| {
+                var doms_all = true;
+                // // Check that every other element of PossibleIdoms dominates d
+                // for each d' in PossibleIdoms:
+                // if d != d' and d' not in Dom(d):
+                // dominates_all = false
+                // break
+                var posIter2 = possibleIdoms.iterator();
+                while (posIter2.next()) |d2| {
+                    if (d.key_ptr.* == d2.key_ptr.*) {
+                        continue;
+                    }
+                    if (!self.dominators.items[d.key_ptr.*].contains(d2.key_ptr.*)) {
+                        // std.debug.print("block = {d}, d = {d}, d2 = {d}\n", .{ block, d.key_ptr.*, d2.key_ptr.* });
+                        doms_all = false;
+                        break;
+                    }
+                }
+
+                if (doms_all) {
+                    // std.debug.print("idom adding block = {d}, idom = {d}\n", .{ block, d.key_ptr.* });
+
+                    _ = try self.idoms.put(block, d.key_ptr.*);
+                    break;
+                }
+            }
+        }
+    }
+
+    // finds the children for a node
+    // function find_children(idom, all_nodes, target_node):
+    // children = []
+
+    // // Iterate over all nodes in the graph
+    // for each node in all_nodes:
+    // // Check if the immediate dominator of the current node is the target_node
+    // if idom[node] == target_node:
+    // // If so, add the node to the children list
+    // children.append(node)
+
+    // // Return the list of children nodes
+    // return children
+    /// Returns the blocks in `postOrder` whose recorded immediate dominator is
+    /// `target_node`. The list is allocated with `self.alloc` and owned by the
+    /// caller; it is released here if an append fails.
+    pub fn findChildren(self: *CfgFunction, target_node: CfgBlock.ID_t) !std.ArrayList(CfgBlock.ID_t) {
+        var children = std.ArrayList(CfgBlock.ID_t).init(self.alloc);
+        errdefer children.deinit();
+        for (self.postOrder.items) |node| {
+            if (self.idoms.get(node) == target_node) {
+                try children.append(node);
+            }
+        }
+        return children;
+    }
+
+    pub fn printChildren(self: *CfgFunction, node: CfgBlock.ID_t) void {
+        // print block
name
+        self.printBlockName(node);
+        const children = self.domChildren.get(node);
+        if (children == null) {
+            return;
+        }
+        for (children.?.items) |child| {
+            std.debug.print("{d} ", .{child});
+        }
+        std.debug.print("\n", .{});
+    }
+
+    pub fn printallChildren(self: *CfgFunction) void {
+        for (self.postOrder.items) |node| {
+            self.printChildren(node);
+        }
+    }
+
+    /// Stores, for every block in `postOrder`, the list returned by
+    /// `findChildren` in `domChildren`.
+    pub fn generateDomChildren(self: *CfgFunction) !void {
+        for (self.postOrder.items) |node| {
+            const children = try self.findChildren(node);
+            // do not leak the freshly built list when the map insert fails
+            errdefer children.deinit();
+            try self.domChildren.put(node, children);
+        }
+    }
+
+    //computeDF[n]:
+    // S = {}
+    // for each node y in succ[n]:
+    // if idom(y) != n:
+    // S = S U {y}
+    // for each child c of n in the dom-tree:
+    // computeDF[c]
+    // for each w that is in the set DF[c]
+    // if n does not dom w, or n = w:
+    // S = S U {w}
+    // DF[n] = S
+    /// Computes the dominance frontier of `nodeID` (recursing into its children
+    /// in `domChildren` first) and stores it in `domFront`.
+    /// The frontier is kept as a set: a block is appended at most once.
+    /// A frontier is stored even when `nodeID` has no `domChildren` entry, and a
+    /// list previously stored for `nodeID` is released when it is replaced.
+    pub fn computeDomFront(self: *CfgFunction, nodeID: CfgBlock.ID_t) !void {
+        const node = self.blocks.items[nodeID];
+        var S = std.ArrayList(CfgBlock.ID_t).init(self.alloc);
+        errdefer S.deinit();
+        // for each node y in succ[n]:
+        for (node.outgoers) |outgoer| {
+            // unused outgoer slots are null
+            const edgeID = outgoer orelse continue;
+            const edge = self.edges.items[edgeID];
+            if (self.idoms.get(edge.dest) != nodeID) {
+                // std.debug.print("edge.dest = {d}, nodeID = {d}\n", .{ edge.dest, nodeID });
+
+                if (std.mem.indexOfScalar(CfgBlock.ID_t, S.items, edge.dest) == null) {
+                    try S.append(edge.dest);
+                }
+            }
+        }
+        // for each child c of n in the dom-tree:
+        if (self.domChildren.get(nodeID)) |children| {
+            for (children.items) |child| {
+                try self.computeDomFront(child);
+                const DF = self.domFront.get(child) orelse continue;
+                for (DF.items) |w| {
+                    // keep w when n does not dominate it, or when w is n itself
+                    const inFrontier = !self.dominators.items[w].contains(nodeID) or nodeID == w;
+                    if (inFrontier and std.mem.indexOfScalar(CfgBlock.ID_t, S.items, w) == null) {
+                        try S.append(w);
+                    }
+                }
+            }
+        }
+        // DF[n] = S, releasing whatever an earlier computation stored for n
+        if (try self.domFront.fetchPut(nodeID, S)) |previous| {
+            previous.value.deinit();
+        }
+    }
+
+    /// just do it for all of them
+    pub fn computeAllDomFronts(self: *CfgFunction) !void {
+        for (self.postOrder.items) |node| {
+            try self.computeDomFront(node);
+        }
+    }
+
+    pub fn genDominance(self: *CfgFunction) !void {
+        try
self.generateDominators();
        try self.computeIdoms();
        try self.generateDomChildren();
        try self.computeAllDomFronts();
    }

    /// Creates an empty `CfgFunction` whose containers all allocate from `alloc`.
    /// `funNode` is left `undefined` and `exitID` is preset to 1; `generate`
    /// adds the "body" and "exit" blocks first via `addBlocksWithEdge`, which
    /// gives them IDs 0 and 1 on an empty block list.
    pub fn init(alloc: std.mem.Allocator) CfgFunction {
        return .{
            .blocks = std.ArrayList(CfgBlock).init(alloc),
            .edges = std.ArrayList(Edge).init(alloc),
            .params = std.ArrayList(StrID).init(alloc),
            .decls = std.ArrayList(StrID).init(alloc),
            .declsUsed = std.AutoHashMap(StrID, bool).init(alloc),
            .paramsUsed = std.ArrayList(StrID).init(alloc),
            .statements = std.ArrayList(Ast.Node).init(alloc),
            .postOrder = std.ArrayList(CfgBlock.ID_t).init(alloc),
            .idoms = std.AutoHashMap(CfgBlock.ID_t, CfgBlock.ID_t).init(alloc),
            .domChildren = std.AutoHashMap(CfgBlock.ID_t, std.ArrayList(CfgBlock.ID_t)).init(alloc),
            .domFront = std.AutoHashMap(CfgBlock.ID_t, std.ArrayList(CfgBlock.ID_t)).init(alloc),
            .dominators = std.ArrayList(Set.Set(CfgBlock.ID_t)).init(alloc),
            .postOrderMap = std.AutoHashMap(CfgBlock.ID_t, usize).init(alloc),
            .assignments = std.AutoHashMap(StrID, std.AutoHashMap(CfgBlock.ID_t, bool)).init(alloc),
            .funNode = undefined,
            .exitID = 1,
            .alloc = alloc,
        };
    }

    /// Prints the quoted label `"<block name>_<id>"` for block `id` to stderr.
    pub fn printBlockName(self: *CfgFunction, id: CfgBlock.ID_t) void {
        const block = self.blocks.items[id];
        std.debug.print("\"{s}_{d}\"", .{ block.name, id });
    }

    /// Checks that edge `edgeID` is registered on both of its endpoints: it
    /// must appear in one of the source block's two outgoer slots and in the
    /// destination block's incomer list.
    ///
    /// A violation is a programming error and panics with a message. (The
    /// previous bare `unreachable` is undefined behaviour in unsafe release
    /// modes.) The function no longer allocates; the error-union return type
    /// is kept so existing `try` call sites stay valid.
    pub fn assertEdgeBothSides(self: *CfgFunction, edgeID: Edge.ID_t) !void {
        const edge = self.edges.items[edgeID];

        // check that the src has this edge
        const outgoers = self.blocks.items[edge.src].outgoers;
        var srcFlag = false;
        for (outgoers) |maybeOut| {
            if (maybeOut) |out| {
                if (out == edgeID) {
                    srcFlag = true;
                }
            }
        }

        // check that the dest has this edge
        var destFlag = false;
        for (self.blocks.items[edge.dest].incomers.items) |incomer| {
            if (incomer == edgeID) {
                destFlag = true;
            }
        }

        if (!(destFlag and srcFlag)) {
            @panic("CFG edge is not registered on both its source and destination blocks");
        }
    }

    /// Prints the outgoing edges of block `id` as one DOT statement
    /// (`src -> dest;` or `src -> dest1, dest2;`). Prints nothing when both
    /// outgoer slots are empty.
    pub fn printBlockOutEdges(self: *CfgFunction, id: CfgBlock.ID_t) !void {
        // get the block
        const block = self.blocks.items[id];
        // get the outgoers
        const outgoers = block.outgoers;
        if (outgoers[0] != null) {
            const edge = self.edges.items[outgoers[0].?];
            self.printBlockName(edge.src);
            std.debug.print(" -> ", .{});
            self.printBlockName(edge.dest);
            if (outgoers[1] == null) {
                std.debug.print(";\n", .{});
            } else {
                std.debug.print(", ", .{});
                const edge2 = self.edges.items[outgoers[1].?];
                self.printBlockName(edge2.dest);
                std.debug.print(";\n", .{});
            }
        } else if (outgoers[1] != null) {
            // only the second slot is populated
            const edge = self.edges.items[outgoers[1].?];
            self.printBlockName(edge.src);
            std.debug.print(" -> ", .{});
            self.printBlockName(edge.dest);
            std.debug.print(";\n", .{});
        }
    }

    // FIXME
    // pub fn printAstRange(ast: *const Ast, start: usize, end: usize) void {
    //     for (start..end) |idx| {
    //         const node = ast.get(idx).*;
    //         const kind = node.kind;
    //         const token = node.token;
    //         std.debug.print("{d}: {s} {s}\n", .{ idx, @tagName(kind), token._range.getSubStrFromStr(ast.input) });
    //     }
    // }

    // pub fn printOutStatemetns(self: *CfgFunction, ir: *IR, blockId: CfgBlock.ID_t) void {
    //     for (self.blocks.items[blockId].statements.items) |stmt| {
    //         switch (stmt.kind) {
    //             .Expression => {
    //                 self.
// },
    //         }
    //         std.debug.print("{s}\n", .{ir.intern_pool.get(stmt)});
    //     }
    // }

    /// Dumps the CFG to stderr as a Graphviz `digraph`: one box per block in
    /// `self.postOrder` (labelled with the block name and its typed idents),
    /// that block's out-edges, and three extra nodes listing `params`,
    /// `decls` and the keys of `declsUsed`.
    pub fn printOutFunAsDot(self: *CfgFunction, ir: *IR) void {
        std.debug.print("digraph G{{ \n", .{});
        std.debug.print("node [shape=box]\n", .{});
        for (self.postOrder.items) |block_id| {
            var block = self.blocks.items[block_id];
            self.printBlockName(block.ID);
            std.debug.print(" [label=", .{});
            self.printBlockName(block.ID);
            std.debug.print("+\"\\n", .{});
            for (block.typedIdents.items) |ident| {
                std.debug.print("{s}\\n", .{ir.getIdent(ident)});
            }
            std.debug.print("\"];\n", .{});
            try self.printBlockOutEdges(block.ID);
        }
        // print out params and decls as a node
        std.debug.print("params [label=\"params\\n", .{});
        for (self.params.items) |param| {
            std.debug.print("{s}\\n", .{ir.getIdent(param)});
        }
        std.debug.print("\"];\n", .{}); // end of params
        std.debug.print("decls [label=\"decls\\n", .{});
        for (self.decls.items) |decl| {
            std.debug.print("{s}\\n", .{ir.getIdent(decl)});
        }
        std.debug.print("\"];\n", .{}); // end of decls
        // print out the used decls
        std.debug.print("declsUsed [label=\"declsUsed\\n", .{});
        var keyIter = self.declsUsed.keyIterator();
        while (keyIter.next()) |key| {
            std.debug.print("{s}\\n", .{ir.getIdent(key.*)});
        }
        std.debug.print("\"];\n", .{}); // end of declsUsed
        std.debug.print("}}\n", .{});
    }

    /// Creates a new edge `src -> dest`, appends it to `self.edges`, stores
    /// its ID in the first free outgoer slot of `src` and appends it to the
    /// incomer list of `dest`.
    ///
    /// Returns `error.TooManyOutgoers` when both outgoer slots of `src` are
    /// already taken. The slot is chosen before anything is mutated, so a
    /// rejected call no longer leaves an unlinked edge in `self.edges`.
    pub fn addEdgeBetween(self: *CfgFunction, src: CfgBlock.ID_t, dest: CfgBlock.ID_t) !Edge {
        const srcOutgoers = self.blocks.items[src].outgoers;
        const slot: usize = if (srcOutgoers[0] == null)
            0
        else if (srcOutgoers[1] == null)
            1
        else
            return error.TooManyOutgoers;

        const edge = Edge{ .src = src, .dest = dest, .ID = self.edges.items.len };
        try self.edges.append(edge);
        // add the outgoer to the src block
        self.blocks.items[src].outgoers[slot] = edge.ID;

        // add the incomer to the dest block
        try self.blocks.items[dest].incomers.append(edge.ID);
        try self.assertEdgeBothSides(edge.ID);

        return edge;
    }

    // Reverse post-order computation, as pseudo-code:
    //
    // 1. Initialize:
    //    - visited = empty set
    //    - reversePostOrder = empty list

    // 2. DFS Function:
    //    function DFS(node):
    //        if node is not in visited:
    //            visited.add(node)
    //            for each child in successors(node):
    //                DFS(child)
    //            reversePostOrder.prepend(node) // Prepend to build the list in reverse postorder

    // 3. Start DFS from Entry:
    //    - DFS(entryNode)

    // 4. Check Unvisited Nodes (optional, for handling disconnected graphs):
    //    for each node in CFG:
    //        if node is not visited:
    //            DFS(node)

    // 5. Result:
    //    - reversePostOrder now contains the nodes in reverse postorder

    /// Depth-first walk from `node`. Each block is marked in `visited`, its
    /// successors are visited (outgoer slot 1 first, then slot 0), and the
    /// block is then *appended* to `reversePostOrder`. The list is therefore
    /// in post-order when this returns; `reversePostOrderComp` reverses it.
    pub fn DFS(self: *CfgFunction, node: CfgBlock.ID_t, visited: *std.AutoHashMap(CfgBlock.ID_t, bool), reversePostOrder: *std.ArrayList(CfgBlock.ID_t)) !void {
        if (visited.get(node) == null) {
            try visited.put(node, true);
            var outgoer = self.blocks.items[node].outgoers[1];
            if (outgoer != null) {
                const edge = self.edges.items[outgoer.?];
                try DFS(self, edge.dest, visited, reversePostOrder);
            }
            outgoer = self.blocks.items[node].outgoers[0];
            if (outgoer != null) {
                const edge = self.edges.items[outgoer.?];
                try DFS(self, edge.dest, visited, reversePostOrder);
            }
            try reversePostOrder.append(node);
        }
    }

    /// Reverses the list in place. Safe on an empty list (the hand-rolled
    /// loop computed `len - 1` and underflowed when `len` was 0). The
    /// error-union return type is kept for existing `try` call sites.
    pub fn arrayListReverse(self: *std.ArrayList(CfgBlock.ID_t)) !void {
        std.mem.reverse(CfgBlock.ID_t, self.items);
    }

    /// Returns true when the block has no incoming edges and both of its
    /// outgoer slots are empty.
    pub fn isFloating(self: *CfgFunction, blockID: CfgBlock.ID_t) bool {
        const income = self.blocks.items[blockID].incomers.items.len == 0;
        const out = self.blocks.items[blockID].outgoers[0] == null and self.blocks.items[blockID].outgoers[1] == null;
        return income and out;
    }

    /// Computes a reverse post-order of the blocks and stores it in
    /// `self.postOrder`; each block's position is recorded in
    /// `self.postOrderMap`. The walk starts at block 0 and is then repeated
    /// from every unvisited block that is not floating (see `isFloating`).
    /// Does nothing when there are no blocks.
    pub fn reversePostOrderComp(self: *CfgFunction) !void {
        var visited = std.AutoHashMap(CfgBlock.ID_t, bool).init(self.alloc);
        defer visited.deinit();
        var reversePostOrder = std.ArrayList(CfgBlock.ID_t).init(self.alloc);
        // ownership moves to `self.postOrder` on success; free only on failure
        errdefer reversePostOrder.deinit();

        // start DFS from entry
        if (self.blocks.items.len == 0) {
            return;
        }
        try DFS(self, 0, &visited, &reversePostOrder);

        // check unvisited nodes
        for (self.blocks.items) |block| {
            if (visited.get(block.ID) == null) {
                // skip blocks that have neither incomers nor outgoers
                if (!self.isFloating(block.ID)) {
                    try DFS(self, block.ID, &visited, &reversePostOrder);
                }
            }
        }
        try arrayListReverse(&reversePostOrder);
        for (reversePostOrder.items, 0..) |block, i| {
            try self.postOrderMap.put(block, i);
        }
        self.postOrder = reversePostOrder;
    }

    /// Prints every entry of `self.domFront` to stderr as
    /// `<block> domFront: <frontier blocks...>`.
    pub fn printDomFront(self: *CfgFunction) !void {
        // get dom iter
        var domIter = self.domFront.keyIterator();
        while (domIter.next()) |dom| {
            const domFront = self.domFront.get(dom.*).?;
            std.debug.print("{any} domFront: ", .{dom.*});
            for (domFront.items) |front| {
                std.debug.print("{any} ", .{front});
            }
            std.debug.print("\n", .{});
        }
    }

    /// Builds the CFG for one function:
    ///   1. copies parameter names and interned local declaration names,
    ///   2. creates the "body" and "exit" blocks joined by one edge,
    ///   3. lowers the body statements with `generateStatements`,
    ///   4. computes the reverse post-order and dominance information,
    ///   5. dumps the graph as DOT to stderr,
    ///   6. fills `assignments` (ident -> set of blocks listing it in their
    ///      `assignments`).
    /// All allocations use `func.alloc`.
    pub fn generate(
        func: *Function,
        ast: *const Ast,
        funNode: Ast.Node.Kind.FunctionType,
        ir: *IR,
    ) !CfgFunction {
        var self = CfgFunction.init(func.alloc);
        // fill the params and the decls
        for (func.params.items) |param| {
            try (self.params).append(param.name);
        }

        const funBody = funNode.getBody(ast);
        var declsIter = funBody.iterLocalDecls(ast);
        while (declsIter.next()) |decl| {
            const declNode = decl.kind.TypedIdentifier;
            const declName = ir.internIdent(declNode.getName(ast));
            try self.decls.append(declName);
        }

        // pre init body and exit blocks
        const bodyInit = CfgBlock.init(func.alloc, "body");
        const exitInit = CfgBlock.init(func.alloc, "exit");
        const the_edge = try self.addBlocksWithEdge(bodyInit, exitInit);

        // get the statement from the function body
        var statIter = funBody.iterStatements(ast);
        try self.generateStatements(ast, ir, statIter, the_edge);
        try self.reversePostOrderComp();
        try self.genDominance();
        // self.printallChildren();
        // try self.printDomFront();
        self.printOutFunAsDot(ir);

        // for every block's assignments add to the function's assignments
        for (self.postOrder.items) |blockID| {
            for (self.blocks.items[blockID].assignments.items) |ident| {
                if (!self.assignments.contains(ident)) {
                    // init the assignments for the ident
                    try self.assignments.put(ident, std.AutoHashMap(CfgBlock.ID_t, bool).init(self.alloc));
                }
                try self.assignments.getPtr(ident).?.put(blockID, true);
            }
        }
        return self;
    }

    /// Detaches `blockID` from its successors and recursively does the same
    /// for every successor that is left without incomers.
    ///
    /// An out-edge to the exit block is kept unless `blockID` itself has no
    /// incomers and is not block 0. The scratch lists are now released on
    /// error paths as well (`defer` / `errdefer`).
    pub fn cleanseOutgersRec(self: *CfgFunction, blockID: CfgBlock.ID_t) !void {
        var outIDArr = std.ArrayList(CfgBlock.ID_t).init(self.alloc);
        defer outIDArr.deinit();

        // pass 1: clear the outgoer slots and remember the former successors
        for (self.blocks.items[blockID].outgoers, 0..) |out, i| {
            if (out != null) {
                try self.assertEdgeBothSides(out.?);
                // get the edge
                const outEdge = self.edges.items[out.?];
                if (outEdge.dest == self.exitID) {
                    if (self.blocks.items[blockID].incomers.items.len == 0 and blockID != 0) {} else {
                        continue;
                    }
                }

                try outIDArr.append(self.edges.items[out.?].dest);

                self.blocks.items[blockID].outgoers[i] = null;
            }
        }

        // pass 2: rebuild each successor's incomer list without edges from `blockID`
        for (outIDArr.items) |outID| {
            if (outID == self.exitID) {
                // check if we have no incomers, check if we are not block 0
                if (self.blocks.items[blockID].incomers.items.len == 0 and blockID != 0) {} else {
                    continue;
                }
            }
            const succIncomers = self.blocks.items[outID].incomers;
            var newSuccIncomers = std.ArrayList(Edge.ID_t).init(self.alloc);
            errdefer newSuccIncomers.deinit();
            for (succIncomers.items) |incomer| {
                const succInEdge = self.edges.items[incomer];
                if (succInEdge.src == blockID) {
                    continue;
                }
                if (succInEdge.dest != outID) {
                    utils.todo("This edge has been inproperly configed fix it\n", .{});
                }
                try newSuccIncomers.append(incomer);
            }
            self.blocks.items[outID].incomers.deinit();
            self.blocks.items[outID].incomers = newSuccIncomers;
        }

        // pass 3: recurse into successors that lost their last incomer
        for (outIDArr.items) |outID| {
            if (self.blocks.items[outID].incomers.items.len == 0) {
                try self.cleanseOutgersRec(outID);
            }
        }
    }

    /// Lowers the statements produced by `_statIter` into CFG blocks.
    ///
    /// `_edge.src` is the block currently being filled. Statements that are
    /// not control flow are appended to it. Control-flow statements are
    /// dispatched on the inner node kind (`Return`, `ConditionalIf`,
    /// `While`; `ConditionalIfElse` is a no-op here) and any other kind is
    /// `unreachable`. Before leaving a block its `typedIdents` are recorded
    /// in `declsUsed`.
    pub fn generateStatements(
        self: *CfgFunction,
        ast: *const Ast,
        ir: *IR,
        _statIter: Ast.NodeIter(.Statement),
        _edge: Edge,
    ) !void {
        var edge = _edge;
        var cBlock = edge.src;
        var statIter = _statIter;

        // to pass onto exiting child statIter must be update to be at the end of the code within the control flow
        // the edge must be updated such that the src is the exiting child, and that the dest is the block that follows top level this should alway be pointing to exit
        while (statIter.nextInc()) |c_stat| {
            const statementIndex = c_stat.kind.Statement.statement;
            const statementNode = c_stat.kind.Statement;
            // self.printBlockName(cBlock);
            // ast.printNodeLine(c_stat);
            const innerNode = ast.get(statementIndex);
            const kind = innerNode.kind;
            const finalIndex = c_stat.kind.Statement.finalIndex;
            _ = finalIndex;

            // if not control flow
            if (!statementNode.isControlFlow(ast)) {
                // add all the idents in the statement to the block
                try self.blocks.items[cBlock].addIdentsFromStatement(ir, ast, c_stat);
                // add the statement to the block
                try self.blocks.items[cBlock].statements.append(c_stat);
                // std.debug.print("items in block ", .{});
                // self.printBlockName(cBlock);
                // std.debug.print("{any}\n", .{self.blocks.items[cBlock].statements.items});
                continue;
            }

            // add all the idents in the block (that we will now be leaving)
            // to the declsUsed
            for (self.blocks.items[cBlock].typedIdents.items) |ident| {
                try self.declsUsed.put(ident, true);
            }

            switch (kind) {
                // early optimization of removing all code that is after a return
                .Return => {
                    // add all the idents in the statement to the block
                    try self.blocks.items[cBlock].addIdentsFromStatement(ir, ast, c_stat);

                    // add the statement to the block
                    try self.blocks.items[cBlock].statements.append(c_stat);

                    // if this is a return in the body, we are done
                    if (self.exitID ==
edge.dest) {
                        // add all the idents in the block (that we will now be leaving)
                        // to the declsUsed
                        for (self.blocks.items[cBlock].typedIdents.items) |ident| {
                            try self.declsUsed.put(ident, true);
                        }
                        return;
                    }
                    // the current edge does not lead to exit: detach this block
                    // from its successors, then wire it straight to the exit block
                    try self.cleanseOutgersRec(cBlock);
                    var exitEdge = try self.addEdgeBetween(cBlock, self.exitID);
                    _ = exitEdge;

                    for (self.blocks.items[cBlock].typedIdents.items) |ident| {
                        try self.declsUsed.put(ident, true);
                    }
                    // nothing after a return is lowered
                    return;
                },
                .ConditionalIf => |_if| {
                    const isIfElse = _if.isIfElse(ast);
                    const as_ifCond = ast.get(_if.cond).*;
                    var as_thenBlock: Ast.Node = undefined;
                    // the two else variables are only assigned (and only read) when isIfElse
                    var as_elseBlock: ?Ast.Node = undefined;
                    var as_elseBlockID: ?usize = undefined;

                    if (!isIfElse) {
                        as_thenBlock = ast.get(_if.block).*;
                    } else {
                        const condife = ast.get(_if.block).kind.ConditionalIfElse;
                        as_thenBlock = ast.get(condife.ifBlock).*;
                        as_elseBlockID = condife.elseBlock;
                        as_elseBlock = ast.get(condife.elseBlock).*;
                    }
                    // `ed` is a local copy of the edge being split; each
                    // addBlockOnEdge call inserts a block on that edge and
                    // `ed.src` is then moved forward to the inserted block,
                    // giving the chain
                    // if.cond -> then.body -> then.exit -> if.exit -> (old dest)
                    var ed = edge;
                    // if block
                    // create 4 new blocks
                    // if.cond
                    var ifCond = CfgBlock.init(self.alloc, "if.cond");
                    try ifCond.addIdentsFromExpression(ir, ast, as_ifCond);
                    try ifCond.statements.append(as_ifCond);
                    for (ifCond.typedIdents.items) |ident| {
                        try self.declsUsed.put(ident, true);
                    }
                    ifCond.conditional = true;
                    var ifCondID = try self.addBlockOnEdge(ifCond, ed);
                    ed.src = ifCondID;

                    // then.body
                    // will add the idents and such after
                    var thenBody = CfgBlock.init(self.alloc, "then.body");
                    var thenBodyID = try self.addBlockOnEdge(thenBody, ed);
                    ed.src = thenBodyID;
                    const body_range = as_thenBlock.kind.Block.range(ast);
                    var ifThenEdge = ed;

                    // then.exit
                    var thenExit = CfgBlock.init(self.alloc, "then.exit");
                    var thenExitID = try self.addBlockOnEdge(thenExit, ed);
                    ed.src = thenExitID;

                    // re-read the edge leaving then.body now that then.exit
                    // has been inserted after it
                    ifThenEdge = self.edges.items[self.blocks.items[thenBodyID].outgoers[0].?];
                    try self.assertEdgeBothSides(ifThenEdge.ID);

                    // if.exit
                    var ifExit = CfgBlock.init(self.alloc, "if.exit");
                    var ifExitID = try self.addBlockOnEdge(ifExit, ed);
                    ed.src = ifExitID;

                    edge = ed;
                    if (!isIfElse) {
                        // no else branch: the condition can skip straight to if.exit
                        _ = try self.addEdgeBetween(ifCondID, ifExitID);
                    }

                    if (body_range != null) {
                        // var ifBody_iter: Ast.NodeList(.Statement) = undefined;
                        // ifBody_iter = ifBody_iter.init(ast, body_range[0], body_range[1]);
                        // const ifBody_iter = Ast.NodeList(Ast.Node.Kind.Statement).init(ast, body_range[0], body_range[1]);
                        const ifBody_iter = Ast.NodeIter(@typeInfo(Ast.Node.Kind).Union.tag_type.?.Statement).init(ast, body_range.?[0], body_range.?[1]);
                        // lower the then-body recursively on the then.body -> then.exit edge
                        try self.generateStatements(ast, ir, ifBody_iter, self.edges.items[ifThenEdge.ID]);
                        statIter.skipTo(body_range.?[1]);
                    } else {
                        statIter.skipTo(_if.block);
                    }

                    if (isIfElse) {
                        // else block
                        // else.body
                        var elseBody = CfgBlock.init(self.alloc, "else.body");
                        var elseExit = CfgBlock.init(self.alloc, "else.exit");
                        var elseEdge = try self.addBlocksWithEdge(elseBody, elseExit);
                        // NOTE(review): CfgBlock.addOutgoer is not visible here; presumably
                        // these link if.cond -> else.body and else.exit -> if.exit — confirm
                        _ = try self.blocks.items[ifCondID].addOutgoer(self, elseEdge.src);
                        _ = try self.blocks.items[elseEdge.dest].addOutgoer(self, ifExitID);

                        const else_range = as_elseBlock.?.kind.Block.range(ast);
                        if (else_range != null) {
                            var erage = else_range.?;
                            const elseBody_iter = Ast.NodeIter(@typeInfo(Ast.Node.Kind).Union.tag_type.?.Statement).init(ast, erage[0], erage[1]);
                            try self.generateStatements(ast, ir, elseBody_iter, elseEdge);
                            statIter.skipTo(erage[1]);
                        } else {
                            statIter.skipTo(as_elseBlockID.?);
                        }
                    }
                    // statements after the conditional go into if.exit
                    cBlock = ifExitID;
                },
                .ConditionalIfElse => {},
                .While => |_while| {
                    var ed = edge;
                    const w_cond_ast = _while.cond;
                    const as_wCond = ast.get(w_cond_ast).*;
                    const w_block_ast = _while.block;
                    const w_block_ast_node = ast.get(w_block_ast).*;

                    // while loop
                    // the condition expression is placed in two blocks,
                    // while.cond1 and while.cond2
                    var wCond = CfgBlock.init(self.alloc, "while.cond1");
                    wCond.conditional = true;
                    try wCond.addIdentsFromExpression(ir, ast, as_wCond);
                    try wCond.statements.append(as_wCond);
                    for (wCond.typedIdents.items) |ident| {
                        try self.declsUsed.put(ident, true);
                    }
                    var wCondID = try self.addBlockOnEdge(wCond, ed);
                    ed.src = wCondID;

                    var wCond2 = CfgBlock.init(self.alloc, "while.cond2");
                    wCond2.conditional = true;
                    try wCond2.addIdentsFromExpression(ir, ast, as_wCond);
                    try wCond2.statements.append(as_wCond);
                    for (wCond2.typedIdents.items) |ident| {
                        try self.declsUsed.put(ident, true);
                    }
                    var wCondID2 = try self.addBlockOnEdge(wCond2, ed);
                    ed.src = wCondID2;

                    // b edge is between the conds -> fist item in wCond2 edges
                    var bEdge = self.edges.items[self.blocks.items[wCondID].outgoers[0].?];

                    // create the body block
                    var wBody = CfgBlock.init(self.alloc, "while.body");
                    var wBodyID = try self.addBlockOnEdge(wBody, bEdge);

                    // create the fillback block (to be added between cond2 and body)
                    var wFillback = CfgBlock.init(self.alloc, "while.fillback");
                    // back edge cond2 -> body, linked by hand into outgoer slot 1 of cond2
                    var fbEdge = Edge{ .src = wCondID2, .dest = wBodyID, .ID = self.edges.items.len };
                    try self.edges.append(fbEdge);
                    self.blocks.items[wCondID2].outgoers[1] = fbEdge.ID;
                    try self.blocks.items[wBodyID].incomers.append(fbEdge.ID);
                    try self.assertEdgeBothSides(fbEdge.ID);

                    var wFillbackID = try self.addBlockOnEdge(wFillback, fbEdge);

                    // create the exit block
                    var wExit = CfgBlock.init(self.alloc, "while.exit");
                    var wExitID = try self.addBlockOnEdge(wExit, ed);
                    ed.src = wExitID;

                    // swap wCond2's outgoers
                    var wCond2Outgoers = self.blocks.items[wCondID2].outgoers;
                    self.blocks.items[wCondID2].outgoers[0] = wCond2Outgoers[1];
                    self.blocks.items[wCondID2].outgoers[1] = wCond2Outgoers[0];

                    // the first condition can skip the loop entirely
                    _ = try self.addEdgeBetween(wCondID, wExitID);

                    // add the body to the block
                    const body_range = w_block_ast_node.kind.Block.range(ast);
                    if (body_range != null) {
                        const wBody_iter = Ast.NodeIter(@typeInfo(Ast.Node.Kind).Union.tag_type.?.Statement).init(ast, body_range.?[0], body_range.?[1]);

                        // `bEdge` is a copy taken before later insertions, so
                        // the current edge is re-read from `self.edges` by ID
                        try self.generateStatements(ast, ir, wBody_iter, self.edges.items[bEdge.ID]);
                        // iterate over every block
// from wExitId to the most recent block
                        // and add the typed idents from the body to the fillback block
                        for (wExitID..self.blocks.items.len) |id| {
                            for (self.blocks.items[id].typedIdents.items) |ident| {
                                try self.blocks.items[wFillbackID].typedIdents.append(ident);
                                try self.blocks.items[wFillbackID].assignments.append(ident);
                            }
                        }
                        // for the idents in body add them to the fillback block
                        for (self.blocks.items[wBodyID].typedIdents.items) |ident| {
                            try self.blocks.items[wFillbackID].typedIdents.append(ident);
                            try self.blocks.items[wFillbackID].assignments.append(ident);
                        }
                        // for the idents in while cond2 add them to the fillback block
                        for (self.blocks.items[wCondID2].typedIdents.items) |ident| {
                            try self.blocks.items[wFillbackID].typedIdents.append(ident);
                            try self.blocks.items[wFillbackID].assignments.append(ident);
                        }
                        statIter.skipTo(body_range.?[1]);
                    } else {
                        statIter.skipTo(w_block_ast);
                    }
                    cBlock = wExitID;
                    edge = ed;
                },
                else => {
                    unreachable;
                },
            }
        }
        // add all the idents in the block (that we will now be leaving)
        // to the declsUsed
        for (self.blocks.items[cBlock].typedIdents.items) |ident| {
            try self.declsUsed.put(ident, true);
        }
    }

    /// Appends two new blocks and one edge `src -> dest` joining them. The
    /// blocks receive the next two free IDs; the edge is stored in outgoer
    /// slot 0 of the source and in the incomer list of the destination.
    /// Returns the new edge.
    pub fn addBlocksWithEdge(self: *CfgFunction, blockSrc_: CfgBlock, blockDest_: CfgBlock) !Edge {
        var blockSrc = blockSrc_;
        var blockDest = blockDest_;
        const srcID = self.blocks.items.len;
        const destID = self.blocks.items.len + 1;
        blockSrc.ID = srcID;
        blockDest.ID = destID;
        try self.blocks.append(blockSrc);
        try self.blocks.append(blockDest);

        const edge = Edge{ .src = srcID, .dest = destID, .ID = self.edges.items.len };
        try self.edges.append(edge);

        self.blocks.items[srcID].outgoers[0] = edge.ID;
        try (self.blocks.items[destID].incomers).append(edge.ID);
        try self.assertEdgeBothSides(edge.ID);
        return edge;
    }

    /// Appends `block_` with no edges and returns its ID (its index in
    /// `self.blocks`).
    pub fn addBlock(self: *CfgFunction, block_: CfgBlock) !CfgBlock.ID_t {
        var block = block_;
        const id = self.blocks.items.len;
        block.ID = id;
        try (self.blocks).append(block);
        return id;
    }

    /// Appends `block_` and splices it into the edge identified by
    /// `edge_.ID` (only the ID of `edge_` is used). Returns the new block's ID.
    pub fn addBlockOnEdge(self: *CfgFunction, block_: CfgBlock, edge_: Edge) !CfgBlock.ID_t {
        const edgeID = edge_.ID;
        var block = block_;
        const id = self.blocks.items.len;
        block.ID = id;
        try (self.blocks).append(block);
        // try (self.blocks.items[edge.dest]).addIncomer(id);

        return try self.insertBlockOnEdge(id, edgeID);
    }

    /// Splices the existing block `blockID` into edge `edge`:
    ///   before:  src --edge--> dest
    ///   after:   src --newEdge--> blockID --edge--> dest
    /// The old edge keeps its ID and destination; only its `src` is rewritten.
    /// Outgoer slot 0 of `blockID` is overwritten with `edge`.
    pub fn insertBlockOnEdge(self: *CfgFunction, blockID: CfgBlock.ID_t, edge: Edge.ID_t) !CfgBlock.ID_t {
        // find the edge
        const e = self.edges.items[edge];

        // new edge between old source and new block
        const newEdge = Edge{ .src = e.src, .dest = blockID, .ID = self.edges.items.len };
        try self.edges.append(newEdge);

        // NOTE(review): CfgBlock.updateEdge is not visible here; presumably it
        // replaces `edge` with `newEdge.ID` in the old source's outgoers — confirm
        _ = try (self.blocks.items[e.src]).updateEdge(self, edge, newEdge.ID);

        self.blocks.items[blockID].outgoers[0] = edge;
        try (self.blocks.items[blockID].incomers).append(newEdge.ID);

        // update the old edge to point from the new block to the old
        self.edges.items[edge].src = blockID;
        try self.assertEdgeBothSides(newEdge.ID);
        try self.assertEdgeBothSides(edge);
        return blockID;
    }

    /// Returns a copy of the block with the given ID.
    pub fn getBlock(self: *CfgFunction, id: CfgBlock.ID_t) CfgBlock {
        return self.blocks.items[id];
    }
};

pub const BasicBlock = struct {
    name: []const u8,
    incomers: std.ArrayList(Label),
    outgoers: [2]?Label,
    defs: Set.Set(StrID),
    uses: std.AutoHashMap(StrID, bool),
    // a map of strID to the last definition within this block
    versionMap: std.AutoHashMap(StrID, Ref),
    // an ORDERED list of the instruction ids of the instructions in this block
    insts: List,
    phiInsts: std.ArrayList(Function.InstID),
    phiMap: std.AutoHashMap(StrID, Function.InstID),

    /// Appends the entry `[ref, bbIn]` to the phi for `name` in block `self`,
    /// creating an empty phi first when the block has none for that name.
    /// Returns the phi's instruction ID.
    pub fn addRefToPhi(self: BasicBlock.ID, fun: *Function, ref: Ref, bbIn: BasicBlock.ID, name: StrID) !Function.InstID {
        // std.debug.print("ref.i {any}\n", .{ref.i});
        const bb = fun.bbs.get(self);
        const phiInstID = bb.getPhi(name) orelse
            try IR.BasicBlock.addEmptyPhiOrClear(self, fun, name);

        const phiInst = fun.insts.get(phiInstID);
        var phi = IR.Inst.Phi.get(phiInst.*);
        // std.debug.print("ref.i {any}\n", .{ref.i});
        try phi.entries.append(IR.PhiEntry{ .ref = ref, .bb = bbIn });
        // std.debug.print("entries: {any}\n", .{phi.entries.items});
        // `phi` is a decoded copy, so write the updated instruction back
        const updatedPhiInst = phi.toInst();
        fun.insts.set(phiInstID, updatedPhiInst);
        return phiInstID;
    }

    /// Same as `addRefToPhi` for the identifier `"return_reg"`. This was a
    /// verbatim copy of `addRefToPhi`; it now delegates to it.
    pub fn addRefToPhiReturn(self: BasicBlock.ID, fun: *Function, ref: Ref, bbIn: BasicBlock.ID, ir: *IR) !Function.InstID {
        return BasicBlock.addRefToPhi(self, fun, ref, bbIn, ir.internIdent("return_reg"));
    }

    /// Same as `addEmptyPhiOrClear` for the identifier `"return_reg"`. This
    /// was a verbatim copy of `addEmptyPhiOrClear`; it now delegates to it.
    pub fn addEmptyPhiReturn(self: BasicBlock.ID, fun: *Function, ir: *IR) !Function.InstID {
        return BasicBlock.addEmptyPhiOrClear(self, fun, ir.internIdent("return_reg"));
    }

    /// Ensures block `self` has an empty phi for `ident` and returns its
    /// instruction ID.
    ///
    /// If the block's `phiMap` already has a phi for `ident`, its entry list
    /// is resized to 0 and the block's `versionMap` entry is pointed at that
    /// phi's result. Otherwise a register and an instruction are reserved, a
    /// phi with no entries is stored in them, and it is registered in the
    /// block's `versionMap`, `phiInsts` and `phiMap`.
    ///
    /// `ident` must have an entry in `fun.typesMap` when a new phi has to be
    /// created (the lookup is unwrapped with `.?`).
    pub fn addEmptyPhiOrClear(self: BasicBlock.ID, fun: *Function, ident: StrID) !Function.InstID {
        const bbMap = fun.bbs.get(self).*.phiMap;
        if (bbMap.get(ident)) |contInst| {
            const fInst = fun.insts.get(contInst).*;
            var phiInst = IR.Inst.Phi.get(fInst);
            try phiInst.entries.resize(0);
            const phiInstInst = phiInst.toInst();
            fun.insts.set(contInst, phiInstInst);
            try fun.bbs.get(self).versionMap.put(ident, fInst.res);
            return contInst;
        }
        const identType = fun.typesMap.get(ident).?;
        const phiEntries = std.ArrayList(IR.PhiEntry).init(fun.alloc);
        const phi = Inst.phi(IR.Ref.default, identType, phiEntries);

        // reserve
        const regID = try fun.regs.add(undefined);
        const instID = try fun.insts.add(undefined);

        // construct the register to be added, using the reserved IDs
        const reg = Register{ .id = regID, .inst = instID, .name = ident, .bb = self, .type = identType };
        var inst = phi;
        inst.res = Ref.local(regID, ident, identType); // update the reference of the incoming instruction

        // save
        fun.regs.set(regID, reg);
        fun.insts.set(instID, inst); // in the inst array update the resulting instruction
        try fun.bbs.get(self).versionMap.put(ident, inst.res);

        try fun.bbs.get(self).addPhiInst(instID, ident);
        return instID;
    }

    // adds a phi node with %name = phi
// [%undef, %pred block]
    // Ensures the block has an empty phi for `ident`, then appends one entry
    // per incomer of the block. Each entry uses `IR.Ref.default` with its
    // `name` set to `ident` as the placeholder value.
    pub fn addPhiWithPreds(bbID: BasicBlock.ID, fun: *Function, ident: StrID) !Function.InstID {
        const bb = fun.bbs.get(bbID);
        const currentPhiInstID = try BasicBlock.addEmptyPhiOrClear(bbID, fun, ident);
        const bbPhiInst = fun.insts.get(currentPhiInstID).*;
        var bbPhi = IR.Inst.Phi.get(bbPhiInst);

        for (bb.incomers.items) |it| {
            // const predBB = fun.bbs.get(it);
            // const predInst = predBB.versionMap.get(ident);
            // if there is no phi for the pred block then continue
            // if (predInst == null) {
            var phiEntryTemp = IR.PhiEntry{ .ref = IR.Ref.default, .bb = it };
            phiEntryTemp.ref.name = ident;
            try bbPhi.entries.append(phiEntryTemp);
            // continue;
            // }
            // try bbPhi.entries.append(IR.PhiEntry{ .ref = predInst.?, .bb = it });
        }

        // `bbPhi` is a decoded copy, so write the updated instruction back
        const phiInst = bbPhi.toInst();
        fun.insts.set(currentPhiInstID, phiInst);
        return currentPhiInstID;
    }

    /// Registers phi instruction `instID` for `ident` in this block: appends
    /// it to `phiInsts`, maps `ident` to it in `phiMap`, and marks `ident`
    /// in `uses`.
    pub fn addPhiInst(self: *BasicBlock, instID: Function.InstID, ident: StrID) !void {
        try self.phiInsts.append(instID);
        try self.phiMap.put(ident, instID);
        try self.uses.put(ident, true);
    }

    /// Returns the phi instruction registered for `ident`, or null.
    pub fn getPhi(self: *BasicBlock, ident: StrID) ?Function.InstID {
        return self.phiMap.get(ident);
    }

    /// The ID of a basic block is its index within the arraylist of
    /// basic blocks in the `Function` type
    /// This is done differently than the LUT based approach for almost
    /// everything else in the IR because the order of the basic blocks
    /// NOTE(review): the sentence above is unfinished in the original;
    /// presumably it ends "... is significant" — confirm.
    pub const ID = u32;

    pub const List = OrderedList(Function.InstID);

    /// Creates an empty block called `name`: no incomers, both outgoer slots
    /// null, empty sets, maps and instruction lists. `name` is stored as
    /// given, not copied.
    pub fn init(alloc: std.mem.Allocator, name: []const u8) BasicBlock {
        return .{
            .incomers = std.ArrayList(Label).init(alloc),
            .defs = Set.Set(StrID).init(),
            .uses = std.AutoHashMap(StrID, bool).init(alloc),
            .versionMap = std.AutoHashMap(StrID, Ref).init(alloc),
            .outgoers = [2]?Label{ null, null },
            .insts = List.init(alloc),
            .phiInsts = std.ArrayList(Function.InstID).init(alloc),
            .phiMap = std.AutoHashMap(StrID, Function.InstID).init(alloc),
            .name = name,
        };
    }

    /// Adds `incomer` to the incomer list unless it is already present.
    pub fn addIncomer(self: *BasicBlock, incomer: Label) !void {
        // duplicates are ignored for the same reason `addOutgoer` tolerates
        // them: helper methods may register the same edge more than once.
        // alternative is to just ignore duplicates while actually
        // using the cfg, but that seems kinda annoying ngl
        for (self.incomers.items) |existing| {
            if (existing == incomer) {
                return;
            }
        }
        try self.incomers.append(incomer);
    }

    /// Stores `outgoer` in the first slot that is free or already holds the
    /// same label. Returns `error.TooManyOutgoers` when both slots hold
    /// other labels.
    pub fn addOutgoer(self: *BasicBlock, outgoer: Label) !void {
        // note the `or _ == outgoer` to allow adding the same outgoer twice
        // without repercussions. This just makes me less worried about adding
        // outgoers in `Function` helper methods
        if (self.outgoers[0] == null or self.outgoers[0] == outgoer) {
            self.outgoers[0] = outgoer;
        } else if (self.outgoers[1] == null or self.outgoers[1] == outgoer) {
            self.outgoers[1] = outgoer;
        } else {
            return error.TooManyOutgoers;
        }
    }

    /// Returns the ID of the last instruction in the block, or null when the
    /// block has no instructions.
    pub fn getLastInstID(self: *const BasicBlock) ?Function.InstID {
        if (self.insts.len == 0) {
            return null;
        }
        return self.insts.items()[self.insts.len - 1];
    }
};

/// A lookup table where the index of the item is the key
/// and the size never changes after being initialized.
/// `getKey` extracts the key of a stored value; lookups are linear scans
/// comparing keys with `==`.
pub fn StaticSizeLookupTable(comptime Key: type, comptime Value: type, comptime getKey: fn (val: Value) Key) type {
    return struct {
        items: []Value,
        len: u32,

        const Self = @This();

        pub const Index = u32;

        /// Wraps `items` without copying; the slice's owner is responsible
        /// for its lifetime.
        pub fn init(items: []Value) Self {
            return .{ .items = items, .len = @intCast(items.len) };
        }

        /// Returns the index of the first item whose key equals `key`;
        /// panics when there is none.
        /// FIXME: if you see this function followed
        /// immediately by a call to `entry`
        /// it should be replaced with a call to lookup
        /// FIXME: remove and make safeIndexOf be indexOf -> ?Value
        /// or !Value
        pub fn indexOf(self: Self, key: Key) Index {
            const maybe_id = self.safeIndexOf(key);
            if (maybe_id) |id| {
                return id;
            }
            @panic("Item not found in lookup table");
        }

        /// Returns the index of the first item whose key equals `key`, or
        /// null. This is the single scan the other lookups build on.
        pub fn safeIndexOf(self: Self, key: Key) ?Index {
            for (self.items, 0..) |existing, i| {
                if (getKey(existing) == key) {
                    return @intCast(i);
                }
            }
            return null;
        }

        /// Returns the first item whose key equals `key`, or
        /// `error.NotFound`.
        pub fn lookup(self: Self, key: Key) !Value {
            const id = self.safeIndexOf(key) orelse return error.NotFound;
            return self.items[id];
        }

        /// Like lookup but also returns the index
        pub fn find(self: Self, key: Key) ?struct { index: Index, value: Value } {
            const index = self.safeIndexOf(key) orelse return null;
            return .{ .index = index, .value = self.items[index] };
        }

        /// Returns the item stored at index `key` (no key comparison).
        pub fn entry(self: Self, key: Index) Value {
            return self.items[key];
        }

        /// Helper mainly for the `fromLUT` function for when the value is the key
        pub fn IDgetKeyHelper(val: anytype) @TypeOf(val) {
            return val;
        }

        /// Allocates a table of `size` items with `alloc`, each set to
        /// `maybeDefault`. When `maybeDefault` is null the items are left
        /// `undefined` and must be written before they are read.
        /// The caller owns the allocated slice (`self.items`).
        pub fn initSized(alloc: std.mem.Allocator, size: usize, maybeDefault: ?Value) !Self {
            const default = maybeDefault orelse undefined;

            const items = try alloc.alloc(Value, size);

            for (0..size) |i| {
                items[i] = default;
            }
            return Self.init(items);
        }

        /// Returns true when some item has key `key`.
        pub fn contains(self: Self, key: Key) bool {
            return self.safeIndexOf(key) != null;
        }
    };
}

/// A lookup table where the index of the item is the key
/// and it is backed by an `ArrayList`.
/// The arraylist itself is append only and therefore the keys never change.
///
/// `getKey` extracts the `Key` a stored `Value` is searched by. Lookups are a
/// linear scan that compares that key with `==`; the position of the first
/// match is the item's `ID`.
pub fn LookupTable(comptime Key: type, comptime Value: type, comptime getKey: fn (val: Value) Key) type {
    return struct {
        items: List,
        /// Number of items appended through `add`.
        len: u32,

        const Self = @This();

        pub const ID = u32;

        pub const List = std.ArrayList(Value);

        /// Creates an empty table whose backing list allocates from `alloc`.
        pub fn init(alloc: std.mem.Allocator) Self {
            // A freshly initialised list holds no items, so the length starts at 0.
            return .{ .items = List.init(alloc), .len = 0 };
        }

        /// A wrapper around `safeLookup` that panics if the key is not
        /// found.
        ///
        /// Returns the `ID` of the first item whose key equals `key`. The
        /// return type is `ID` (it used to be spelled `Key`, which only
        /// type-checked when `Key` happened to be `u32`).
        pub fn lookup(self: Self, key: Key) ID {
            return self.safeLookup(key) orelse @panic("Item not found in lookup table");
        }

        /// Scans the items in insertion order and returns the `ID` of the
        /// first one whose key equals `key`, or `null` when there is none.
        pub fn safeLookup(self: Self, key: Key) ?ID {
            for (self.items.items, 0..) |existing, i| {
                if (getKey(existing) == key) {
                    // The loop index is a `usize`; IDs are `u32`.
                    return @intCast(i);
                }
            }
            return null;
        }

        /// Returns a copy of the item stored under `key`.
        /// `key` must be an ID previously returned by `add`.
        pub fn get(self: Self, key: ID) Value {
            return self.items.items[key];
        }

        /// Overwrites the item stored under `key`.
        /// `key` must be an ID previously returned by `add`.
        pub fn set(self: *Self, key: ID, value: Value) void {
            self.items.items[key] = value;
        }

        /// Appends `val` and returns the ID (index) it was stored under.
        /// Fails only if the backing list cannot grow.
        pub fn add(self: *Self, val: Value) !ID {
            const id: ID = @intCast(self.items.items.len);
            try self.items.append(val);
            self.len += 1;
            return id;
        }
    };
}

/// A wrapper around `std.ArrayList` to provide a way to get
/// the index when you append, and some other helpers TBD + nicer interface
/// (.len field, .get method etc.)
pub fn OrderedList(comptime T: type) type {
    return struct {
        list: std.ArrayList(T),
        /// Number of items currently stored; kept in step with `list` by
        /// `add` and `orderedRemove`.
        len: u32,

        // TODO: the initial idea for this was to have an `order`
        // array that just keeps the indexes of the items in `list`
        // in order and can be updated however.
        // so far I have not encountered a time I couldn't just make
        // the basic blocks in order, and the instructions order is
        // maintained by keeping the list of instructions
        // added to the `insts` field in the basic block in order
        // We might still need it though, I just haven't seen the reason
        // to add the additional complexity it would introduce/
        // refactors it would possibly require
        // the field would look like:
        // order: std.ArrayList(u32),
        // and we'd just add some helper functions to ensure something
        // comes after something else, do manipulations, etc.

        pub const Self = @This();

        /// Creates an empty list that allocates from `alloc`.
        pub fn init(alloc: std.mem.Allocator) Self {
            return .{ .list = std.ArrayList(T).init(alloc), .len = 0 };
        }

        /// A helper for iterating instead of `field.list.items`
        pub inline fn items(self: Self) []T {
            return self.list.items;
        }

        // TODO: consider refactoring to return just `T`
        // and create another `getPtr` for when you need a pointer
        // the `.*` everywhere is kinda annoying ngl
        /// Returns a pointer to the item at `idx`. The pointer refers to the
        /// list's current backing storage, so do not hold it across an `add`.
        pub inline fn get(self: Self, idx: u32) *T {
            return &self.list.items[idx];
        }

        /// Appends an item and returns the index
        pub fn add(self: *Self, item: T) !u32 {
            const id = self.len;
            try self.list.append(item);
            self.len += 1;
            return id;
        }

        /// Overwrites the item at `idx`.
        pub inline fn set(self: *Self, idx: u32, item: T) void {
            self.list.items[idx] = item;
        }

        /// same as add, but does not return the index
        /// for when you just don't care yk?
        pub fn append(self: *Self, item: T) !void {
            _ = try self.add(item);
        }

        /// Removes and returns the item at `idx`, keeping the remaining
        /// items in their original order.
        pub fn orderedRemove(self: *Self, idx: u32) T {
            self.len -= 1;
            return self.list.orderedRemove(idx);
        }
    };
}

/// Description of a struct type: its name, its size and its fields.
pub const StructType = struct {
    // NOTE: same as this structs ID
    name: StrID,
    size: u32,
    /// Lookup table for field names, where index of fields StrID
    /// is the index of the field i.e. its FieldID
    /// The slice is assumed to be allocated and freed if necessary by the TypeList
    fieldLookup: FieldList,

    pub const ID = StrID;
    const FieldList = StaticSizeLookupTable(StrID, Field, Field.getKey);

    /// A single named, typed member of a struct.
    pub const Field = struct {
        name: StrID,
        type: Type,

        pub fn init(name: StrID, ty: Type) Field {
            return .{ .name = name, .type = ty };
        }

        /// Key used by `FieldList`: the field's name.
        pub fn getKey(self: Field) StrID {
            return self.name;
        }
    };

    pub const FieldID = u32;

    /// Builds a struct type from its name, size and field slice.
    /// `fieldList` is handed to `FieldList.init`.
    pub fn init(name: StrID, size: u32, fieldList: []Field) StructType {
        const fieldLookup = FieldList.init(fieldList);
        return .{ .name = name, .fieldLookup = fieldLookup, .size = size };
    }

    /// Finds the field called `name` and returns it together with its index.
    /// Returns `error.FieldNotFound` when no field has that name.
    pub fn getFieldWithName(self: StructType, name: StrID) !struct { index: u32, field: Field } {
        const field = self.fieldLookup.find(name) orelse {
            return error.FieldNotFound;
        };

        return .{ .index = field.index, .field = field.value };
    }

    /// Like `getFieldWithName` but returns only the index.
    /// Returns `error.FieldNotFound` when no field has that name.
    pub fn indexOfFieldWithName(self: StructType, name: StrID) !FieldList.Index {
        return self.fieldLookup.safeIndexOf(name) orelse error.FieldNotFound;
    }

    /// The fields as a slice, indexed by `FieldID`.
    pub fn fields(self: *const StructType) []Field {
        return self.fieldLookup.items;
    }

    /// Number of fields, taken from the lookup table's `len`.
    pub fn numFields(self: StructType) usize {
        return @as(usize, self.fieldLookup.len);
    }

    /// Key used by `TypeList`: the struct's name.
    pub fn getKey(self: StructType) StrID {
        return self.name;
    }

    /// The `Type` value that refers to this struct by name.
    pub fn getType(self: *const StructType) Type {
        return .{ .strct = self.name };
    }
};

pub const StructID = StrID;

/// Literally just a list of types
/// Abstracted so we can change it as needed and define helpers
pub const TypeList = struct {
    items: List,

    // TODO: use lookup table
    pub const List = StaticSizeLookupTable(StructID, Item, Item.getKey);
    pub const Item = StructType;

    /// Returns a list whose `items` are still undefined; call `fill`
    /// before using any other method.
    pub fn init() TypeList {
        return .{ .items = undefined };
    }

    /// Note the lack of a way to add one item at a time,
    /// only many at once
    pub fn fill(self: *TypeList, items: []Item) void {
        self.items = List.init(items);
    }

    /// Number of types in the list.
    pub fn len(self: *const TypeList) usize {
        return @intCast(self.items.len);
    }

    /// Looks a struct type up by ID. Any lookup failure is reported as
    /// `error.TypeNotFound`.
    pub fn get(self: *const TypeList, id: StructID) !Item {
        return self.items.lookup(id) catch error.TypeNotFound;
    }

    /// Returns the type stored at position `idx`.
    /// NOTE(review): an older warning here mentioned `AutoArrayHashMap`
    /// insertion order; the table is a `StaticSizeLookupTable` now, so
    /// presumably `idx` is the position in the slice given to `fill`.
    /// Confirm against `StaticSizeLookupTable.init`.
    pub fn index(self: *const TypeList, idx: usize) Item {
        return self.items.items[idx];
    }

    /// Same lookup as `get`, spelled for callers holding an identifier;
    /// `StructID` and `StrID` are the same type.
    pub fn getFromIdent(self: *const TypeList, ident: StrID) !Item {
        return self.get(ident);
    }

    // TODO: !!!
};

/// Literally just a list of instructions... for now... bwahahaha
/// Abstracted so we can change it as needed and define helpers
pub const InstructionList = struct {
    items: List,

    pub const List = std.ArrayList(Inst);

    /// Creates an empty instruction list that allocates from `alloc`.
    pub fn init(alloc: std.mem.Allocator) InstructionList {
        return .{ .items = List.init(alloc) };
    }

    // TODO: !!!
};

/// This is for LLVM 3.4.2 (with some differences for newer versions noted inline). The full
/// manual is linked from the course website. There are often multiple variants of each of the
/// following instructions; I list here only what I used (a sampling of what is available).
pub const Op = enum {
    // Arithmetic
    /// = add ,
    /// = mul ,
    /// = sdiv ,
    /// = sub ,
    // Boolean
    /// = and ,
    /// = or ,
    /// = xor ,
    Binop,

    // Comparison and Branching
    /// = icmp , ; @.g., = eq
    Cmp,
    /// br i1 , label , label
    Br,
    /// `br label `
    /// I know I know this isn't the actual name,
    /// but this is what it means and
    /// I dislike Mr.
Lattner's design decision + Jmp, + + // Loads & Stores + /// ` = load * ` + /// newer: + /// ` = load , * ` + Load, + /// `store value, * ` + Store, + /// ` = getelementptr * , i1 0, i32 ` + /// newer: + /// ` = getelementptr , * , i1 0, i32 ` + Gep, + + // Invocation + /// ` = call ()` + /// newer: + /// ` = call ()` + Call, + /// `ret void` + /// `ret ` + Ret, + // Allocation + /// ` = alloca ` + Alloc, + + // Miscellaneous + /// ` = bitcast to ; cast type` + Bitcast, + /// ` = trunc to ; truncate to ty2` + Trunc, + /// ` = zext to ; zero-extend to ty2` + Zext, + /// ` = sext to ; sign-extend to ty2` + Sext, + /// ` = phi [,