diff --git a/Cargo.lock b/Cargo.lock index c70bcf9..e80796d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -112,9 +112,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" [[package]] name = "ariadne" @@ -389,9 +389,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.11.1" +version = "1.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "786a307d683a5bf92e6fd5fd69a7eb613751668d1d8d67d802846dfe367c62c8" +checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" dependencies = [ "memchr", "regex-automata", @@ -613,7 +613,7 @@ version = "2.9.2" dependencies = [ "cairo-lang-debug", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -927,9 +927,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.4" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9157bbaa6b165880c27a4293a474c91cdcf265cc68cc829bf10be0964a391caf" +checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" dependencies = [ "jobserver", "libc", @@ -993,7 +993,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -1149,28 +1149,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "derive_more" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" -dependencies = [ - "derive_more-impl", -] - -[[package]] -name = "derive_more-impl" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" -dependencies = [ - "convert_case", - "proc-macro2", - 
"quote", - "syn 2.0.90", - "unicode-xid", -] - [[package]] name = "diff" version = "0.1.13" @@ -1303,9 +1281,9 @@ dependencies = [ [[package]] name = "float-cmp" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" dependencies = [ "num-traits", ] @@ -1318,9 +1296,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "foldhash" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" [[package]] name = "funty" @@ -1347,7 +1325,7 @@ checksum = "43eaff6bbc0b3a878361aced5ec6a2818ee7c541c5b33b5880dfa9a86c23e9e7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -1394,9 +1372,9 @@ dependencies = [ [[package]] name = "good_lp" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97630e1e456d7081c524488a87d8f8f7ed0fd3100ba10c55e3cfa7add5ce05c6" +checksum = "10efcd6c7d6f84cb5b4f9155248e0675deab9cfb92d0edbcb25cb81490b65ae7" dependencies = [ "fnv", "microlp", @@ -1497,17 +1475,15 @@ dependencies = [ "anyhow", "bimap", "chumsky", - "clap", - "derive_more", "downcast-rs", "ethnum", "hieratika-errors", "hieratika-flo", "inkwell", "itertools 0.13.0", + "miette", "ouroboros", "rand", - "tracing", ] [[package]] @@ -1530,7 +1506,7 @@ dependencies = [ "inkwell", "itertools 0.13.0", "miette", - "thiserror 2.0.8", + "thiserror 2.0.9", ] [[package]] @@ -1598,7 +1574,7 @@ checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] 
[[package]] @@ -1655,7 +1631,7 @@ source = "git+https://github.com/stevefan1999-personal/inkwell?rev=0c1e5dd52cf3e dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -1834,9 +1810,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.168" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libredox" @@ -1911,9 +1887,9 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "microlp" -version = "0.2.6" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53e0c5664f9959f1c3970d523a22f0319024282cb754358c2afc7e1d45280ae3" +checksum = "8113ec0619201ef0ead05ecafe9ba59b525ab73508456b8d35dbaf810cd07704" dependencies = [ "log", "sprs", @@ -1947,7 +1923,7 @@ checksum = "23c9b935fbe1d6cbd1dac857b54a688145e2d93f48db36010514d0f612d0ad67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -2127,7 +2103,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -2253,9 +2229,9 @@ checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" [[package]] name = "pin-project-lite" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pkg-config" @@ -2301,9 +2277,9 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "predicates" -version = "3.1.2" +version = "3.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" +checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" dependencies = [ "anstyle", "difflib", @@ -2315,15 +2291,15 @@ dependencies = [ [[package]] name = "predicates-core" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae8177bee8e75d6846599c6b9ff679ed51e882816914eec639944d7c9aa11931" +checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa" [[package]] name = "predicates-tree" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41b740d195ed3166cd147c8047ec98db0e22ec019eb8eeb76d343b795304fb13" +checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c" dependencies = [ "predicates-core", "termtree", @@ -2365,7 +2341,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", "version_check", "yansi", ] @@ -2381,9 +2357,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] @@ -2547,7 +2523,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -2596,9 +2572,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "ryu" @@ -2637,7 +2613,7 @@ dependencies = [ "proc-macro2", "quote", 
"serde_derive_internals", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -2657,9 +2633,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.216" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] @@ -2676,13 +2652,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.216" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -2693,14 +2669,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.134" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d" dependencies = [ "itoa", "memchr", @@ -2841,7 +2817,7 @@ checksum = "bbc159a1934c7be9761c237333a57febe060ace2bc9e3b337a59a37af206d19f" dependencies = [ "starknet-curve", "starknet-ff", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -2945,9 +2921,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.90" +version = "2.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" dependencies = [ "proc-macro2", "quote", @@ -2983,9 +2959,9 @@ dependencies = [ [[package]] name = 
"termtree" -version = "0.4.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" [[package]] name = "textwrap" @@ -3008,11 +2984,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.8" +version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f5383f3e0071702bf93ab5ee99b52d26936be9dedd9413067cbdcddcb6141a" +checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" dependencies = [ - "thiserror-impl 2.0.8", + "thiserror-impl 2.0.9", ] [[package]] @@ -3023,18 +2999,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] name = "thiserror-impl" -version = "2.0.8" +version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f357fcec90b3caef6623a099691be676d033b40a058ac95d2a6ade6fa0c943" +checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3138,7 +3114,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3263,7 +3239,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", "wasm-bindgen-shared", ] @@ -3285,7 +3261,7 @@ checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3402,9 +3378,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.20" +version = "0.6.22" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +checksum = "39281189af81c07ec09db316b302a3e67bf9bd7cbf6c820b50e35fee9c2fa980" dependencies = [ "memchr", ] @@ -3457,7 +3433,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3477,7 +3453,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] diff --git a/crates/compiler/Cargo.toml b/crates/compiler/Cargo.toml index 43b6de9..60006ad 100644 --- a/crates/compiler/Cargo.toml +++ b/crates/compiler/Cargo.toml @@ -16,16 +16,14 @@ rust-version.workspace = true [dependencies] bimap.workspace = true chumsky = "0.9.3" -clap.workspace = true -derive_more = { version = "1.0.0", features = ["full"] } downcast-rs = "1.2.1" ethnum.workspace = true inkwell.workspace = true itertools.workspace = true hieratika-errors.workspace = true hieratika-flo.workspace = true +miette.workspace = true ouroboros = "0.18.4" -tracing.workspace = true rand = "0.8.5" [dev-dependencies] diff --git a/crates/compiler/input/compilation/add.ll b/crates/compiler/input/compilation/add.ll index 096ec1c..6200b1c 100644 --- a/crates/compiler/input/compilation/add.ll +++ b/crates/compiler/input/compilation/add.ll @@ -1,7 +1,7 @@ ; ModuleID = '9ox3ykpp0gbrqxqlz7ajwa9w6' source_filename = "9ox3ykpp0gbrqxqlz7ajwa9w6" -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-none" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64" @alloc_4190527422e5cc48a15bd1cb4f38f425 = private unnamed_addr constant <{ [33 x i8] }> <{ [33 x i8] c"crates/rust-test-input/src/lib.rs" }>, align 1 @alloc_5b4544c775a23c08ca70c48dd7be27fc = private unnamed_addr constant <{ ptr, 
[16 x i8] }> <{ ptr @alloc_4190527422e5cc48a15bd1cb4f38f425, [16 x i8] c"!\00\00\00\00\00\00\00\05\00\00\00\05\00\00\00" }>, align 8 diff --git a/crates/compiler/input/compilation/bad_data_layout.ll b/crates/compiler/input/compilation/bad_data_layout.ll new file mode 100644 index 0000000..8d13058 --- /dev/null +++ b/crates/compiler/input/compilation/bad_data_layout.ll @@ -0,0 +1,10 @@ +; ModuleID = 'opcodes.ll' +source_filename = "opcodes.ll" +target datalayout = "E-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64" + +define i64 @hieratika_test_add(i64 %left, i64 %right) unnamed_addr { +start: + %0 = add nuw nsw i64 %left, %right + ret i64 %0 +} diff --git a/crates/compiler/input/compilation/bad_target_triple.ll b/crates/compiler/input/compilation/bad_target_triple.ll new file mode 100644 index 0000000..a3fc4e7 --- /dev/null +++ b/crates/compiler/input/compilation/bad_target_triple.ll @@ -0,0 +1,10 @@ +; ModuleID = 'opcodes.ll' +source_filename = "opcodes.ll" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +define i64 @hieratika_test_add(i64 %left, i64 %right) unnamed_addr { +start: + %0 = add nuw nsw i64 %left, %right + ret i64 %0 +} diff --git a/crates/compiler/input/compilation/constants.ll b/crates/compiler/input/compilation/constants.ll new file mode 100644 index 0000000..0416d24 --- /dev/null +++ b/crates/compiler/input/compilation/constants.ll @@ -0,0 +1,55 @@ +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64" + +@test_const = constant { i1, [9 x i8] } { i1 0, [9 x i8] c"hieratika" } + +; @constant_pointer_const = constant ptr @test_const +; @constant_pointer_const_in_struct = constant { i1, ptr } { i1 0, ptr @test_const } + +; @function_pointer_const = constant ptr @hieratika_test_const_integer +; @function_pointer_const_in_struct = constant { i1, ptr } { i1 0, ptr @hieratika_test_const_integer } + +define ptr @hieratika_test_reference_const() 
unnamed_addr { +start: + ret ptr @test_const +} + +define i64 @hieratika_test_const_integer() unnamed_addr { +start: + ret i64 0 +} + +define double @hieratika_test_const_float() unnamed_addr { +start: + ret double 0.0 +} + +define void @hieratika_test_const_pointer() unnamed_addr { +start: + %addr = alloca ptr + store ptr blockaddress(@hieratika_test_const_pointer, %bb1), ptr %addr + ret void +bb1: + unreachable +} + +define void @hieratika_test_const_array() unnamed_addr { +start: + %ptr = alloca ptr + store [2 x i8] [i8 0, i8 1], ptr %ptr + ret void +} + +define void @hieratika_test_const_string() unnamed_addr { +start: + %ptr = alloca ptr + store [9 x i8] c"hieratika", ptr %ptr + ret void +} + +define void @hieratika_test_const_struct() unnamed_addr { +start: + %ptr = alloca ptr + store { i8, i1 } { i8 0, i1 1 }, ptr %ptr + ret void +} diff --git a/crates/compiler/input/compilation/opcodes.ll b/crates/compiler/input/compilation/opcodes.ll index 12f8fdb..e54c63d 100644 --- a/crates/compiler/input/compilation/opcodes.ll +++ b/crates/compiler/input/compilation/opcodes.ll @@ -1,7 +1,7 @@ ; ModuleID = 'opcodes.ll' source_filename = "opcodes.ll" -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-none" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64" ; Arithmetic and logic operations diff --git a/crates/compiler/input/compilation/terminators.ll b/crates/compiler/input/compilation/terminators.ll index 1fa8ecd..40d6f42 100644 --- a/crates/compiler/input/compilation/terminators.ll +++ b/crates/compiler/input/compilation/terminators.ll @@ -1,7 +1,7 @@ ; ModuleID = 'opcodes.ll' source_filename = "opcodes.ll" -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-none" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64" ; Supported terminator instructions diff --git 
a/crates/compiler/src/constant.rs b/crates/compiler/src/constant.rs index 275336e..eec02c6 100644 --- a/crates/compiler/src/constant.rs +++ b/crates/compiler/src/constant.rs @@ -1,5 +1,13 @@ //! Useful constants for use within the compiler. +/// The expected target triple for our platform, intended to be used for +/// validation during compilation. +pub const TARGET_TRIPLE: &str = "riscv64"; + +/// The expected data layout for our platform, intended to be used for +/// validation during compilation. +pub const TARGET_DATA_LAYOUT: &str = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; + /// The size of a byte on our architecture. pub const BYTE_SIZE_BITS: usize = 8; diff --git a/crates/compiler/src/lib.rs b/crates/compiler/src/lib.rs index 7bf9b27..a245a78 100644 --- a/crates/compiler/src/lib.rs +++ b/crates/compiler/src/lib.rs @@ -113,11 +113,13 @@ pub mod obj_gen; pub mod pass; pub mod polyfill; -use hieratika_errors::compile::llvm::Result; +use hieratika_errors::compile::llvm::{Error, Result}; use hieratika_flo::FlatLoweredObject; use crate::{ + constant::{TARGET_DATA_LAYOUT, TARGET_TRIPLE}, context::SourceContext, + llvm::data_layout::DataLayout, obj_gen::ObjectGenerator, pass::{PassManager, PassManagerReturnData, analysis::module_map::BuildModuleMap}, polyfill::PolyfillMap, @@ -194,6 +196,10 @@ impl Compiler { /// /// - If the module mapping pass has not been run. pub fn run(mut self) -> Result { + // Before we do anything, we need to validate that the input is compatible with + // our target. + self.is_compatible_target()?; + // First we have to run all the passes and collect their data. 
let PassManagerReturnData { context, data } = self.passes.run(self.context)?; @@ -208,6 +214,59 @@ impl Compiler { let builder = ObjectGenerator::new(&mod_name, data, context, self.polyfill_map)?; builder.run() } + + /// Validates that the context for the compilation contains an LLVM module + /// that has been compiled for a platform that is compatible with our target + /// platform, returning `Ok(())` if it is compatible and an error otherwise. + /// + /// # Errors + /// + /// - [`Error::IncompatibleDataLayout`] if the module being compiled has a + /// data layout not compatible with the expected one. + /// - [`Error::IncompatibleTargetSpecification`] if the module being + /// compiled has a target triple not compatible with the expected one. + /// - [`Error::InvalidDataLayoutSpecification`] if the module being compiled + /// has a data layout string that cannot be parsed as a data layout + /// specification. + /// + /// # Panics + /// + /// - If the compiler's statically-known data layout string cannot be parsed + /// into a valid data layout. + pub fn is_compatible_target(&self) -> Result<()> { + // We start by grabbing the actual data layout and comparing it to the one we + // know is correct. + let (actual_layout_str, actual_target) = self.context.analyze_module(|module| { + let layout = module.get_data_layout().as_str().to_str()?.to_string(); + let target = module.get_triple().as_str().to_str()?.to_string(); + + Ok((layout, target)) + })?; + let actual_layout = DataLayout::new(&actual_layout_str)?; + let expected_data_layout = DataLayout::new(TARGET_DATA_LAYOUT) + .expect("Statically known data layout could not be parsed"); + + // The target being compatible currently means that it has the same target + // triple and the same data layout. This notion MAY be relaxed in the future. 
+ let targets_valid = actual_target == TARGET_TRIPLE || actual_target.is_empty(); + let layouts_valid = actual_layout_str.is_empty() || actual_layout == expected_data_layout; + + if !targets_valid { + Err(Error::IncompatibleTargetSpecification( + actual_target, + TARGET_TRIPLE.to_string(), + ))?; + } + + if !layouts_valid { + Err(Error::IncompatibleDataLayout( + actual_layout_str, + TARGET_DATA_LAYOUT.to_string(), + ))?; + } + + Ok(()) + } } /// Allows for building a [`Compiler`] instance while retaining the defaults for diff --git a/crates/compiler/src/llvm/typesystem.rs b/crates/compiler/src/llvm/typesystem.rs index acc607a..ab51711 100644 --- a/crates/compiler/src/llvm/typesystem.rs +++ b/crates/compiler/src/llvm/typesystem.rs @@ -58,6 +58,9 @@ pub enum LLVMType { /// The 16-bit wide [integer type](https://llvm.org/docs/LangRef.html#integer-type). i16, + /// The 24-bit wide [integer type](https://llvm.org/docs/LangRef.html#integer-type). + i24, + /// The 32-bit wide [integer type](https://llvm.org/docs/LangRef.html#integer-type). 
i32, @@ -156,6 +159,8 @@ impl LLVMType { self, Self::bool | Self::i8 + | Self::i16 + | Self::i24 | Self::i32 | Self::i64 | Self::i128 @@ -224,6 +229,7 @@ impl LLVMType { f64, i8, i16, + i24, i32, i64, i128, @@ -231,7 +237,7 @@ impl LLVMType { void, }; match self { - bool | i8 | i16 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => 1, + bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => 1, void | Metadata => 0, Array(array_ty) => array_ty.size_of(), Structure(struct_ty) => struct_ty.size_of(), @@ -258,6 +264,7 @@ impl Display for LLVMType { LLVMType::bool => "i1".to_string(), LLVMType::i8 => "i8".to_string(), LLVMType::i16 => "i16".to_string(), + LLVMType::i24 => "i24".to_string(), LLVMType::i32 => "i32".to_string(), LLVMType::i64 => "i64".to_string(), LLVMType::i128 => "i128".to_string(), @@ -412,6 +419,7 @@ impl<'ctx> TryFrom<&IntType<'ctx>> for LLVMType { 1 => Self::bool, 8 => Self::i8, 16 => Self::i16, + 24 => Self::i24, 32 => Self::i32, 64 => Self::i64, 128 => Self::i128, diff --git a/crates/compiler/src/obj_gen/data.rs b/crates/compiler/src/obj_gen/data.rs index eddaf7c..4ca7cf1 100644 --- a/crates/compiler/src/obj_gen/data.rs +++ b/crates/compiler/src/obj_gen/data.rs @@ -169,6 +169,7 @@ impl ObjectContext { LLVMType::bool => Type::Bool, LLVMType::i8 => Type::Signed8, LLVMType::i16 => Type::Signed16, + LLVMType::i24 => Type::Signed24, LLVMType::i32 => Type::Signed32, LLVMType::i64 => Type::Signed64, LLVMType::i128 => Type::Signed128, diff --git a/crates/compiler/src/obj_gen/mod.rs b/crates/compiler/src/obj_gen/mod.rs index b908ead..540b24b 100644 --- a/crates/compiler/src/obj_gen/mod.rs +++ b/crates/compiler/src/obj_gen/mod.rs @@ -51,7 +51,7 @@ use crate::{ context::SourceContext, llvm::{ special_intrinsics::SpecialIntrinsics, - typesystem::{LLVMArray, LLVMStruct, LLVMType}, + typesystem::{LLVMArray, LLVMFunction, LLVMStruct, LLVMType}, }, messages::{ INSTRUCTION_NAMED, @@ -483,11 +483,41 @@ impl ObjectGenerator { 
data.flo.symbols.data.insert(global_name.to_string(), global_id); } + // An initializer takes no parameters and also returns no values, but + // nevertheless must have a signature as it is "callable". + let sig = Signature { + params: Vec::new(), + returns: Vec::new(), + location: None, + }; + // While we now have a variable definition that can be referenced elsewhere, the // variable is inherently uninitialized. FLO provides an "initializers" // mechanism that provides blocks that are executed by the CRT0. - if let Some(_initializer_code) = global.get_initializer() { - // TODO (#36) Actually implement this. + if let Some(initializer_code) = global.get_initializer() { + // We start by creating the initializer itself, which is a block that assigns a + // value to the constant variable. + let initializer_block = data.flo.add_block(|bb| -> Result<()> { + // As the signature is the same for every initializer, we can just re-use the + // one above. + bb.set_signature(&sig); + + // We need a dummy function context to get a constant, so we create one here. + let mut func_ctx = + FunctionContext::new(LLVMFunction::new(LLVMType::void, &[]), data.map.clone()); + + // The body of our initializer only needs to get the constant value, which we + // start by doing using the existing constant handling code. + util::build_const_into(global_id, &initializer_code, bb, &mut func_ctx)?; + + // An initializer must end without returning any values, but also must return. + bb.end_with_return(Vec::new()); + + Ok(()) + })?; + + // We then set the initializer in the object to be run at program startup. + data.flo.initializers.push(initializer_block); } Ok(()) @@ -973,13 +1003,12 @@ impl ObjectGenerator { // In order to have some idea of how to generate this, we need to know the // source type of the element at the pointer into which the GEP is indexing. 
+ // + // While GEP is meant to operate purely over aggregates (array or struct types), + // it can be used in its first argument to offset over pointers themselves. In + // other words, it performs simple pointer arithmetic. let source_type = LLVMType::try_from(instruction.get_gep_source_element_type()?)?; - // This source type has to be of either an array or struct type. - if !matches!(source_type, LLVMType::Structure(_) | LLVMType::Array(_)) { - Err(only_on_aggregates_error(&instruction, &source_type))?; - } - // The GEP instruction has an arbitrary number of operands. The first is the // pointer from which the extraction is performed, while the subsequent operands // are the indices. @@ -1167,7 +1196,7 @@ impl ObjectGenerator { // the store differently based on the type being stored. let stored_type = LLVMType::try_from(stored_val.get_type())?; match &stored_type { - bool | i8 | i16 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { + bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { // In the case of directly storing a primitive, the offset is _always_ going to // be zero. self.store_primitive(&stored_type, stored_val_var, pointer_var, 0, bb)?; @@ -1224,10 +1253,7 @@ impl ObjectGenerator { #[allow(clippy::enum_glob_use)] use LLVMType::*; assert!( - matches!( - typ, - bool | i8 | i16 | i32 | i64 | i128 | f16 | f32 | f64 | ptr - ), + typ.is_primitive(), "Primitive type expected, but {typ} found instead" ); @@ -1242,7 +1268,7 @@ impl ObjectGenerator { // Next, we need to look up the polyfill to call. 
let polyfill_name = self.polyfills - .try_get_polyfill("store", &[typ.clone(), ptr, i64], &LLVMType::void)?; + .try_get_polyfill("store", &[typ.clone(), ptr, i64], &void)?; // The store opcode has no return value, so we can simply generate the call // here, passing the value to store, the pointer to store to, and the offset @@ -1292,7 +1318,7 @@ impl ObjectGenerator { for (elem_ty, elem_val) in struct_elements.iter().zip(element_variables.into_iter()) { match elem_ty { - bool | i8 | i16 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { + bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { self.store_primitive(elem_ty, elem_val, pointer, accumulated_offset, bb)?; } Array(array_type) => { @@ -1353,7 +1379,7 @@ impl ObjectGenerator { for array_element in array_elements { match &array_elem_type { - bool | i8 | i16 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { + bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { self.store_primitive( array_elem_type, array_element, @@ -1428,7 +1454,7 @@ impl ObjectGenerator { // handle the type being loaded differently. let output_type = LLVMType::try_from(instruction.get_type())?; let output_var = match &output_type { - bool | i8 | i16 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { + bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { // In the case of directly loading a primitive, the offset is _always_ going to // be zero. self.load_primitive(&output_type, pointer_var, 0, bb)? 
@@ -1491,10 +1517,7 @@ impl ObjectGenerator { #[allow(clippy::enum_glob_use)] use LLVMType::*; assert!( - matches!( - typ, - bool | i8 | i16 | i32 | i64 | i128 | f16 | f32 | f64 | ptr - ), + typ.is_primitive(), "Primitive type expected, but {typ} found instead" ); @@ -1549,7 +1572,7 @@ impl ObjectGenerator { .map(|elem_ty| { // We have to start by dispatching based on the child type let loaded_var = match elem_ty { - bool | i8 | i16 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { + bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { self.load_primitive(elem_ty, pointer, accumulated_offset, bb)? } Array(array_type) => { @@ -1610,7 +1633,7 @@ impl ObjectGenerator { let mut component_variables: Vec = Vec::new(); for _ in 0..array_elem_count { component_variables.push(match array_elem_type { - bool | i8 | i16 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { + bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { self.load_primitive(array_elem_type, pointer, accumulated_offset, bb)? } Array(array_type) => { diff --git a/crates/compiler/src/obj_gen/util.rs b/crates/compiler/src/obj_gen/util.rs index 9e90131..3b40f56 100644 --- a/crates/compiler/src/obj_gen/util.rs +++ b/crates/compiler/src/obj_gen/util.rs @@ -1,7 +1,7 @@ //! This module contains miscellaneous utilities that are useful aids in //! generating a `FlatLoweredObject`. -use std::ffi::CStr; +use std::ffi::{CStr, c_uint}; use chumsky::{ Parser, @@ -18,7 +18,12 @@ use inkwell::{ basic_block::BasicBlock, llvm_sys::{ LLVMAtomicOrdering, - core::{LLVMGetIndices, LLVMGetNumIndices, LLVMPrintValueToString}, + core::{ + LLVMGetAggregateElement, + LLVMGetIndices, + LLVMGetNumIndices, + LLVMPrintValueToString, + }, }, values::{AsValueRef, BasicValueEnum, InstructionOpcode, InstructionValue}, }; @@ -43,14 +48,11 @@ pub type OptionalInkwellOperand<'ctx> = Option>; /// /// - [`Error::MalformedLLVM`] if the variable referenced by `value` is not /// defined before its usage. 
+/// - [`Error::UnsupportedType`] if an LLVM vector type is encountered. /// /// # Panics /// /// - If any value is both non-constant and lacking a name. -/// - If an array value constant is encountered, as these are currently -/// unsupported. -/// - If a struct value constant is encountered, as these are currently -/// unsupported. pub fn get_var_or_const( value: &BasicValueEnum, bb: &mut BlockBuilder, @@ -61,91 +63,145 @@ pub fn get_var_or_const( let value_name = value.get_name().to_str()?; let value_type = LLVMType::try_from(value.get_type())?; - let id: VariableId = if value_name.is_empty() { + let variable_id = if value_name.is_empty() { // Here, it is truly anonymous, which means we have run into the case where it // is simply an inline constant. - let const_value = match value { - BasicValueEnum::IntValue(int_val) => { - assert!( - int_val.is_const(), - "Unnamed integer value was not a constant: {int_val:?}" - ); - let constant_value = int_val - .get_zero_extended_constant() - .expect("Integer already known to be a constant had no constant value"); - - // Unfortunately, our constants from LLVM are not in the same format as we need - // in FLO, so we have to convert it while maintaining the correct bytes. Here, - // we take advantage of Rust's casting behavior: casting between signed and - // unsigned of the same size is a no-op, and casting from a smaller unsigned - // number to a larger unsigned number causes zero-extension. 
- // - // See: https://doc.rust-lang.org/nightly/reference/expressions/operator-expr.html#semantics - u128::from(constant_value) - } - BasicValueEnum::FloatValue(float_val) => { - assert!( - float_val.is_const(), - "Unnamed floating-point value was not a constant: {float_val:?}" - ); - let (const_float, _) = float_val.get_constant().expect( - "Floating-point value already known to be a constant had no constant value", - ); - - // Unfortunately, we do not natively support FP constants in FLO, so we have to - // represent the _bits_ of the float inside our constant value. This behavior - // is safe as we construct the value with the same bytes as the float, and then - // use the above-mentioned zero-extension to fit it into the u128. - u128::from(u64::from_le_bytes(const_float.to_le_bytes())) - } - BasicValueEnum::PointerValue(ptr_val) => { - assert!( - ptr_val.is_const(), - "Unnamed pointer value was not a constant: {ptr_val:?}" - ); - - // We cannot have direct pointer constants written out, but instead they take - // the form of a constant expression such as the `blockaddress` function. To - // make matters more complex, neither Inkwell nor llvm-sys provide ways to get - // at the arguments to the constant, so we are forced to parse it out manually. - // - // Note that CStr is explicitly a NON-OWNING wrapper over a const* c_char, and - // hence we are safe to convert it to the similarly non-owning str here for - // processing. When our `str` gets dropped, so does the `CStr` but the - // underlying allocation is left in the control of llvm-sys via Inkwell. - let pointer_const_text_c = - unsafe { CStr::from_ptr(LLVMPrintValueToString(ptr_val.as_value_ref())) }; - let pointer_const_text_str = pointer_const_text_c.to_str()?; - - // Next we can parse the blockaddr representation out of the string. 
- let block_addr = - BlockAddress::parser().parse(pointer_const_text_str).map_err(|e| { - Error::MalformedLLVM(format!( - "Expected a valid blockaddress expression but found \ - {pointer_const_text_str} instead: {e:?}", - )) - })?; + let variable = bb.add_variable(ObjectContext::flo_type_of(&value_type)?); + build_const_into(variable, value, bb, func_ctx)?; - // If this is a valid parse, we now have both the name of the function in which - // the block occurs, and the name of the block in that function from which the - // address is generated. We assume that: - // - // 1. The function exists in the current translation unit. - // 2. The block is valid in the specified function. - // - // All other conditions are a malformed LLVM error. - let target_function_blocks = func_ctx - .module_blocks() - .get(&block_addr.function_name) - .ok_or_else(|| { - Error::MalformedLLVM(format!( - "blockaddress constant attempted to look up a block in non-local \ - function {}", - &block_addr.function_name - )) - })?; + variable + } else { + func_ctx.try_lookup_variable(value_name)? + }; - let block_id = target_function_blocks + Ok(variable_id) +} + +/// Builds the constant `value` into the provided `variable` inside the block +/// described by `bb` and the function described by `func_ctx`. +/// +/// # Errors +/// +/// - [`Error::MalformedLLVM`] if the variable referenced by `value` is not +/// defined before its usage. +/// - [`Error::UnsupportedType`] if an LLVM vector type is encountered. +/// +/// # Panics +/// +/// - If the provided `value` is not a valid constant. +/// - If the provided variable type does not match that expected for the value. +#[expect(clippy::too_many_lines)] +pub fn build_const_into( + variable: VariableId, + value: &BasicValueEnum, + bb: &mut BlockBuilder, + func_ctx: &mut FunctionContext, +) -> Result<()> { + // We can always _get_ the name of a value, but in the case where it is an + // inline constant this is the empty string. 
+ let value_type = LLVMType::try_from(value.get_type())?; + let value_type_flo = ObjectContext::flo_type_of(&value_type)?; + assert_eq!( + bb.context.variables.get(variable).typ, + value_type_flo, + "The type of variable {variable} did not match the type of the provided value but is \ + required to." + ); + + match value { + BasicValueEnum::IntValue(int_val) => { + assert!( + int_val.is_const(), + "Unnamed integer value was not a constant: {int_val:?}" + ); + let constant_value = int_val + .get_zero_extended_constant() + .expect("Integer already known to be a constant had no constant value"); + + // Unfortunately, our constants from LLVM are not in the same format as we need + // in FLO, so we have to convert it while maintaining the correct bytes. Here, + // we take advantage of Rust's casting behavior: casting between signed and + // unsigned of the same size is a no-op, and casting from a smaller unsigned + // number to a larger unsigned number causes zero-extension. + // + // See: https://doc.rust-lang.org/nightly/reference/expressions/operator-expr.html#semantics + let const_value = u128::from(constant_value); + let flo_const = ConstantValue { + value: const_value, + typ: value_type_flo.clone(), + }; + + bb.assign_const(variable, flo_const, Vec::new(), None); + } + BasicValueEnum::FloatValue(float_val) => { + assert!( + float_val.is_const(), + "Unnamed floating-point value was not a constant: {float_val:?}" + ); + let (const_float, _) = float_val.get_constant().expect( + "Floating-point value already known to be a constant had no constant value", + ); + + // Unfortunately, we do not natively support FP constants in FLO, so we have to + // represent the _bits_ of the float inside our constant value. This behavior + // is safe as we construct the value with the same bytes as the float, and then + // use the above-mentioned zero-extension to fit it into the u128. 
+ let const_value = u128::from(u64::from_le_bytes(const_float.to_le_bytes())); + let flo_const = ConstantValue { + value: const_value, + typ: value_type_flo.clone(), + }; + + bb.assign_const(variable, flo_const, Vec::new(), None); + } + BasicValueEnum::PointerValue(ptr_val) => { + assert!( + ptr_val.is_const(), + "Unnamed pointer value was not a constant: {ptr_val:?}" + ); + + // We cannot have direct pointer constants written out, but instead they take + // the form of a constant expression such as the `blockaddress` function. To + // make matters more complex, neither Inkwell nor llvm-sys provide ways to get + // at the arguments to the constant, so we are forced to parse it out manually. + // + // Note that CStr is explicitly a NON-OWNING wrapper over a const* c_char, and + // hence we are safe to convert it to the similarly non-owning str here for + // processing. When our `str` gets dropped, so does the `CStr` but the + // underlying allocation is left in the control of llvm-sys via Inkwell. + let pointer_const_text_c = + unsafe { CStr::from_ptr(LLVMPrintValueToString(ptr_val.as_value_ref())) }; + let pointer_const_text_str = pointer_const_text_c.to_str()?; + + // Next we can parse the blockaddr representation out of the string. + let block_addr = BlockAddress::parser().parse(pointer_const_text_str).map_err(|e| { + Error::MalformedLLVM(format!( + "Expected a valid blockaddress expression but found {pointer_const_text_str} \ + instead: {e:?}", + )) + })?; + + // If this is a valid parse, we now have both the name of the function in which + // the block occurs, and the name of the block in that function from which the + // address is generated. We assume that: + // + // 1. The function exists in the current translation unit. + // 2. The block is valid in the specified function. + // + // All other conditions are a malformed LLVM error. 
+ let target_function_blocks = func_ctx + .module_blocks() + .get(&block_addr.function_name) + .ok_or_else(|| { + Error::MalformedLLVM(format!( + "blockaddress constant attempted to look up a block in non-local function \ + {}", + &block_addr.function_name + )) + })?; + + let block_id = + target_function_blocks .get_by_left(&block_addr.block_ref) .ok_or_else(|| { Error::MalformedLLVM(format!( @@ -154,38 +210,42 @@ pub fn get_var_or_const( )) })?; - let result_variable = bb.simple_get_new_block_address(*block_id); - - // This case is not part of the normal flow, so we do a direct return. - return Ok(result_variable); - } - BasicValueEnum::ArrayValue(_) => { - unimplemented!("Array value constants are not implemented (#91)") - } - BasicValueEnum::StructValue(_) => { - unimplemented!("Struct value constants are not implemented (#91)") - } - BasicValueEnum::VectorValue(_) | BasicValueEnum::ScalableVectorValue(_) => Err( - Error::unsupported_type("LLVM vector types are not supported"), - )?, - }; - - // With the constant value obtained, we can shove it into the actual constant, - // and stick that in the context. - let flo_const = ConstantValue { - value: const_value, - typ: ObjectContext::flo_type_of(&value_type)?, - }; - - // With that done, we can create the constant variable. - bb.simple_assign_new_const(flo_const) - } else { - // If the value name was _not_ empty, then it is a reference to what - // should be an existing variable. - func_ctx.try_lookup_variable(value_name)? 
- }; + bb.get_block_address(variable, *block_id, Vec::new(), None); + } + BasicValueEnum::ArrayValue(array_val) => { + assert!( + array_val.is_const(), + "Unnamed array value was not a constant {array_val:?}" + ); + + let constant_arguments = extract_constant_aggregate_values(value)?; + let constant_ids = constant_arguments + .iter() + .map(|c| get_var_or_const(c, bb, func_ctx)) + .collect::>>()?; + + bb.construct(variable, constant_ids, Vec::new(), None); + } + BasicValueEnum::StructValue(struct_val) => { + assert!( + struct_val.is_const(), + "Unnamed structure value was not a constant {struct_val:?}" + ); + + let constant_arguments = extract_constant_aggregate_values(value)?; + let constant_ids = constant_arguments + .iter() + .map(|c| get_var_or_const(c, bb, func_ctx)) + .collect::>>()?; + + bb.construct(variable, constant_ids, Vec::new(), None); + } + BasicValueEnum::VectorValue(_) | BasicValueEnum::ScalableVectorValue(_) => Err( + Error::unsupported_type("LLVM vector types are not supported"), + )?, + } - Ok(id) + Ok(()) } /// A representation of a [`blockaddress`](https://llvm.org/docs/LangRef.html#addresses-of-basic-blocks) @@ -457,6 +517,64 @@ pub fn expect_int_from_bv(value: BasicValueEnum) -> u64 { int_from_bv(value).expect("The provided value was not an integer") } +/// Extracts constant elements from an aggregate type. +/// +/// # Errors +/// +/// - [`Error::MalformedLLVM`] if the elements of the constant value are +/// non-constant. +/// - [`Error::UnsupportedType`] if the type of the provided `value` cannot be +/// expressed in the compiler's type language. +/// +/// # Panics +/// +/// If the provided `value` is not a constant aggregate type. 
+pub fn extract_constant_aggregate_values<'ctx>( + value: &BasicValueEnum<'ctx>, +) -> Result>> { + let val_type = LLVMType::try_from(value.get_type())?; + let val_ref = match value { + BasicValueEnum::ArrayValue(val) if val.is_const() => val.as_value_ref(), + BasicValueEnum::StructValue(val) if val.is_const() => val.as_value_ref(), + _ => panic!( + "Attempted to extract constant elements from non-aggregate or non-constant type \ + {value:?}" + ), + }; + + // We need the number of elements in the value that would be const, and the + // easiest way to do this is to pull the information out of the type. + let num_elements = match &val_type { + LLVMType::Array(arr) => arr.count, + LLVMType::Structure(structure) => structure.elements.len(), + _ => panic!( + "Attempted to extract constant elements from non-aggregate or non-constant type \ + {val_type}" + ), + }; + + // This operation is safe as we ensure that we are only accessing within the + // already-known bounds of the type of the value. + // + // Furthermore, the only values that can exist in this place as elements _are_ + // ones that can exist as basic values, so we satisfy the condition to construct + // a new BasicValueEnum safely. + let const_values = (0..num_elements) + .map(|i| { + let i = c_uint::try_from(i).map_err(|_| { + Error::MalformedLLVM(format!( + "Could not convert {i} into a valid index in an aggregate with type {}", + &val_type + )) + })?; + let value_ref = unsafe { LLVMGetAggregateElement(val_ref, i) }; + Ok(unsafe { BasicValueEnum::new(value_ref) }) + }) + .collect::>>()?; + + Ok(const_values) +} + /// Extracts a name from the provided basic `value`, or returns [`None`] if /// that `value` is not possible. 
/// diff --git a/crates/compiler/src/pass/analysis/module_map.rs b/crates/compiler/src/pass/analysis/module_map.rs index d0f3a25..1d48fd4 100644 --- a/crates/compiler/src/pass/analysis/module_map.rs +++ b/crates/compiler/src/pass/analysis/module_map.rs @@ -496,8 +496,7 @@ mod test { // The data layout should have been picked up correctly from the module, and we // know that parsing works, so we check equality let data_layout = &map.data_layout; - let expected_data_layout = - DataLayout::new("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128")?; + let expected_data_layout = DataLayout::new("e-m:e-p:64:64-i64:64-i128:128-n32:64-S128")?; assert_eq!(data_layout, &expected_data_layout); Ok(()) diff --git a/crates/compiler/src/polyfill.rs b/crates/compiler/src/polyfill.rs index 5732390..0388837 100644 --- a/crates/compiler/src/polyfill.rs +++ b/crates/compiler/src/polyfill.rs @@ -1181,6 +1181,7 @@ impl PolyfillMap { LLVMType::bool, LLVMType::i8, LLVMType::i16, + LLVMType::i24, LLVMType::i32, LLVMType::i64, LLVMType::i128, @@ -1249,6 +1250,6 @@ mod test { fn has_correct_polyfill_count() { let polyfills = PolyfillMap::new(); let count = polyfills.iter().count(); - assert_eq!(count, 970); + assert_eq!(count, 1103); } } diff --git a/crates/compiler/tests/bug_111.rs b/crates/compiler/tests/bug_111.rs index b5fef2b..1f81f25 100644 --- a/crates/compiler/tests/bug_111.rs +++ b/crates/compiler/tests/bug_111.rs @@ -4,8 +4,9 @@ mod common; #[test] -fn accepts_anonymous_function_argument_names() -> anyhow::Result<()> { +fn accepts_anonymous_function_argument_names() -> miette::Result<()> { // We start by constructing and running the compiler + common::set_miette_reporting()?; let compiler = common::default_compiler_from_path("input/bug/bug-111.ll")?; let flo = compiler.run()?; diff --git a/crates/compiler/tests/common/mod.rs b/crates/compiler/tests/common/mod.rs index 0302db1..4d95b2f 100644 --- a/crates/compiler/tests/common/mod.rs +++ b/crates/compiler/tests/common/mod.rs @@ 
-1,10 +1,19 @@ //! Common utilities for the integration tests, these are intended to make it //! easier to write complex tests of the compiler's functionality. -use std::path::Path; +// Ensures that we can import this common module into tests without getting warnings for every +// function we do not use. +#![allow(dead_code)] + +use std::{collections::HashMap, path::Path}; use hieratika_compiler::{Compiler, CompilerBuilder, context::SourceContext}; -use hieratika_flo::FlatLoweredObject; +use hieratika_flo::{ + FlatLoweredObject, + types::{Block, FunctionSymbol}, +}; +use itertools::Itertools; +use miette::MietteHandlerOpts; /// Creates a compiler—with default settings for passes and polyfills—wrapping /// the module at the provided `path`. @@ -14,14 +23,40 @@ use hieratika_flo::FlatLoweredObject; /// - [`anyhow::Error`] if the path does not exist. /// - [`ltc_errors::compiler::Error`] if the compiler cannot load the file at /// `path` as LLVM IR. -pub fn default_compiler_from_path(path: &str) -> anyhow::Result { +pub fn default_compiler_from_path(path: &str) -> miette::Result { let path = Path::new(path); let ctx = SourceContext::create(path)?; Ok(CompilerBuilder::new(ctx).build()) } +/// Gets all functions in the provided `flo`. +/// +/// Note that this operates based on the symbol table, and will not discover +/// functions not inserted into said table. +pub fn get_functions(flo: &FlatLoweredObject) -> HashMap { + let syms_and_blocks = flo.symbols.code.iter().map(|(s, i)| (s.clone(), *i)).collect_vec(); + syms_and_blocks + .into_iter() + .map(|(s, i)| (s, flo.blocks.get(i))) + .collect() +} + /// Counts the number of functions found in the provided `flo`. +/// +/// Note that this works from the _symbol table_, and will not detect functions +/// not inserted into said table. 
pub fn count_functions(flo: &FlatLoweredObject) -> usize { - flo.blocks.iter().filter(|(_, b)| b.signature.is_some()).count() + get_functions(flo).len() +} + +/// Sets default reporting options for Miette in tests. +/// +/// This should be called at the start of each of the compiler tests. +pub fn set_miette_reporting() -> miette::Result<()> { + miette::set_hook(Box::new(|_| { + Box::new(MietteHandlerOpts::new().width(200).build()) + }))?; + + Ok(()) } diff --git a/crates/compiler/tests/compilation_alloc.rs b/crates/compiler/tests/compilation_alloc.rs new file mode 100644 index 0000000..2047ae7 --- /dev/null +++ b/crates/compiler/tests/compilation_alloc.rs @@ -0,0 +1,16 @@ +//! Tests compilation of `alloc.ll` the Rust core allocation library. + +mod common; + +#[test] +fn compiles_alloc() -> miette::Result<()> { + // We start by constructing and running the compiler + common::set_miette_reporting()?; + let compiler = common::default_compiler_from_path("input/compilation/alloc.ll")?; + let _flo = compiler.run(); + + // There should be a single function in the context. + // assert_eq!(common::count_functions(&flo), 1); + + Ok(()) +} diff --git a/crates/compiler/tests/compilation_basic_add.rs b/crates/compiler/tests/compilation_basic_add.rs index 92e8975..9fdc305 100644 --- a/crates/compiler/tests/compilation_basic_add.rs +++ b/crates/compiler/tests/compilation_basic_add.rs @@ -7,8 +7,9 @@ use itertools::Itertools; mod common; #[test] -fn compiles_add() -> anyhow::Result<()> { +fn compiles_add() -> miette::Result<()> { // We start by constructing and running the compiler + common::set_miette_reporting()?; let compiler = common::default_compiler_from_path("input/compilation/add.ll")?; let flo = compiler.run()?; @@ -20,13 +21,17 @@ fn compiles_add() -> anyhow::Result<()> { assert!(num_blocks <= 8); // We should only see one function in this generation, even if there are lots of - // blocks. + // blocks. 
But we also have two constant initializers, which display as + // functions. let num_functions = common::count_functions(&flo); assert_eq!(num_functions, 1); - // Let's grab that one function and poke at it a bit. - let (_, hieratika_rust_test_input) = - flo.blocks.iter().find(|(_, b)| b.signature.is_some()).unwrap(); + // Let's grab that one function and poke at it a bit, being sure to omit the + // initializers. + let functions = common::get_functions(&flo); + let hieratika_rust_test_input = functions + .get("_ZN19hieratika_rust_test_input3add17h828e50e9267cb510E") + .expect("Function was not present but should have been"); // It should have 13 statements in its body assert_eq!(hieratika_rust_test_input.statements.len(), 13); diff --git a/crates/compiler/tests/compilation_constants.rs b/crates/compiler/tests/compilation_constants.rs new file mode 100644 index 0000000..f2e6ffd --- /dev/null +++ b/crates/compiler/tests/compilation_constants.rs @@ -0,0 +1,14 @@ +//! Tests compilation of various kinds of constant initializer expressions in +//! the LLVM IR input. 
+ +mod common; + +#[test] +fn compiles_constants() -> miette::Result<()> { + // We start by constructing and running the compiler + common::set_miette_reporting()?; + let compiler = common::default_compiler_from_path("input/compilation/constants.ll")?; + let _flo = compiler.run()?; + + Ok(()) +} diff --git a/crates/compiler/tests/compilation_opcodes.rs b/crates/compiler/tests/compilation_opcodes.rs index eaac6c9..272765e 100644 --- a/crates/compiler/tests/compilation_opcodes.rs +++ b/crates/compiler/tests/compilation_opcodes.rs @@ -4,8 +4,9 @@ mod common; #[test] -fn compiles_basic_opcodes() -> anyhow::Result<()> { +fn compiles_basic_opcodes() -> miette::Result<()> { // We start by constructing and running the compiler + common::set_miette_reporting()?; let compiler = common::default_compiler_from_path("input/compilation/opcodes.ll")?; let flo = compiler.run()?; @@ -16,11 +17,10 @@ fn compiles_basic_opcodes() -> anyhow::Result<()> { assert!(num_blocks >= 47); assert!(num_blocks < 100); - // We should see a _minimum_ of 43 functions, as that is the number that appears - // in the source file. However, the construction of the Phi and Select opcodes - // will have resulted in the allocation of two additional ones. + // We should see 43 functions, as that is the number that appears in the source + // file. 
let num_functions = common::count_functions(&flo); - assert_eq!(num_functions, 45); + assert_eq!(num_functions, 43); // Unfortunately this file is sufficiently cluttered that there is little sense // in poking at this all that much more, so we just treat the above as some diff --git a/crates/compiler/tests/compilation_terminators.rs b/crates/compiler/tests/compilation_terminators.rs index b5aa709..24eda61 100644 --- a/crates/compiler/tests/compilation_terminators.rs +++ b/crates/compiler/tests/compilation_terminators.rs @@ -4,8 +4,9 @@ mod common; #[test] -fn compiles_terminator_instructions() -> anyhow::Result<()> { +fn compiles_terminator_instructions() -> miette::Result<()> { // We start by constructing and running the compiler + common::set_miette_reporting()?; let compiler = common::default_compiler_from_path("input/compilation/terminators.ll")?; let flo = compiler.run()?; diff --git a/crates/compiler/tests/invalid_target.rs b/crates/compiler/tests/invalid_target.rs new file mode 100644 index 0000000..5fb078f --- /dev/null +++ b/crates/compiler/tests/invalid_target.rs @@ -0,0 +1,36 @@ +//! Tests that the compiler rejects inputs that have invalid target +//! specifications, both in terms of the target triple and the data layout. 
+ +use hieratika_errors::compile::llvm::Error; + +mod common; + +#[test] +fn rejects_invalid_data_layout() -> miette::Result<()> { + // We start by constructing and running the compiler + let compiler = common::default_compiler_from_path("input/compilation/bad_data_layout.ll")?; + let err = compiler.run(); + + assert!(err.is_err()); + assert!(matches!( + err.unwrap_err().source, + Error::IncompatibleDataLayout(_, _) + )); + + Ok(()) +} + +#[test] +fn rejects_invalid_target_triple() -> miette::Result<()> { + // We start by constructing and running the compiler + let compiler = common::default_compiler_from_path("input/compilation/bad_target_triple.ll")?; + let err = compiler.run(); + + assert!(err.is_err()); + assert!(matches!( + err.unwrap_err().source, + Error::IncompatibleTargetSpecification(_, _) + )); + + Ok(()) +} diff --git a/crates/error/src/compile/llvm.rs b/crates/error/src/compile/llvm.rs index 6c5bd15..e31ebe6 100644 --- a/crates/error/src/compile/llvm.rs +++ b/crates/error/src/compile/llvm.rs @@ -29,6 +29,16 @@ pub enum Error { #[error("Could not create Rust string from C string: {_0}")] CStrConversionError(#[from] Utf8Error), + /// Emitted when the compilation process encounters an incompatible data + /// layout specification in the module being compiled. + #[error("The provided data layout {_0} is not compatible with {_1}")] + IncompatibleDataLayout(String, String), + + /// Emitted when the compilation process encounters an incompatible target + /// machine specification in the module being compiled. 
+ #[error("The provided target specification {_0} is not compatible with {_1}")] + IncompatibleTargetSpecification(String, String), + #[error("`{_0}` with invalid segment `{_1}` could not be parsed as an LLVM data layout")] InvalidDataLayoutSpecification(String, String), diff --git a/crates/flo/src/types.rs b/crates/flo/src/types.rs index d47e2d2..b272f3e 100644 --- a/crates/flo/src/types.rs +++ b/crates/flo/src/types.rs @@ -512,6 +512,7 @@ pub enum Type { Unsigned128, Signed8, Signed16, + Signed24, Signed32, Signed64, Signed128,