Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More work on self hosted compiler #454

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
6091e9f
improve printing, you can now just .print() pretty much any ast
Akuli Dec 11, 2023
2fc30ba
implement dereferencing
Akuli Dec 11, 2023
cbf4682
more print cleanup
Akuli Dec 11, 2023
0a9e6df
delete extra print
Akuli Dec 11, 2023
bd80178
self-hosted: exporting types
Akuli Dec 13, 2023
f8a062a
check if can take address
Akuli Dec 13, 2023
6f35d2a
move run_exe() to separate file
Akuli Dec 13, 2023
855e5b6
check assert type
Akuli Dec 13, 2023
d483812
support "not" operator
Akuli Dec 13, 2023
36eba33
Do not compare compiler warnings
Akuli Dec 13, 2023
2f0c3c6
fix
Akuli Dec 13, 2023
3436685
remove boilerplate (again)
Akuli Dec 13, 2023
d09f4f1
array literal error msg
Akuli Dec 13, 2023
617d0f5
Empty array type error
Akuli Dec 13, 2023
84498a5
port the crappy assertion stuff
Akuli Dec 14, 2023
24f2010
fix sizeof
Akuli Dec 14, 2023
64d8995
fixed globals
Akuli Dec 14, 2023
fbc186e
float/double related implicit conversions
Akuli Dec 14, 2023
064e51b
class field pointer hell
Akuli Dec 14, 2023
80f468c
fix importing global variables
Akuli Dec 14, 2023
10c17ec
pointer fixes
Akuli Dec 14, 2023
bcf3728
simpler solution?
Akuli Dec 14, 2023
678a677
run _macos_startup.jou
Akuli Dec 14, 2023
768e6aa
skip crash tests
Akuli Dec 14, 2023
3ce1ba3
asd
Akuli Dec 14, 2023
86b69ab
temporary thingy
Akuli Dec 14, 2023
c1f6d55
call the special startup function
Akuli Dec 14, 2023
ca5267c
Merge remote-tracking branch 'origin/main' into self-hosted
Akuli Dec 14, 2023
44dd388
more temporary test
Akuli Dec 15, 2023
c73eec6
Merge remote-tracking branch 'origin/main' into self-hosted
Akuli Dec 16, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ jobs:
steps:
- uses: actions/checkout@v3
- run: brew install bash diffutils llvm@13
- run: ./compare_compilers.sh
- run: ./compare_compilers.sh --fix
- run: git diff --exit-code
4 changes: 4 additions & 0 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -190,5 +190,9 @@ jobs:
- uses: actions/checkout@v3
- run: source activate && ./windows_setup.sh --small
shell: bash
- run: source activate && mingw32-make self_hosted_compiler.exe
shell: bash
- run: ./self_hosted_compiler.exe tests/should_succeed/union.jou
shell: bash
- run: source activate && ./compare_compilers.sh
shell: bash
53 changes: 41 additions & 12 deletions self_hosted/create_llvm_ir.jou
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,16 @@ def create_llvm_union_type(types: LLVMType**, ntypes: int) -> LLVMType*:

return LLVMArrayType(LLVMInt64Type(), ((size_needed + 7) / 8) as int) # ceil division


# Pointers in classes are stored as i8*, so that a struct can contain a pointer to itself.
#
# Returns True when the given class field has a pointer type, which is exactly
# the case where the field is stored as i8* instead of its real pointer type.
def field_uses_i8_ptr_hack(field: ClassField*) -> bool:
return field->type->kind == TypeKind::Pointer


def class_type_to_llvm(fields: ClassField*, nfields: int) -> LLVMType*:
elem_types: LLVMType** = malloc(nfields * sizeof elem_types[0])
for i = 0; i < nfields; i++:
# Store all pointers in structs as i8*, so that a struct can contain a pointer to itself for example.
if fields[i].type->kind == TypeKind::Pointer:
if field_uses_i8_ptr_hack(&fields[i]):
elem_types[i] = LLVMPointerType(LLVMInt8Type(), 0)
else:
elem_types[i] = type_to_llvm(fields[i].type)
Expand Down Expand Up @@ -224,6 +229,13 @@ class AstToIR:
printf("unimplemented cast: %s --> %s\n", from->name, to->name)
assert False

# Makes a temporary pointer, places the value there, then casts and reads the pointer.
#
# value: the LLVM value to reinterpret.
# to: the LLVM type that the result is loaded as.
# Returns the value loaded from the temporary through a pointer to `to`.
#
# NOTE(review): presumably the type of `value` and `to` are expected to have
# compatible sizes; nothing here checks that -- confirm against callers.
def do_cast_through_pointers(self, value: LLVMValue*, to: LLVMType*) -> LLVMValue*:
# Allocate a temporary of the value's own type and store the value into it.
p1 = LLVMBuildAlloca(self->builder, LLVMTypeOf(value), "cast_through_ptr_temp")
LLVMBuildStore(self->builder, value, p1)
# Reinterpret the temporary's address as a pointer to the target type.
p2 = LLVMBuildBitCast(self->builder, p1, LLVMPointerType(to, 0), "cast_through_ptr_temp")
# Read the same memory back, now typed as `to`.
return LLVMBuildLoad(self->builder, p2, "cast_through_ptr_result")

def do_binop(
self,
op: AstExpressionKind,
Expand Down Expand Up @@ -316,7 +328,15 @@ class AstToIR:
if op == AstExpressionKind::Eq:
return LLVMBuildICmp(self->builder, LLVMIntPredicate::EQ, lhs_int, rhs_int, "ptreq")
if op == AstExpressionKind::Ne:
return LLVMBuildICmp(self->builder, LLVMIntPredicate::NE, lhs_int, rhs_int, "ptreq")
return LLVMBuildICmp(self->builder, LLVMIntPredicate::NE, lhs_int, rhs_int, "ptrne")
if op == AstExpressionKind::Gt:
return LLVMBuildICmp(self->builder, LLVMIntPredicate::UGT, lhs_int, rhs_int, "ptrgt")
if op == AstExpressionKind::Ge:
return LLVMBuildICmp(self->builder, LLVMIntPredicate::UGE, lhs_int, rhs_int, "ptrge")
if op == AstExpressionKind::Lt:
return LLVMBuildICmp(self->builder, LLVMIntPredicate::ULT, lhs_int, rhs_int, "ptrlt")
if op == AstExpressionKind::Le:
return LLVMBuildICmp(self->builder, LLVMIntPredicate::ULE, lhs_int, rhs_int, "ptrle")
assert False

printf("%s %d %s\n", lhs_type->name, op, rhs_type->name)
Expand Down Expand Up @@ -390,19 +410,23 @@ class AstToIR:
assert field != NULL
field_pointer = LLVMBuildStructGEP2(
self->builder,
type_to_llvm(class_type), instance_pointer,
type_to_llvm(class_type),
instance_pointer,
field->union_id,
field->name,
)

# This cast is needed in two cases:
# * All pointers are i8* in structs so we can do self-referencing classes.
# * This is how unions work.
return LLVMBuildBitCast(
self->builder,
field_pointer, LLVMPointerType(type_to_llvm(field->type),0),
"struct_member_cast",
)
if field_uses_i8_ptr_hack(field) or field->belongs_to_union:
puts("Bitcast!!!!!!")
fflush(stdout)
field_pointer = LLVMBuildBitCast(
self->builder,
field_pointer,
LLVMPointerType(type_to_llvm(field->type), 0),
"class_field_ptr_cast",
)

return field_pointer

if ast->kind == AstExpressionKind::Indexing:
# &pointer[index] = pointer + some offset
Expand Down Expand Up @@ -568,6 +592,8 @@ class AstToIR:
field = instance_type->class_members.find_field(ast->instantiation.field_names[i])
assert field != NULL
value = self->do_expression(&ast->instantiation.field_values[i])
if field_uses_i8_ptr_hack(field) or field->belongs_to_union:
value = self->do_cast_through_pointers(value, LLVMStructGetTypeAtIndex(type_to_llvm(instance_type), field->union_id))
result = LLVMBuildInsertValue(self->builder, result, value, field->union_id, "instance")

elif ast->kind == AstExpressionKind::GetClassField:
Expand All @@ -584,6 +610,9 @@ class AstToIR:
assert field != NULL
result = LLVMBuildExtractValue(self->builder, instance, field->union_id, field->name)

if field_uses_i8_ptr_hack(field) or field->belongs_to_union:
result = self->do_cast_through_pointers(result, type_to_llvm(field->type))

elif ast->kind == AstExpressionKind::GetVariable:
v = get_special_constant(ast->varname)
if v == -1:
Expand Down
2 changes: 2 additions & 0 deletions self_hosted/llvm.jou
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ declare LLVMDisposeModule(M: LLVMModule*) -> None
declare LLVMGetSourceFileName(M: LLVMModule*, Len: long*) -> byte* # Return value not owned
declare LLVMSetDataLayout(M: LLVMModule*, DataLayoutStr: byte*) -> None
declare LLVMSetTarget(M: LLVMModule*, Triple: byte*) -> None
declare LLVMDumpType(Val: LLVMType*) -> None
declare LLVMDumpModule(M: LLVMModule*) -> None
declare LLVMPrintModuleToString(M: LLVMModule*) -> byte*
declare LLVMAddFunction(M: LLVMModule*, Name: byte*, FunctionTy: LLVMType*) -> LLVMValue*
Expand All @@ -212,6 +213,7 @@ declare LLVMIntType(NumBits: int) -> LLVMType*
declare LLVMGetReturnType(FunctionTy: LLVMType*) -> LLVMType*
declare LLVMGetParam(Fn: LLVMValue*, Index: int) -> LLVMValue*
declare LLVMGetElementType(Ty: LLVMType*) -> LLVMType*
declare LLVMStructGetTypeAtIndex(StructTy: LLVMType*, i: int) -> LLVMType*
declare LLVMTypeOf(Val: LLVMValue*) -> LLVMType*
declare LLVMConstNull(Ty: LLVMType*) -> LLVMValue*
declare LLVMGetUndef(Ty: LLVMType*) -> LLVMValue*
Expand Down
6 changes: 1 addition & 5 deletions self_hosted/runs_wrong.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,9 @@ tests/other_errors/missing_value_in_return.jou
tests/other_errors/noreturn_but_return_with_value.jou
tests/other_errors/noreturn_but_return_without_value.jou
tests/should_succeed/compiler_cli.jou
tests/should_succeed/linked_list.jou
tests/should_succeed/pointer.jou
tests/should_succeed/printf.jou
tests/other_errors/return_void.jou
tests/should_succeed/stderr.jou
tests/should_succeed/unused_import.jou
tests/wrong_type/cannot_be_indexed.jou
tests/wrong_type/index.jou
tests/syntax_error/assign_to_None.jou
tests/syntax_error/None_as_value.jou
tests/should_succeed/union.jou
31 changes: 26 additions & 5 deletions self_hosted/typecheck.jou
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def can_cast_implicitly(from: Type*, to: Type*) -> bool:
or (from->is_pointer_type() and to->is_pointer_type() and (from == &void_ptr_type or to == &void_ptr_type))
)


def can_cast_explicitly(from: Type*, to: Type*) -> bool:
return (
from == to
Expand All @@ -53,6 +54,7 @@ def can_cast_explicitly(from: Type*, to: Type*) -> bool:
or (from == &bool_type and to->is_integer_type())
)


# Implicit casts are used in many places, e.g. function arguments.
#
# When you pass an argument of the wrong type, it's best to give an error message
Expand Down Expand Up @@ -115,6 +117,7 @@ class ExportSymbol:
else:
assert False


class ExpressionTypes:
expression: AstExpression*
original_type: Type*
Expand Down Expand Up @@ -237,7 +240,8 @@ class FileTypes:
self->all_functions = realloc(self->all_functions, sizeof self->all_functions[0] * (self->n_all_functions + 1))
self->all_functions[self->n_all_functions++] = symbol->signature.copy()
elif symbol->kind == ExportSymbolKind::GlobalVariable:
pass # TODO
self->globals = realloc(self->globals, sizeof(self->globals[0]) * (self->nglobals + 1))
self->globals[self->nglobals++] = GlobalVariable{name = symbol->name, type = symbol->type}
else:
symbol->print()
assert False
Expand Down Expand Up @@ -419,6 +423,7 @@ def handle_class_members_stage2(ft: FileTypes*, classdef: AstClassDef*) -> None:
name = member->field.name,
type = type_from_ast(ft, &member->field.type),
union_id = union_id++,
belongs_to_union = False,
}
elif member->kind == AstClassMemberKind::Union:
uid = union_id++
Expand All @@ -428,6 +433,7 @@ def handle_class_members_stage2(ft: FileTypes*, classdef: AstClassDef*) -> None:
name = member->union_fields.fields[k].name,
type = type_from_ast(ft, &member->union_fields.fields[k].type),
union_id = uid,
belongs_to_union = True,
}
elif member->kind == AstClassMemberKind::Method:
# Don't handle the method body yet: that is a part of stage 3, not stage 2
Expand Down Expand Up @@ -693,7 +699,16 @@ def check_binop(

if (
(not got_numbers and not got_enums and not got_pointers)
or (op != AstExpressionKind::Eq and op != AstExpressionKind::Ne and not got_numbers)
or (got_enums and op != AstExpressionKind::Eq and op != AstExpressionKind::Ne)
or (
got_pointers
and op != AstExpressionKind::Eq
and op != AstExpressionKind::Ne
and op != AstExpressionKind::Gt
and op != AstExpressionKind::Ge
and op != AstExpressionKind::Lt
and op != AstExpressionKind::Le
)
):
message: byte[500]
snprintf(
Expand Down Expand Up @@ -1026,11 +1041,17 @@ class Stage3TypeChecker:
snprintf(message, sizeof message[0], "value of type %s cannot be indexed", types->original_type->name)
fail(pointer->location, message)

index_type = self->do_expression(index)->original_type
if not index_type->is_integer_type():
snprintf(message, sizeof message[0], "the index inside [...] must be an integer, not %s", index_type->name)
index_types = self->do_expression(index)
assert index_types != NULL

if not index_types->original_type->is_integer_type():
snprintf(message, sizeof message[0], "the index inside [...] must be an integer, not %s", index_types->original_type->name)
fail(index->location, message)

# LLVM assumes that indexes smaller than 64 bits are signed.
# https://github.com/Akuli/jou/issues/48
index_types->do_implicit_cast(long_type, Location{}, NULL)

return pointer_type->value_type

def do_expression_maybe_void(self, expression: AstExpression*) -> ExpressionTypes*:
Expand Down
1 change: 1 addition & 0 deletions self_hosted/types.jou
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class ClassField:
# If multiple fields have the same union_id, they belong to the same union.
# It means that only one of the fields can be used at a time.
union_id: int
belongs_to_union: bool # are there more fields with same union_id

class ClassMembers:
fields: ClassField*
Expand Down
16 changes: 4 additions & 12 deletions src/codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ static LLVMTypeRef codegen_type(const Type *type)

LLVMTypeRef *flat_elems = malloc(sizeof(flat_elems[0]) * n); // NOLINT
for (int i = 0; i < n; i++) {
// Treat all pointers inside structs as if they were void*.
// This allows structs to contain pointers to themselves.
// Treat all pointers inside classes as if they were void*.
// This allows classes to contain pointers to themselves.
if (type->data.classdata.fields.ptr[i].type->kind == TYPE_POINTER)
flat_elems[i] = codegen_type(voidPtrType);
else
Expand Down Expand Up @@ -323,7 +323,7 @@ static void codegen_instruction(const struct State *st, const CfInstruction *ins

LLVMValueRef val = LLVMBuildStructGEP2(st->builder, codegen_type(classtype), getop(0), f->union_id, ins->data.fieldname);
// This cast is needed in two cases:
// * All pointers are i8* in structs so we can do self-referencing classes.
// * All pointers are i8* in classes so we can do self-referencing classes.
// * This is how unions work.
val = LLVMBuildBitCast(st->builder, val, LLVMPointerType(codegen_type(f->type),0), "struct_member_cast");
setdest(val);
Expand All @@ -336,15 +336,7 @@ static void codegen_instruction(const struct State *st, const CfInstruction *ins
}
break;
case CF_PTR_ADD_INT:
{
LLVMValueRef index = getop(1);
if (ins->operands[1]->type->kind == TYPE_UNSIGNED_INTEGER) {
// https://github.com/Akuli/jou/issues/48
// Apparently the default is to interpret indexes as signed.
index = LLVMBuildZExt(st->builder, index, LLVMInt64Type(), "ptr_add_int_implicit_cast");
}
setdest(LLVMBuildGEP(st->builder, getop(0), &index, 1, "ptr_add_int"));
}
setdest(LLVMBuildGEP(st->builder, getop(0), (LLVMValueRef[]){getop(1)}, 1, "ptr_add_int"));
break;
case CF_NUM_CAST:
{
Expand Down
10 changes: 7 additions & 3 deletions src/typecheck.c
Original file line number Diff line number Diff line change
Expand Up @@ -756,14 +756,18 @@ static const Type *typecheck_indexing(
}
assert(ptrtype->kind == TYPE_POINTER);

const Type *indextype = typecheck_expression_not_void(ft, indexexpr)->type;
if (!is_integer_type(indextype)) {
ExpressionTypes *indextypes = typecheck_expression_not_void(ft, indexexpr);
if (!is_integer_type(indextypes->type)) {
fail_with_error(
indexexpr->location,
"the index inside [...] must be an integer, not %s",
indextype->name);
indextypes->type->name);
}

// LLVM assumes that indexes smaller than 64 bits are signed.
// https://github.com/Akuli/jou/issues/48
do_implicit_cast(indextypes, longType, (Location){0}, NULL);

return ptrtype->data.valuetype;
}

Expand Down
Loading