Skip to content

Commit

Permalink
Remove various int types from optimization passes (#44468)
Browse files Browse the repository at this point in the history
  • Loading branch information
pchintalapudi authored Mar 5, 2022
1 parent 111525d commit db28215
Show file tree
Hide file tree
Showing 10 changed files with 90 additions and 106 deletions.
4 changes: 3 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -298,13 +298,15 @@ $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/codegen_shared.h $(BUI
$(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
$(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h
$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-alloc-helpers.h $(SRCDIR)/llvm-pass-helpers.h
$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/processor.h
$(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-propagate-addrspaces.o $(BUILDDIR)/llvm-propagate-addrspaces.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
$(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c)
Expand Down
7 changes: 0 additions & 7 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,6 @@ auto getVoidTy(LLVMContext &ctxt) {
// Returns the LLVM type used for character values in the given context.
// This simply forwards to the getInt32Ty helper, so the result is whatever
// that helper yields for `ctxt` (presumably the 32-bit integer type — the
// helper's definition is outside this view; confirm there).
auto getCharTy(LLVMContext &ctxt) {
return getInt32Ty(ctxt);
}
// Returns the integer type whose width matches `size_t` on the platform this
// file is compiled for: the result of getInt64Ty when `size_t` is wider than
// `uint32_t`, otherwise the result of getInt32Ty. The choice depends only on
// compile-time sizes; `ctxt` merely selects the context that owns the type.
auto getSizeTy(LLVMContext &ctxt) {
    const bool sizeIsWiderThan32 = sizeof(size_t) > sizeof(uint32_t);
    return sizeIsWiderThan32 ? getInt64Ty(ctxt) : getInt32Ty(ctxt);
}
// Returns the pointer-to-i8 type for the given context by forwarding to
// LLVM's Type::getInt8PtrTy(ctxt). No address space is passed, so LLVM's
// default for that parameter applies.
auto getInt8PtrTy(LLVMContext &ctxt) {
return Type::getInt8PtrTy(ctxt);
}
Expand Down
8 changes: 8 additions & 0 deletions src/codegen_shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ enum AddressSpace {
LastSpecial = Loaded,
};

// Returns the LLVM integer type matching the width of `size_t` on the
// platform this header is compiled for: i64 when `size_t` is wider than
// `uint32_t`, i32 otherwise. The comparison is a compile-time constant, so
// only one of the two types is ever produced by a given build; `ctxt` is the
// context the returned type belongs to.
static inline auto getSizeTy(llvm::LLVMContext &ctxt) {
    return (sizeof(size_t) > sizeof(uint32_t))
        ? llvm::Type::getInt64Ty(ctxt)
        : llvm::Type::getInt32Ty(ctxt);
}

namespace JuliaType {
static inline llvm::StructType* get_jlvalue_ty(llvm::LLVMContext &C) {
return llvm::StructType::get(C);
Expand Down
33 changes: 14 additions & 19 deletions src/llvm-alloc-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,6 @@ struct AllocOpt : public JuliaPassContext {
Function *lifetime_start;
Function *lifetime_end;

Type *T_int64;

bool doInitialization(Module &m);
bool runOnFunction(Function &F, function_ref<DominatorTree&()> GetDT);
};
Expand Down Expand Up @@ -555,8 +553,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
AllocaInst *buff;
Instruction *ptr;
if (sz == 0) {
buff = prolog_builder.CreateAlloca(pass.T_int8, ConstantInt::get(pass.T_int64, 0));
ptr = buff;
ptr = buff = prolog_builder.CreateAlloca(Type::getInt8Ty(prolog_builder.getContext()), ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), 0));
}
else if (has_ref) {
// Allocate with the correct type so that the GC frame lowering pass will
Expand All @@ -565,7 +562,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
// the alloca isn't optimized out.
buff = prolog_builder.CreateAlloca(pass.T_prjlvalue);
buff->setAlignment(Align(align));
ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, pass.T_pint8));
ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext())));
}
else {
Type *buffty;
Expand All @@ -575,9 +572,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
buffty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), sz);
buff = prolog_builder.CreateAlloca(buffty);
buff->setAlignment(Align(align));
ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, pass.T_pint8));
ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext())));
}
insertLifetime(ptr, ConstantInt::get(pass.T_int64, sz), orig_inst);
insertLifetime(ptr, ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz), orig_inst);
auto new_inst = cast<Instruction>(prolog_builder.CreateBitCast(ptr, pass.T_pjlvalue));
new_inst->takeName(orig_inst);

Expand Down Expand Up @@ -838,8 +835,8 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
allocty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), field.size);
}
slot.slot = prolog_builder.CreateAlloca(allocty);
insertLifetime(prolog_builder.CreateBitCast(slot.slot, pass.T_pint8),
ConstantInt::get(pass.T_int64, field.size), orig_inst);
insertLifetime(prolog_builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(prolog_builder.getContext())),
ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), field.size), orig_inst);
slots.push_back(std::move(slot));
}
const auto nslots = slots.size();
Expand Down Expand Up @@ -895,8 +892,8 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
}
}
else {
addr = builder.CreateBitCast(slot.slot, pass.T_pint8);
addr = builder.CreateConstInBoundsGEP1_32(pass.T_int8, addr, offset);
addr = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext()));
addr = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), addr, offset);
addr = builder.CreateBitCast(addr, elty->getPointerTo());
}
return addr;
Expand Down Expand Up @@ -947,7 +944,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
if (slot.isref) {
assert(slot.offset == offset);
if (!isa<PointerType>(store_ty)) {
store_val = builder.CreateBitCast(store_val, pass.T_size);
store_val = builder.CreateBitCast(store_val, getSizeTy(builder.getContext()));
store_val = builder.CreateIntToPtr(store_val, pass.T_pjlvalue);
store_ty = pass.T_pjlvalue;
}
Expand Down Expand Up @@ -1010,17 +1007,17 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
else {
uint64_t intval;
memset(&intval, val, 8);
Constant *val = ConstantInt::get(pass.T_size, intval);
Constant *val = ConstantInt::get(getSizeTy(builder.getContext()), intval);
val = ConstantExpr::getIntToPtr(val, pass.T_pjlvalue);
ptr = ConstantExpr::getAddrSpaceCast(val, pass.T_prjlvalue);
}
StoreInst *store = builder.CreateAlignedStore(ptr, slot.slot, Align(sizeof(void*)));
store->setOrdering(AtomicOrdering::NotAtomic);
continue;
}
auto ptr8 = builder.CreateBitCast(slot.slot, pass.T_pint8);
auto ptr8 = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext()));
if (offset > slot.offset)
ptr8 = builder.CreateConstInBoundsGEP1_32(pass.T_int8, ptr8,
ptr8 = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), ptr8,
offset - slot.offset);
auto sub_size = std::min(slot.offset + slot.size, offset + size) -
std::max(offset, slot.offset);
Expand Down Expand Up @@ -1138,10 +1135,8 @@ bool AllocOpt::doInitialization(Module &M)

DL = &M.getDataLayout();

T_int64 = Type::getInt64Ty(getLLVMContext());

lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_pint8 });
lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_pint8 });
lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { Type::getInt8PtrTy(M.getContext()) });
lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { Type::getInt8PtrTy(M.getContext()) });

return true;
}
Expand Down
20 changes: 10 additions & 10 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,20 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
AllocaInst *gcframe = new AllocaInst(
T_prjlvalue,
0,
ConstantInt::get(T_int32, nRoots + 2),
ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2),
Align(16));
gcframe->insertAfter(target);
gcframe->takeName(target);

// Zero out the GC frame.
BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F.getContext()), "");
tempSlot_i8->insertAfter(gcframe);
Type *argsT[2] = {tempSlot_i8->getType(), T_int32};
Type *argsT[2] = {tempSlot_i8->getType(), Type::getInt32Ty(F.getContext())};
Function *memset = Intrinsic::getDeclaration(F.getParent(), Intrinsic::memset, makeArrayRef(argsT));
Value *args[4] = {
tempSlot_i8, // dest
ConstantInt::get(Type::getInt8Ty(F.getContext()), 0), // val
ConstantInt::get(T_int32, sizeof(jl_value_t*) * (nRoots + 2)), // len
ConstantInt::get(Type::getInt32Ty(F.getContext()), sizeof(jl_value_t*) * (nRoots + 2)), // len
ConstantInt::get(Type::getInt1Ty(F.getContext()), 0)}; // volatile
CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args));
cast<MemSetInst>(zeroing)->setDestAlignment(16);
Expand All @@ -101,10 +101,10 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
IRBuilder<> builder(target->getContext());
builder.SetInsertPoint(&*(++BasicBlock::iterator(target)));
StoreInst *inst = builder.CreateAlignedStore(
ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)),
ConstantInt::get(getSizeTy(F.getContext()), JL_GC_ENCODE_PUSHARGS(nRoots)),
builder.CreateBitCast(
builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0),
T_size->getPointerTo()),
getSizeTy(F.getContext())->getPointerTo()),
Align(sizeof(void*)));
inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
inst = builder.CreateAlignedStore(
Expand Down Expand Up @@ -150,7 +150,7 @@ Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
builder.SetInsertPoint(target);

// The first two slots are reserved, so we'll add two to the index.
index = builder.CreateAdd(index, ConstantInt::get(T_int32, 2));
index = builder.CreateAdd(index, ConstantInt::get(Type::getInt32Ty(F.getContext()), 2));

// Lower the intrinsic as a GEP.
auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index);
Expand Down Expand Up @@ -179,11 +179,11 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
if (offset < 0) {
newI = builder.CreateCall(
bigAllocFunc,
{ ptls, ConstantInt::get(T_size, sz + sizeof(void*)) });
{ ptls, ConstantInt::get(getSizeTy(F.getContext()), sz + sizeof(void*)) });
}
else {
auto pool_offs = ConstantInt::get(T_int32, offset);
auto pool_osize = ConstantInt::get(T_int32, osize);
auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize });
}
newI->setAttributes(newI->getCalledFunction()->getAttributes());
Expand Down Expand Up @@ -240,7 +240,7 @@ bool FinalLowerGC::doFinalization(Module &M)
used->eraseFromParent();
if (init.empty())
return true;
ArrayType *ATy = ArrayType::get(T_pint8, init.size());
ArrayType *ATy = ArrayType::get(Type::getInt8PtrTy(M.getContext()), init.size());
used = new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
ConstantArray::get(ATy, init), "llvm.compiler.used");
used->setSection("llvm.metadata");
Expand Down
9 changes: 7 additions & 2 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2168,6 +2168,7 @@ std::vector<int> LateLowerGCFrame::ColorRoots(const State &S) {
// Size of T is assumed to be `sizeof(void*)`
Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V)
{
auto T_size = getSizeTy(T->getContext());
assert(T == T_size || isa<PointerType>(T));
auto TV = cast<PointerType>(V->getType());
auto cast = builder.CreateBitCast(V, T->getPointerTo(TV->getAddressSpace()));
Expand All @@ -2176,6 +2177,7 @@ Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V)

Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Value *V)
{
auto T_size = getSizeTy(builder.getContext());
auto addr = EmitTagPtr(builder, T_size, V);
LoadInst *load = builder.CreateAlignedLoad(T_size, addr, Align(sizeof(size_t)));
load->setOrdering(AtomicOrdering::Unordered);
Expand Down Expand Up @@ -2235,6 +2237,8 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) {


bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
auto T_int32 = Type::getInt32Ty(F.getContext());
auto T_size = getSizeTy(F.getContext());
bool ChangesMade = false;
// We create one alloca for all the jlcall frames that haven't been processed
// yet. LLVM would merge them anyway later, so might as well save it a bit
Expand Down Expand Up @@ -2535,7 +2539,7 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor
// Get the slot address.
auto slotAddress = CallInst::Create(
getOrDeclare(jl_intrinsics::getGCFrameSlot),
{GCFrame, ConstantInt::get(T_int32, Colors[R] + MinColorRoot)},
{GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)},
"", InsertBefore);

Value *Val = GetPtrForNumber(S, R, InsertBefore);
Expand Down Expand Up @@ -2574,6 +2578,7 @@ void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot,

void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>) {
auto F = S.F;
auto T_int32 = Type::getInt32Ty(F->getContext());
int MaxColor = -1;
for (auto C : Colors)
if (C > MaxColor)
Expand All @@ -2595,7 +2600,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State

// Replace Allocas
unsigned AllocaSlot = 2; // first two words are metadata
auto replace_alloca = [this, gcframe, &AllocaSlot](AllocaInst *&AI) {
auto replace_alloca = [this, gcframe, &AllocaSlot, T_int32](AllocaInst *&AI) {
// Pick a slot for the alloca.
unsigned align = AI->getAlignment() / sizeof(void*); // TODO: use DataLayout pointer size
assert(align <= 16 / sizeof(void*) && "Alignment exceeds llvm-final-gc-lowering abilities");
Expand Down
Loading

2 comments on commit db28215

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected. A full report can be found here.

Please sign in to comment.