diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md index 3033720b5df8ca..b96dece1b76dd2 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -1057,7 +1057,7 @@ Sometimes you can circumvent the need to allocate memory on each function call b the output. As a trivial example, compare ```jldoctest prealloc -julia> function xinc(x) +julia> @noinline function xinc(x) return [x, x+1, x+2] end; diff --git a/src/cgutils.cpp b/src/cgutils.cpp index b9923f00ed90ac..79837d80d8f432 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -4554,26 +4554,13 @@ static jl_cgval_t emit_const_len_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ auto cg_tot = ConstantInt::get(T_size, tot); auto call = prepare_call(jl_alloc_obj_func); alloc = ctx.builder.CreateCall(call, { ct, cg_tot, track_pjlvalue(ctx, cg_typ)}); - dbgs() << "alloc: "; - alloc->print(dbgs(), NULL); - // set data (jl_alloc_genericmemory_unchecked_func doesn't have it) decay_alloc = decay_derived(ctx, alloc); - //dbgs() << "\ndecay_alloc: "; - decay_alloc->print(dbgs(), NULL); ptr_field = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1); - //dbgs() << "\nptr_field: "; - //ptr_field->print(dbgs(), NULL); auto objref = emit_pointer_from_objref(ctx, alloc); - //dbgs() << "\nobjref: "; - //objref->print(dbgs(), NULL); - Value *data = track_pjlvalue(ctx, emit_ptrgep(ctx, objref, JL_SMALL_BYTE_ALIGNMENT)); - //dbgs() << "\ndata: "; - //data->print(dbgs(), NULL); - //dbgs() << "\n"; ctx.builder.CreateAlignedStore(data, ptr_field, Align(sizeof(void*))); } else { // just use the dynamic length version since the malloc will be slow anyway - auto ptls = get_current_ptls(ctx); + auto ptls = get_current_ptls(ctx); auto call = prepare_call(jl_alloc_genericmemory_unchecked_func); alloc = ctx.builder.CreateCall(call, { ptls, cg_nbytes, cg_typ}); decay_alloc = maybe_decay_tracked(ctx, alloc); diff --git a/src/codegen.cpp b/src/codegen.cpp index 328b16b75aad7f..fc878c230549f4 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1545,12 +1545,16 @@ static const auto pointer_from_objref_func = new JuliaFunction<>{ {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived)}, false); }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); + FnAttrs.addAttribute(Attribute::NoSync); + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addAttribute(Attribute::Speculatable); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoRecurse); #if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::none()); #else FnAttrs.addAttribute(Attribute::ReadNone); #endif - FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, AttributeSet::get(C, FnAttrs), Attributes(C, {Attribute::NonNull}), diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 188955fd50972f..50c6b168f57c07 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -733,11 +733,11 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF call->eraseFromParent(); return; } - //if (pass.gc_loaded_func == callee) { - // call->replaceAllUsesWith(new_i); - // call->eraseFromParent(); - // return; - //} + if (pass.gc_loaded_func == callee) { + call->replaceAllUsesWith(new_i); + call->eraseFromParent(); + return; + } if (pass.typeof_func == callee) { ++RemovedTypeofs; call->replaceAllUsesWith(tag);