Skip to content

Commit

Permalink
better fold memcpy struct ops together
Browse files Browse the repository at this point in the history
  • Loading branch information
RicardoLuis0 committed Jul 30, 2023
1 parent e51acef commit 96ac9ba
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 97 deletions.
200 changes: 103 additions & 97 deletions src/common/scripting/backend/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2796,35 +2796,23 @@ void GenStructCopyOps(PStruct * s)
if(PField * f; (f = dyn_cast<PField>(p->Value)) && !(f->Flags & VARF_Meta))
{ // store all fields
assert(f->Type->SizeKnown);
sortedFields.Push(f);
sortedFields.Insert(sortedFields.SortedFind(f, [](const PField* a, const PField* b)
{
return a->Offset < b->Offset;
}, false),f);
}
}
}

std::sort(sortedFields.begin(), sortedFields.end(),
[](const PField* a, const PField* b)
{
return a->Offset < b->Offset;
}
);

struct field_group
{
size_t start_index;
size_t end_index;
};

TArray<field_group> memcpys;

TArray<PField *> objects;

TArray<PField *> complex;

size_t simple_start = SIZE_MAX;
TArray<StructCopyOp> memcpy_ops;
TArray<StructCopyOp> complex_ops; // array/dynarray/map copies

for(size_t i = 0; i < sortedFields.Size(); i++)
{
auto *t = sortedFields[i]->Type;
auto * t = sortedFields[i]->Type;

auto * baseType = PType::underlyingArrayType(t);

Expand All @@ -2834,108 +2822,126 @@ void GenStructCopyOps(PStruct * s)
if(baseType->isObjectPointer())
{
objects.Push(sortedFields[i]);
if(simple_start == SIZE_MAX) simple_start = i;
memcpy_ops.Push({
StructCopyOpType::Memcpy,
unsigned(sortedFields[i]->Offset),
unsigned(sortedFields[i]->Type->Size),
nullptr
});
}
else
{
complex.Push(sortedFields[i]);
if(simple_start != SIZE_MAX)
{
memcpys.Push({simple_start, i-1});
simple_start = SIZE_MAX;
}
}
}
else
{
if(simple_start == SIZE_MAX) simple_start = i;
memcpy_ops.Push({
StructCopyOpType::Memcpy,
unsigned(sortedFields[i]->Offset),
unsigned(sortedFields[i]->Type->Size),
nullptr
});
}
}

assert(memcpy_ops.IsSorted());

for(PField * obj : objects)
{ // object barriers go before anything else
ops.Push({
obj->Type->isArray() ? StructCopyOpType::ObjArrayBarrier : StructCopyOpType::ObjBarrier,
unsigned(obj->Offset),
obj->Type->Size,
obj->Type
});
}

for(PField * cmp : complex)
{
TArray<StructCopyOp> memcpy_ops;
TArray<StructCopyOp> complex_ops; // array/dynarray/map copies

for(PField * obj : objects)
{ // object barriers go before anything else
ops.Push({
obj->Type->isArray() ? StructCopyOpType::ObjArrayBarrier : StructCopyOpType::ObjBarrier,
unsigned(obj->Offset),
obj->Type->Size,
obj->Type
if(cmp->Type->isArray())
{
auto realType = PType::underlyingArrayType(cmp->Type);
complex_ops.Push({
realType->isStruct() ? StructCopyOpType::ArrayCopyStruct : cmp->Type == TypeString ? StructCopyOpType::ArrayCopyString : StructCopyOpType::ArrayCopyDynArrayMap,
unsigned(cmp->Offset),
cmp->Type->Size,
cmp->Type
});
}

for(field_group mem : memcpys)
{ // place memcpy calls together
assert(mem.start_index != SIZE_MAX && mem.end_index != mem.start_index && mem.end_index >= mem.end_index);

PField * start = sortedFields[mem.start_index];
PField * end = sortedFields[mem.end_index];
// start = offset1 , end = offset2 + size2
memcpy_ops.Push({
StructCopyOpType::Memcpy,
unsigned(start->Offset),
unsigned((end->Offset + end->Type->Size) - start->Offset),
nullptr
else if(!cmp->Type->isStruct())
{
complex_ops.Push({
cmp->Type == TypeString ? StructCopyOpType::StringCopy : StructCopyOpType::DynArrayMapCopy,
unsigned(cmp->Offset),
cmp->Type->Size,
cmp->Type
});
}
else // if(cmp->Type->isStruct())
{ // flatten structs into existing op list
const TArray<StructCopyOp>& copyOps = *FxComplexStructAssign::GetCopyOps(static_cast<PStruct*>(cmp->Type));
// this is guaranteed to exist, since copy ops are being generated in order of dependence during CompileAllFields
unsigned off = unsigned(cmp->Offset);
for(auto op : copyOps)
{
StructCopyOp newop {
op.op,
unsigned(op.offset) + off,
op.size,
op.type
};

if(op.op == StructCopyOpType::ObjArrayBarrier || op.op == StructCopyOpType::ObjBarrier)
{
ops.Push(newop);
}
else if(op.op == StructCopyOpType::Memcpy)
{ // make sure copy ops are in order
memcpy_ops.SortedInsert(newop);
}
else
{
complex_ops.Push(newop);
}
}
}
}

{
// fold memcpy ops
TArray<StructCopyOp> memcpy_ops_tmp;

for(PField * cmp : complex)
assert(memcpy_ops.IsSorted());

unsigned n = memcpy_ops.Size();

for(unsigned i = 0; i < n; i++)
{
if(cmp->Type->isArray())
unsigned j;
for(j = i; (j + 1) < n; j++)
{
auto realType = PType::underlyingArrayType(cmp->Type);
complex_ops.Push({
realType->isStruct() ? StructCopyOpType::ArrayCopyStruct : cmp->Type == TypeString ? StructCopyOpType::ArrayCopyString : StructCopyOpType::ArrayCopyDynArrayMap,
unsigned(cmp->Offset),
cmp->Type->Size,
cmp->Type
});
StructCopyOp cur = memcpy_ops[j];
StructCopyOp next = memcpy_ops[j + 1];
if((cur.offset + cur.size) != next.offset) break;
}
else if(!cmp->Type->isStruct())

StructCopyOp op = memcpy_ops[i];
if(j != i)
{
complex_ops.Push({
cmp->Type == TypeString ? StructCopyOpType::StringCopy : StructCopyOpType::DynArrayMapCopy,
unsigned(cmp->Offset),
cmp->Type->Size,
cmp->Type
});
}
else // if(cmp->Type->isStruct())
{ // flatten structs into existing op list
// TODO optimization: fold memcpy ops in the start and end complex of complex struct into pre-existing memcpy ops
const TArray<StructCopyOp>& copyOps = *FxComplexStructAssign::GetCopyOps(static_cast<PStruct*>(cmp->Type));
// this is guaranteed to exist, since copy ops are being generated in order of dependence during CompileAllFields
unsigned off = unsigned(cmp->Offset);
for(auto op : copyOps)
{
StructCopyOp newop {
op.op,
unsigned(op.offset) + off,
op.size,
op.type
};

if(op.op == StructCopyOpType::ObjArrayBarrier || op.op == StructCopyOpType::ObjBarrier)
{
ops.Push(newop);
}
else if(op.op == StructCopyOpType::Memcpy)
{
memcpy_ops.Push(newop);
}
else
{
complex_ops.Push(newop);
}
}
StructCopyOp merge_op = memcpy_ops[j];
op.size = (merge_op.offset + merge_op.size) - op.offset;
i = j;
}
memcpy_ops_tmp.Push(op);
}

ops.Append(memcpy_ops);
ops.Append(complex_ops);
assert(memcpy_ops_tmp.IsSorted());
memcpy_ops = std::move(memcpy_ops_tmp);
}

ops.Append(memcpy_ops);
ops.Append(complex_ops);

FxComplexStructAssign::struct_copy_ops.Insert(s,std::move(ops));
}

Expand Down
4 changes: 4 additions & 0 deletions src/common/scripting/backend/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,10 @@ struct StructCopyOp
unsigned offset;
unsigned size; // bytes for memcpy, unused otherwise
PType * type; // used for struct, dynarray, map and non-dynamic array copies, null for memcpy
// Strict ordering of copy ops by their offset alone; op kind, size and type
// take no part in the comparison. This is what lets TArray's operator<-based
// helpers (IsSorted / SortedFind / SortedInsert) work on StructCopyOp.
bool operator< (const StructCopyOp& other) const noexcept
{
	return other.offset > offset;
}
};

class FxComplexStructAssign : public FxExpression
Expand Down
94 changes: 94 additions & 0 deletions src/common/utility/tarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,95 @@ class TArray
return i;
}

// !!! THIS REQUIRES AN ELEMENT TYPE THAT'S COMPARABLE WITH THE LT OPERATOR !!!
bool IsSorted()
{
for(unsigned i = 1; i < Count; i++)
{
if(Array[i] < Array[i-1]) return false;
}
return true;
}

// !!! THIS REQUIRES A SORTED OR EMPTY ARRAY !!!
// !!! AND AN ELEMENT TYPE THAT'S COMPARABLE WITH THE LT OPERATOR !!!
//
// exact = false returns the closest match, to be used for, ex., insertions, exact = true returns Size() when no match, like Find does
// Binary search using operator<.
//
// Returns the index of an element equivalent to item (neither is less than
// the other) if one is found. Otherwise:
//   exact = true  -> returns Size() (the "not found" value, like Find)
//   exact = false -> returns the index at which item can be inserted while
//                    keeping the array sorted (0 .. Size())
//
// The search window is the half-open range [lo, limit). Because the upper
// bound is exclusive, shrinking it never computes "mid - 1", so an item that
// sorts before every element cannot wrap the unsigned bound around and read
// out of range. No element count needs special-casing either, so a
// one-element array honours 'exact' like any other size.
unsigned int SortedFind(const T& item, bool exact = true) const
{
	unsigned int lo = 0;
	unsigned int limit = Count;

	while(lo < limit)
	{
		// Probe the lower-middle element of the window.
		// limit > lo holds here, so (limit - lo - 1) cannot underflow.
		const unsigned int mid = lo + ((limit - lo - 1) / 2);

		if(Array[mid] < item)
		{
			lo = mid + 1;
		}
		else if(item < Array[mid])
		{
			limit = mid;
		}
		else
		{
			return mid;
		}
	}

	// Nothing equivalent was found. Every element before lo is less than
	// item and every element from lo onwards is greater, so lo is the sorted
	// insertion point.
	return exact ? Count : lo;
}

// !!! THIS REQUIRES A SORTED OR EMPTY ARRAY !!!
//
// exact = false returns the closest match, to be used for, ex., insertions, exact = true returns Size() when no match, like Find does
// Binary search using a caller-supplied "less than" predicate.
//
// lt(a, b) must return true when a sorts before b, and the array must already
// be ordered consistently with lt.
//
// Returns the index of an element equivalent to item (lt is false in both
// directions) if one is found. Otherwise:
//   exact = true  -> returns Size() (the "not found" value, like Find)
//   exact = false -> returns the index at which item can be inserted while
//                    keeping the array sorted (0 .. Size())
//
// The search window is the half-open range [lo, limit). Because the upper
// bound is exclusive there is no "mid - 1" that could wrap around for
// unsigned indices, and no element count needs special-casing, so a
// one-element array honours 'exact' like any other size.
template<typename Func>
unsigned int SortedFind(const T& item, Func lt, bool exact = true) const
{
	unsigned int lo = 0;
	unsigned int limit = Count;

	while(lo < limit)
	{
		// Probe the lower-middle element of the window.
		// limit > lo holds here, so (limit - lo - 1) cannot underflow.
		const unsigned int mid = lo + ((limit - lo - 1) / 2);

		if(lt(Array[mid], item))
		{
			lo = mid + 1;
		}
		else if(lt(item, Array[mid]))
		{
			limit = mid;
		}
		else
		{
			return mid;
		}
	}

	// Nothing equivalent was found. Every element before lo sorts before
	// item and every element from lo onwards sorts after it, so lo is the
	// sorted insertion point.
	return exact ? Count : lo;
}

bool Contains(const T& item) const
{
unsigned int i;
Expand Down Expand Up @@ -535,6 +624,11 @@ class TArray
}
}

// Inserts item at the index reported by a non-exact SortedFind.
// Like SortedFind, this expects the array to be sorted (or empty) beforehand
// and the element type to be comparable with operator<.
void SortedInsert (const T &item)
{
	const unsigned int position = SortedFind (item, false);
	Insert (position, item);
}

void ShrinkToFit ()
{
if (Most > Count)
Expand Down

0 comments on commit 96ac9ba

Please sign in to comment.