Skip to content

Commit

Permalink
Small improvements and fixes:
Browse files Browse the repository at this point in the history
* fixes error in compressing work-group locals
* optimizes intrinsics of local ids and sizes
* command-line options can now parse all optimizations
* annotates the VideoCore documentation with some errata
* combined operations make sure flag is set on add ALU
* fixes infinite loop, see #128
  • Loading branch information
doe300 committed Dec 15, 2018
1 parent cb1fdc8 commit d1405ab
Show file tree
Hide file tree
Showing 12 changed files with 66 additions and 21 deletions.
Binary file modified doc/VideoCoreIV-AG100-R.pdf
Binary file not shown.
11 changes: 3 additions & 8 deletions src/asm/OpCodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ static unsigned int rotate_right(unsigned int value, int shift)
return (value >> shift) | (value << (32 - shift));
}

Optional<Value> OpCode::calculate(const Optional<Value>& firstOperand, const Optional<Value>& secondOperand) const
Optional<Value> OpCode::operator()(const Optional<Value>& firstOperand, const Optional<Value>& secondOperand) const
{
if(!firstOperand)
return NO_VALUE;
Expand Down Expand Up @@ -596,10 +596,10 @@ Optional<Value> OpCode::calculate(const Optional<Value>& firstOperand, const Opt
{
Optional<Value> tmp = NO_VALUE;
if(numOperands == 1)
tmp = calculate(
tmp = operator()(
firstVal->hasContainer() ? firstVal->container().elements.at(i) : firstVal.value(), NO_VALUE);
else
tmp = calculate(firstVal->hasContainer() ? firstVal->container().elements.at(i) : firstVal.value(),
tmp = operator()(firstVal->hasContainer() ? firstVal->container().elements.at(i) : firstVal.value(),
secondVal->hasContainer() ? secondVal->container().elements.at(i) : secondVal.value());
if(!tmp)
// result could not be calculated for a single component of the vector, abort
Expand Down Expand Up @@ -707,11 +707,6 @@ Optional<Value> OpCode::calculate(const Optional<Value>& firstOperand, const Opt
return NO_VALUE;
}

// Call operator: passes both operands on to calculate() and returns its result unchanged
Optional<Value> OpCode::operator()(const Optional<Value>& firstOperand, const Optional<Value>& secondOperand) const
{
return calculate(firstOperand, secondOperand);
}

const OpCode& OpCode::toOpCode(const std::string& name)
{
const OpCode& code = findOpCode(name);
Expand Down
1 change: 0 additions & 1 deletion src/asm/OpCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,6 @@ namespace vc4c
/*
* Tries to calculate the operation for this op-code with the operands given
*/
Optional<Value> calculate(const Optional<Value>& firstOperand, const Optional<Value>& secondOperand) const;
Optional<Value> operator()(const Optional<Value>& firstOperand, const Optional<Value>& secondOperand) const;

/*
Expand Down
3 changes: 2 additions & 1 deletion src/intermediate/Operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,8 @@ Operation* MoveOperation::combineWith(const OpCode& otherOpCode) const
// use ADD ALU
op = new Operation(OP_OR, getOutput().value(), getSource(), getSource(), conditional, setFlags);
}
else if(otherOpCode.runsOnAddALU() && (!packMode.hasEffect() || packMode.supportsMulALU()))
else if(otherOpCode.runsOnAddALU() && (!packMode.hasEffect() || packMode.supportsMulALU()) &&
setFlags == SetFlag::DONT_SET)
{
// use MUL ALU
op = new Operation(OP_V8MIN, getOutput().value(), getSource(), getSource(), conditional, setFlags);
Expand Down
34 changes: 34 additions & 0 deletions src/intrinsics/Intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,40 @@ static NODISCARD InstructionWalker intrinsifyReadWorkItemInfo(Method& method, In
* -> res = (UNIFORM >> (dim * 8)) & 0xFF
*/
const Local* itemInfo = method.findOrCreateLocal(TYPE_INT32, local);
auto literalDim =
arg.getLiteralValue() ? arg : arg.getSingleWriter() ? arg.getSingleWriter()->precalculate() : NO_VALUE;
if(literalDim && literalDim->getLiteralValue())
{
// NOTE: This forces the local_ids/local_sizes values to be on register-file A, but saves an instruction per
// read
switch(literalDim->getLiteralValue()->unsignedInt())
{
case 0:
return it.reset((new MoveOperation(it->getOutput().value(), itemInfo->createReference()))
->setUnpackMode(UNPACK_8A_32)
->copyExtrasFrom(it.get())
->addDecorations(decoration));
case 1:
return it.reset((new MoveOperation(it->getOutput().value(), itemInfo->createReference()))
->setUnpackMode(UNPACK_8B_32)
->copyExtrasFrom(it.get())
->addDecorations(decoration));
case 2:
return it.reset((new MoveOperation(it->getOutput().value(), itemInfo->createReference()))
->setUnpackMode(UNPACK_8C_32)
->copyExtrasFrom(it.get())
->addDecorations(decoration));
case 3:
return it.reset((new MoveOperation(it->getOutput().value(), itemInfo->createReference()))
->setUnpackMode(UNPACK_8D_32)
->copyExtrasFrom(it.get())
->addDecorations(decoration));
default:
return it.reset((new MoveOperation(it->getOutput().value(), INT_ZERO))
->copyExtrasFrom(it.get())
->addDecorations(decoration));
}
}
Value tmp0 = assign(it, TYPE_INT8) = mul24(arg, 8_val);
Value tmp1 = assign(it, TYPE_INT8) = itemInfo->createReference() >> tmp0;
return it.reset(
Expand Down
3 changes: 2 additions & 1 deletion src/normalization/MemoryAccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1391,7 +1391,8 @@ static bool lowerMemoryToVPM(Method& method, const Local* local, MemoryType type
[&](const Local* l) -> bool { return vpmAreas.find(l) != vpmAreas.end(); }))
{
// TODO insert copy from/to VPM. Need to do via read/write
break;
throw CompilationError(
CompilationStep::NORMALIZER, "Copying from/to VPM is not yet implemented", mem->to_string());
}
++it;
continue;
Expand Down
3 changes: 2 additions & 1 deletion src/optimization/Combiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,8 @@ bool optimizations::combineOperations(const Module& module, Method& method, cons
else if(move != nullptr && nextMove != nullptr)
{
bool firstOnMul = (move->packMode.hasEffect() && move->packMode.supportsMulALU()) ||
(nextMove->packMode.hasEffect() && !nextMove->packMode.supportsMulALU());
(nextMove->packMode.hasEffect() && !nextMove->packMode.supportsMulALU()) ||
nextMove->doesSetFlag();
Operation* newMove0 = move->combineWith(firstOnMul ? OP_ADD : OP_MUL24);
Operation* newMove1 = nextMove->combineWith(firstOnMul ? OP_MUL24 : OP_ADD);
if(newMove0 != nullptr && newMove1 != nullptr)
Expand Down
13 changes: 8 additions & 5 deletions src/optimization/Eliminator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,12 +691,15 @@ bool optimizations::eliminateRedundantBitOp(const Module& module, Method& method

const auto& arg0 = op->assertArgument(0);
const auto& arg1 = op->assertArgument(1);
auto out = op->getOutput().value().local();
if(op->getOutput()->hasLocal())
{
auto out = op->getOutput()->local();

if(arg0.hasLocal())
foundOr(out, arg0.local(), it);
if(arg1.hasLocal())
foundOr(out, arg1.local(), it);
if(arg0.hasLocal())
foundOr(out, arg0.local(), it);
if(arg1.hasLocal())
foundOr(out, arg1.local(), it);
}
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/optimization/LocalCompression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ static void compressLocalIntoRegister(Method& method, const Local& local, const

bool optimizations::compressWorkGroupLocals(const Module& module, Method& method, const Configuration& config)
{
if(method.size() == 0 || method.begin()->empty())
return false;
unsigned char index = 0;
const Value container = method.addNewLocal(TYPE_INT32.toVectorType(16), "%work_group_info");
method.begin()->walk().nextInBlock().emplace(new intermediate::MoveOperation(container, INT_ZERO));
Expand Down
6 changes: 4 additions & 2 deletions src/optimization/Optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ const std::vector<OptimizationPass> Optimizer::ALL_PASSES = {
OptimizationPass("CombineRotations", "combine-rotations", combineVectorRotations,
"combines duplicate vector rotations, e.g. introduced by vector-shuffle into a single rotation",
OptimizationType::REPEAT),
// XXX not enabled with any optimization level for now
OptimizationPass("CommonSubexpressionElimination", "eliminate-common-subexpressions", eliminateCommonSubexpressions,
"eliminates repetitive calculations of common expressions by re-using previous results (WIP, slow)",
OptimizationType::REPEAT),
Expand All @@ -259,8 +260,9 @@ const std::vector<OptimizationPass> Optimizer::ALL_PASSES = {
* can therefore introduce instructions or constructs (e.g. combined instructions) not supported by
* the other optimizations.
*/
// OptimizationPass("CompressWorkGroupInfo", "compress-work-group-info", compressWorkGroupLocals,
// "compresses work-group info into single local", OptimizationType::FINAL),
// XXX not enabled with any optimization level for now
OptimizationPass("CompressWorkGroupInfo", "compress-work-group-info", compressWorkGroupLocals,
"compresses work-group info into single local", OptimizationType::FINAL),
OptimizationPass("SplitReadAfterWrites", "split-read-write", splitReadAfterWrites,
"splits read-after-writes (except if the local is used only very locally), so the reordering and "
"register-allocation have an easier job",
Expand Down
9 changes: 8 additions & 1 deletion src/tools/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,17 @@
using namespace vc4c;
using namespace vc4c::tools;

static auto availableOptimizations = vc4c::optimizations::Optimizer::getPasses(OptimizationLevel::FULL);
/*
 * Collects the parameter names of all passes listed in Optimizer::ALL_PASSES into a set
 */
static std::set<std::string> createAvailableOptimizations()
{
    std::set<std::string> parameterNames;
    const auto& allPasses = vc4c::optimizations::Optimizer::ALL_PASSES;
    for(auto pass = allPasses.begin(); pass != allPasses.end(); ++pass)
        parameterNames.emplace(pass->parameterName);
    return parameterNames;
}

bool tools::parseConfigurationParameter(Configuration& config, const std::string& arg)
{
static auto availableOptimizations = createAvailableOptimizations();
if(arg == "-cl-opt-disable")
{
config.optimizationLevel = OptimizationLevel::NONE;
Expand Down
2 changes: 1 addition & 1 deletion test/RegressionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ static std::vector<Entry> allKernels =
//Entry{PENDING_BOTH, FAST, "./testing/rodinia/streamcluster-Kernels.cl", ""}, // 64-bit integer
Entry{PENDING_LLVM, FAST, "./testing/rodinia/track_ellipse_kernel.cl", ""},

Entry{PASSED, FAST, "./testing/NVIDIA/BitonicSort.cl", "-DLOCAL_SIZE_LIMIT=8"},
Entry{PENDING_BOTH, FAST, "./testing/NVIDIA/BitonicSort.cl", "-DLOCAL_SIZE_LIMIT=8"},
Entry{PASSED, FAST, "./testing/NVIDIA/BitonicSort_b.cl", ""},
Entry{PASSED, FAST, "./testing/NVIDIA/BlackScholes.cl", "-DLOCAL_SIZE_LIMIT=8"},
Entry{PASSED, FAST, "./testing/NVIDIA/BoxFilter.cl", "-DLOCAL_SIZE_LIMIT=8"},
Expand Down

0 comments on commit d1405ab

Please sign in to comment.