From ad654ba94fdce5aeaaa9848e2b8eaa7c5f449404 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 17 May 2020 14:26:56 +0900 Subject: [PATCH 01/30] Add first implementation --- src/normalization/Normalizer.cpp | 179 +++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index 6401eea2..cf9a7630 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -28,6 +28,8 @@ #include #include +#include + using namespace vc4c; using namespace vc4c::normalization; @@ -235,6 +237,157 @@ static void runNormalizationStep( } } +class ValueExpr +{ + virtual std::string to_string() const = 0; +}; + +class ValueBinaryOp : public ValueExpr +{ +public: + enum class BinaryOp + { + Add, Sub, Mul, Div, Other, + }; + + ValueBinaryOp(std::unique_ptr left, BinaryOp op, std::unique_ptr right) : + left(std::move(left)), op(op), right(std::move(right)) {} + + std::string binaryOpToString() + { + switch (op) + { + case BinaryOp::Add: return "add"; + case BinaryOp::Sub: return "sub"; + case BinaryOp::Mul: return "mul"; + case BinaryOp::Div: return "div"; + case BinaryOp::Other: return "other"; + } + } + + std::string to_string() const override + { + return left->to_string() + " " + binaryOpToString(op) + " " + right->to_string(); + } + + std::unique_ptr left; + BinaryOp op; + std::unique_ptr right; +}; + +class ValueLocal : public ValueExpr +{ +public: + ValueLocal(const Local *local) : local(local) {} + + std::string to_string() const override + { + return local->to_string(); + } + + const Local *local; +}; + +std::unique_ptr makeValueBinaryOpFromLocal(const Local* left, ValueBinaryOp::BinaryOp binOp, const Local* right) +{ + // TODO: Check left and right are not nullptr. 
+ + return std::make_unique( + std::make_unique(left), + binOp, + std::make_unique(right)); +} + +std::unique_ptr iiToExpr(const Local* local, const LocalUser* inst) +{ + using BO = ValueBinaryOp::BinaryOp; + BO binOp = BO::Other; + + // add, sub + if (auto op = dynamic_cast(inst)) + { + if (op->op == OP_ADD) + { + binOp = BO::Add; + } + else if (op->op == OP_SUB) + { + binOp = BO::Sub; + } + else + { + // If op is neither add nor sub, return local as-is. + return std::make_unique(local); + } + + auto left = op->getFirstArg().checkLocal(); + auto right = op->getSecondArg()->checkLocal(); + return makeValueBinaryOpFromLocal(left, binOp, right); + } + // mul, div + else if (auto op = dynamic_cast(inst)) + { + if (op->opCode == "mul") + { + binOp = BO::Mul; + } + else if (op->opCode == "div") + { + binOp = BO::Div; + } + else + { + // If op is neither add nor sub, return local as-is. + return std::make_unique(local); + } + + auto left = op->getFirstArg().checkLocal(); + auto right = op->getSecondArg()->checkLocal(); + return makeValueBinaryOpFromLocal(left, binOp, right); + } + + return std::make_unique(local); +} + +void combineDMALoads(const Module& module, Method& method, const Configuration& config) +{ + // vload16(unsigned int, unsigned char*) + const std::string VLOAD16_METHOD_NAME = "_Z7vload16jPU3AS1Kh"; + + std::vector addrValues; + + auto it = method.walkAllInstructions(); + while(!it.isEndOfMethod()) + { + // Find all method calls + if(auto call = it.get()) + { + if (call->methodName == VLOAD16_METHOD_NAME) + { + auto addr = *call->getArgument(0); + logging::debug() << "method call = " << call->to_string() << ", " << call->methodName << ", " << addr.to_string() << logging::endl; + addrValues.push_back(addr); + } + } + + it.nextInMethod(); + } + + std::vector>> addrExprs; + + for (auto &addrValue : addrValues) + { + if(auto loc = addrValue.checkLocal()) + { + if (auto writer = loc->getSingleWriter()) + { + logging::debug() << "addr - writer: " << 
writer->to_string() << logging::endl; + addrExprs.push_back(std::make_pair(loc, iiToExpr(loc, writer))); + } + } + } +} + void Normalizer::normalize(Module& module) const { // 1. eliminate phi on all methods @@ -253,6 +406,15 @@ void Normalizer::normalize(Module& module) const PROFILE_COUNTER_WITH_PREV(vc4c::profiler::COUNTER_NORMALIZATION + 2, "Eliminate Phi-nodes (after)", method->countInstructions(), vc4c::profiler::COUNTER_NORMALIZATION + 1); } + + { + auto kernels = module.getKernels(); + for(Method* kernelFunc : kernels) + { + combineDMALoads(module, *kernelFunc, config); + } + } + auto kernels = module.getKernels(); // 2. inline kernel-functions for(Method* kernelFunc : kernels) @@ -266,6 +428,23 @@ void Normalizer::normalize(Module& module) const PROFILE_COUNTER_WITH_PREV(vc4c::profiler::COUNTER_NORMALIZATION + 5, "Inline (after)", kernel.countInstructions(), vc4c::profiler::COUNTER_NORMALIZATION + 4); } + + { + logging::info() << "=====================================" << __FILE__ << " : " << __LINE__ << logging::endl; + for (auto &method : module) { + auto it = method->walkAllInstructions(); + while (!it.isEndOfMethod()) { + auto ii = it.get(); + logging::info() << ii->to_string() << logging::endl; + it = it.nextInMethod(); + } + } + + std::string a; + std::cin >> a; + } + + // 3. 
run other normalization steps on kernel functions const auto f = [&module, this](Method* kernelFunc) -> void { normalizeMethod(module, *kernelFunc); }; ThreadPool::scheduleAll("Normalization", kernels, f, THREAD_LOGGER.get()); From b83a38ad75293afbf4903184f87c7dd99b95d827 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sat, 23 May 2020 17:19:16 +0900 Subject: [PATCH 02/30] Implement checking the equal difference --- src/normalization/Normalizer.cpp | 317 ++++++++++++++++++++++++++----- 1 file changed, 268 insertions(+), 49 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index cf9a7630..7bde512a 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -239,6 +239,24 @@ static void runNormalizationStep( class ValueExpr { +public: + // signed : value + using ExpandedExprs = std::vector>>; + + virtual ~ValueExpr() = default; + + virtual bool operator==(const ValueExpr& other) const = 0; + inline bool operator!=(const ValueExpr& other) const + { + return !(*this == other); + } + + virtual std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) = 0; + + // expand value expr as liner combination + // e.g. 
(a + b) * c = a * c + b * c + virtual void expand(ExpandedExprs& exprs) = 0; + virtual std::string to_string() const = 0; }; @@ -250,60 +268,169 @@ class ValueBinaryOp : public ValueExpr Add, Sub, Mul, Div, Other, }; - ValueBinaryOp(std::unique_ptr left, BinaryOp op, std::unique_ptr right) : - left(std::move(left)), op(op), right(std::move(right)) {} + ValueBinaryOp(std::shared_ptr left, BinaryOp op, std::shared_ptr right) : + left(left), op(op), right(right) {} + + bool operator==(const ValueExpr& other) const override + { + if(auto otherOp = dynamic_cast(&other)) + { + return op == otherOp->op && *right == *otherOp->right && *left == *otherOp->left; + } + return false; + } + + std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) override + { + return std::make_shared( + left->replaceLocal(value, expr), + op, + right->replaceLocal(value, expr)); + } - std::string binaryOpToString() + void expand(ExpandedExprs& exprs) override { switch (op) { - case BinaryOp::Add: return "add"; - case BinaryOp::Sub: return "sub"; - case BinaryOp::Mul: return "mul"; - case BinaryOp::Div: return "div"; - case BinaryOp::Other: return "other"; + case BinaryOp::Add: + { + left->expand(exprs); + right->expand(exprs); + break; + } + case BinaryOp::Sub: + { + left->expand(exprs); + + ExpandedExprs temp; + right->expand(temp); + for (auto &e : temp) + { + e.first = !e.first; + } + exprs.insert(exprs.end(), temp.begin(), temp.end()); + break; + } + case BinaryOp::Mul: + { + exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); + break; + } + case BinaryOp::Div: + { + exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); + break; + } + case BinaryOp::Other: break; } } std::string to_string() const override { - return left->to_string() + " " + binaryOpToString(op) + " " + right->to_string(); + std::string opStr; + switch (op) + { + case BinaryOp::Add: opStr = "+"; break; + case BinaryOp::Sub: opStr = "-"; break; + case BinaryOp::Mul: opStr = 
"*"; break; + case BinaryOp::Div: opStr = "/"; break; + case BinaryOp::Other: opStr = "other"; break; + } + + return "(" + left->to_string() + " " + opStr + " " + right->to_string() + ")"; } - std::unique_ptr left; + // std::shared_ptr as_shared() const + // { + // return std::make_shared(left, op, right); + // } + + std::shared_ptr left; BinaryOp op; - std::unique_ptr right; + std::shared_ptr right; }; -class ValueLocal : public ValueExpr +class ValueTerm : public ValueExpr { public: - ValueLocal(const Local *local) : local(local) {} + // TODO: remove the sign parameter + ValueTerm(Value& value, bool sign) : value(value) {} + + std::shared_ptr replaceLocal(Value& from, std::shared_ptr expr) override + { + if (auto fromLocal = from.checkLocal()) + { + if (auto valueLocal = value.checkLocal()) + { + if (*fromLocal == *valueLocal) + { + return expr; + } + } + } + return std::make_shared(value, true); + } + + void expand(ExpandedExprs& exprs) override + { + exprs.push_back(std::make_pair(true, std::make_shared(value, true))); + } + + bool operator==(const ValueExpr& other) const override + { + if(auto otherTerm = dynamic_cast(&other)) + return value == otherTerm->value; + return false; + } std::string to_string() const override { - return local->to_string(); + return value.to_string(); } - const Local *local; + Value value; }; -std::unique_ptr makeValueBinaryOpFromLocal(const Local* left, ValueBinaryOp::BinaryOp binOp, const Local* right) +std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) { - // TODO: Check left and right are not nullptr. 
- - return std::make_unique( - std::make_unique(left), + return std::make_shared( + std::make_shared(left, true), binOp, - std::make_unique(right)); + std::make_shared(right, true)); } -std::unique_ptr iiToExpr(const Local* local, const LocalUser* inst) +// try to convert shl to mul and return it as ValueExpr +std::shared_ptr shlToMul(Value& value, const intermediate::Operation* op) +{ + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); + int shiftValue = 0; + if (auto lit = right.checkLiteral()) + { + shiftValue = lit->signedInt(); + } + else if (auto imm = right.checkImmediate()) + { + shiftValue = imm->getIntegerValue().value_or(0); + } + + if (shiftValue > 0) + { + auto right = Value(Literal(1 << shiftValue), TYPE_INT32); + return makeValueBinaryOpFromLocal(left, ValueBinaryOp::BinaryOp::Mul, right); + } + else + { + return std::make_shared(value, true); + } +} + +std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) { using BO = ValueBinaryOp::BinaryOp; BO binOp = BO::Other; - // add, sub + // add, sub, shr, shl, asr if (auto op = dynamic_cast(inst)) { if (op->op == OP_ADD) @@ -314,14 +441,20 @@ std::unique_ptr iiToExpr(const Local* local, const LocalUser* inst) { binOp = BO::Sub; } + else if (op->op == OP_SHL) + { + // convert shl to mul + return shlToMul(value, op); + // TODO: shr, asr + } else { - // If op is neither add nor sub, return local as-is. - return std::make_unique(local); + // If op is neither add nor sub, return value as-is. + return std::make_shared(value, true); } - auto left = op->getFirstArg().checkLocal(); - auto right = op->getSecondArg()->checkLocal(); + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); return makeValueBinaryOpFromLocal(left, binOp, right); } // mul, div @@ -337,16 +470,56 @@ std::unique_ptr iiToExpr(const Local* local, const LocalUser* inst) } else { - // If op is neither add nor sub, return local as-is. 
- return std::make_unique(local); + // If op is neither add nor sub, return value as-is. + return std::make_shared(value, true); } - auto left = op->getFirstArg().checkLocal(); - auto right = op->getSecondArg()->checkLocal(); + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); return makeValueBinaryOpFromLocal(left, binOp, right); } - return std::make_unique(local); + return std::make_shared(value, true); +} + +std::shared_ptr calcValueExpr(std::shared_ptr expr) +{ + using BO = ValueBinaryOp::BinaryOp; + + ValueExpr::ExpandedExprs expanded; + expr->expand(expanded); + + for (auto &p : expanded) logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; + logging::debug() << logging::endl; + + for (auto p = expanded.begin(); p != expanded.end(); ) + { + auto comp = std::find_if(expanded.begin(), expanded.end(), + [&p](const std::pair>& other) { + return p->first != other.first && *p->second == *other.second; + }); + if (comp != expanded.end()) + { + expanded.erase(comp); + p = expanded.erase(p); + } + else + { + p++; + } + } + + for (auto &p : expanded) logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; + logging::debug() << logging::endl; + + auto result = expanded[0].second; + for (size_t i = 1; i < expanded.size(); i++) + { + auto p = expanded[i]; + result = std::make_shared(result, p.first ? 
BO::Add : BO::Sub, p.second); + } + + return result; } void combineDMALoads(const Module& module, Method& method, const Configuration& config) @@ -373,7 +546,7 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& it.nextInMethod(); } - std::vector>> addrExprs; + std::vector>> addrExprs; for (auto &addrValue : addrValues) { @@ -381,11 +554,58 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& { if (auto writer = loc->getSingleWriter()) { - logging::debug() << "addr - writer: " << writer->to_string() << logging::endl; - addrExprs.push_back(std::make_pair(loc, iiToExpr(loc, writer))); + addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); } } + else + { + addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue, true))); + } } + + for (auto &p : addrExprs) + { + logging::debug() << p.first.to_string() << " -> " << p.second->to_string() << logging::endl; + } + + for (auto ¤t : addrExprs) + { + for (auto &other : addrExprs) + { + auto replaced = current.second->replaceLocal(other.first, other.second); + current.second = replaced; + } + } + + for (auto &pair : addrExprs) + { + logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; + } + + std::shared_ptr diff = nullptr; + bool eqDiff = true; + for (size_t i = 1; i < addrExprs.size(); i++) + { + auto x = addrExprs[i - 1].second; + auto y = addrExprs[i].second; + auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); + + auto currentDiff = calcValueExpr(diffExpr); + + logging::debug() << i << ": " << currentDiff->to_string() << logging::endl; + + if (diff == nullptr) + { + diff = currentDiff; + } + if (*currentDiff != *diff) + { + eqDiff = false; + break; + } + } + + logging::debug() << "all loads are " << (eqDiff ? 
"" : "not ") << "equal difference" << logging::endl; } void Normalizer::normalize(Module& module) const @@ -415,20 +635,6 @@ void Normalizer::normalize(Module& module) const } } - auto kernels = module.getKernels(); - // 2. inline kernel-functions - for(Method* kernelFunc : kernels) - { - Method& kernel = *kernelFunc; - - PROFILE_COUNTER(vc4c::profiler::COUNTER_NORMALIZATION + 4, "Inline (before)", kernel.countInstructions()); - PROFILE_START(Inline); - inlineMethods(module, kernel, config); - PROFILE_END(Inline); - PROFILE_COUNTER_WITH_PREV(vc4c::profiler::COUNTER_NORMALIZATION + 5, "Inline (after)", - kernel.countInstructions(), vc4c::profiler::COUNTER_NORMALIZATION + 4); - } - { logging::info() << "=====================================" << __FILE__ << " : " << __LINE__ << logging::endl; for (auto &method : module) { @@ -444,6 +650,19 @@ void Normalizer::normalize(Module& module) const std::cin >> a; } + auto kernels = module.getKernels(); + // 2. inline kernel-functions + for(Method* kernelFunc : kernels) + { + Method& kernel = *kernelFunc; + + PROFILE_COUNTER(vc4c::profiler::COUNTER_NORMALIZATION + 4, "Inline (before)", kernel.countInstructions()); + PROFILE_START(Inline); + inlineMethods(module, kernel, config); + PROFILE_END(Inline); + PROFILE_COUNTER_WITH_PREV(vc4c::profiler::COUNTER_NORMALIZATION + 5, "Inline (after)", + kernel.countInstructions(), vc4c::profiler::COUNTER_NORMALIZATION + 4); + } // 3. 
run other normalization steps on kernel functions const auto f = [&module, this](Method* kernelFunc) -> void { normalizeMethod(module, *kernelFunc); }; From 77150c68ed62763e6fc93dc620df95e4143f68d6 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Mon, 25 May 2020 15:01:07 +0900 Subject: [PATCH 03/30] Format --- src/normalization/Normalizer.cpp | 571 ++++++++++++++++--------------- 1 file changed, 294 insertions(+), 277 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index 7bde512a..b77c7138 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -240,372 +240,387 @@ static void runNormalizationStep( class ValueExpr { public: - // signed : value - using ExpandedExprs = std::vector>>; + // signed : value + using ExpandedExprs = std::vector>>; - virtual ~ValueExpr() = default; + virtual ~ValueExpr() = default; - virtual bool operator==(const ValueExpr& other) const = 0; - inline bool operator!=(const ValueExpr& other) const - { - return !(*this == other); - } + virtual bool operator==(const ValueExpr& other) const = 0; + inline bool operator!=(const ValueExpr& other) const + { + return !(*this == other); + } - virtual std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) = 0; + virtual std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) = 0; - // expand value expr as liner combination - // e.g. (a + b) * c = a * c + b * c - virtual void expand(ExpandedExprs& exprs) = 0; + // expand value expr as liner combination + // e.g. 
(a + b) * c = a * c + b * c + virtual void expand(ExpandedExprs& exprs) = 0; - virtual std::string to_string() const = 0; + virtual std::string to_string() const = 0; }; class ValueBinaryOp : public ValueExpr { public: - enum class BinaryOp - { - Add, Sub, Mul, Div, Other, - }; + enum class BinaryOp + { + Add, + Sub, + Mul, + Div, + Other, + }; - ValueBinaryOp(std::shared_ptr left, BinaryOp op, std::shared_ptr right) : - left(left), op(op), right(right) {} + ValueBinaryOp(std::shared_ptr left, BinaryOp op, std::shared_ptr right) : + left(left), op(op), right(right) + { + } - bool operator==(const ValueExpr& other) const override - { - if(auto otherOp = dynamic_cast(&other)) + bool operator==(const ValueExpr& other) const override { - return op == otherOp->op && *right == *otherOp->right && *left == *otherOp->left; + if(auto otherOp = dynamic_cast(&other)) + { + return op == otherOp->op && *right == *otherOp->right && *left == *otherOp->left; + } + return false; } - return false; - } - std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) override - { - return std::make_shared( - left->replaceLocal(value, expr), - op, - right->replaceLocal(value, expr)); - } + std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) override + { + return std::make_shared(left->replaceLocal(value, expr), op, right->replaceLocal(value, expr)); + } - void expand(ExpandedExprs& exprs) override - { - switch (op) + void expand(ExpandedExprs& exprs) override { - case BinaryOp::Add: + switch(op) { - left->expand(exprs); - right->expand(exprs); - break; + case BinaryOp::Add: + { + left->expand(exprs); + right->expand(exprs); + break; } - case BinaryOp::Sub: + case BinaryOp::Sub: { - left->expand(exprs); + left->expand(exprs); - ExpandedExprs temp; - right->expand(temp); - for (auto &e : temp) - { - e.first = !e.first; - } - exprs.insert(exprs.end(), temp.begin(), temp.end()); - break; + ExpandedExprs temp; + right->expand(temp); + for(auto& e : temp) + { + e.first = 
!e.first; + } + exprs.insert(exprs.end(), temp.begin(), temp.end()); + break; } - case BinaryOp::Mul: + case BinaryOp::Mul: { - exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); - break; + exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); + break; + } + case BinaryOp::Div: + { + exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); + break; + } + case BinaryOp::Other: + break; } - case BinaryOp::Div: - { - exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); - break; - } - case BinaryOp::Other: break; } - } - std::string to_string() const override - { - std::string opStr; - switch (op) + std::string to_string() const override { - case BinaryOp::Add: opStr = "+"; break; - case BinaryOp::Sub: opStr = "-"; break; - case BinaryOp::Mul: opStr = "*"; break; - case BinaryOp::Div: opStr = "/"; break; - case BinaryOp::Other: opStr = "other"; break; - } + std::string opStr; + switch(op) + { + case BinaryOp::Add: + opStr = "+"; + break; + case BinaryOp::Sub: + opStr = "-"; + break; + case BinaryOp::Mul: + opStr = "*"; + break; + case BinaryOp::Div: + opStr = "/"; + break; + case BinaryOp::Other: + opStr = "other"; + break; + } - return "(" + left->to_string() + " " + opStr + " " + right->to_string() + ")"; - } + return "(" + left->to_string() + " " + opStr + " " + right->to_string() + ")"; + } - // std::shared_ptr as_shared() const - // { - // return std::make_shared(left, op, right); - // } + // std::shared_ptr as_shared() const + // { + // return std::make_shared(left, op, right); + // } - std::shared_ptr left; - BinaryOp op; - std::shared_ptr right; + std::shared_ptr left; + BinaryOp op; + std::shared_ptr right; }; class ValueTerm : public ValueExpr { public: - // TODO: remove the sign parameter - ValueTerm(Value& value, bool sign) : value(value) {} + // TODO: remove the sign parameter + ValueTerm(Value& value, bool sign) : value(value) {} - std::shared_ptr replaceLocal(Value& from, 
std::shared_ptr expr) override - { - if (auto fromLocal = from.checkLocal()) + std::shared_ptr replaceLocal(Value& from, std::shared_ptr expr) override { - if (auto valueLocal = value.checkLocal()) - { - if (*fromLocal == *valueLocal) + if(auto fromLocal = from.checkLocal()) { - return expr; + if(auto valueLocal = value.checkLocal()) + { + if(*fromLocal == *valueLocal) + { + return expr; + } + } } - } + return std::make_shared(value, true); } - return std::make_shared(value, true); - } - void expand(ExpandedExprs& exprs) override - { - exprs.push_back(std::make_pair(true, std::make_shared(value, true))); - } + void expand(ExpandedExprs& exprs) override + { + exprs.push_back(std::make_pair(true, std::make_shared(value, true))); + } - bool operator==(const ValueExpr& other) const override - { - if(auto otherTerm = dynamic_cast(&other)) - return value == otherTerm->value; - return false; - } + bool operator==(const ValueExpr& other) const override + { + if(auto otherTerm = dynamic_cast(&other)) + return value == otherTerm->value; + return false; + } - std::string to_string() const override - { - return value.to_string(); - } + std::string to_string() const override + { + return value.to_string(); + } - Value value; + Value value; }; std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) { - return std::make_shared( - std::make_shared(left, true), - binOp, - std::make_shared(right, true)); + return std::make_shared( + std::make_shared(left, true), binOp, std::make_shared(right, true)); } // try to convert shl to mul and return it as ValueExpr std::shared_ptr shlToMul(Value& value, const intermediate::Operation* op) { - auto left = op->getFirstArg(); - auto right = *op->getSecondArg(); - int shiftValue = 0; - if (auto lit = right.checkLiteral()) - { - shiftValue = lit->signedInt(); - } - else if (auto imm = right.checkImmediate()) - { - shiftValue = imm->getIntegerValue().value_or(0); - } - - if (shiftValue > 0) - { - auto 
right = Value(Literal(1 << shiftValue), TYPE_INT32); - return makeValueBinaryOpFromLocal(left, ValueBinaryOp::BinaryOp::Mul, right); - } - else - { - return std::make_shared(value, true); - } -} - -std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) -{ - using BO = ValueBinaryOp::BinaryOp; - BO binOp = BO::Other; - - // add, sub, shr, shl, asr - if (auto op = dynamic_cast(inst)) - { - if (op->op == OP_ADD) + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); + int shiftValue = 0; + if(auto lit = right.checkLiteral()) { - binOp = BO::Add; + shiftValue = lit->signedInt(); } - else if (op->op == OP_SUB) + else if(auto imm = right.checkImmediate()) { - binOp = BO::Sub; + shiftValue = imm->getIntegerValue().value_or(0); } - else if (op->op == OP_SHL) + + if(shiftValue > 0) { - // convert shl to mul - return shlToMul(value, op); - // TODO: shr, asr + auto right = Value(Literal(1 << shiftValue), TYPE_INT32); + return makeValueBinaryOpFromLocal(left, ValueBinaryOp::BinaryOp::Mul, right); } else { - // If op is neither add nor sub, return value as-is. - return std::make_shared(value, true); + return std::make_shared(value, true); } +} - auto left = op->getFirstArg(); - auto right = *op->getSecondArg(); - return makeValueBinaryOpFromLocal(left, binOp, right); - } - // mul, div - else if (auto op = dynamic_cast(inst)) - { - if (op->opCode == "mul") - { - binOp = BO::Mul; - } - else if (op->opCode == "div") +std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) +{ + using BO = ValueBinaryOp::BinaryOp; + BO binOp = BO::Other; + + // add, sub, shr, shl, asr + if(auto op = dynamic_cast(inst)) { - binOp = BO::Div; + if(op->op == OP_ADD) + { + binOp = BO::Add; + } + else if(op->op == OP_SUB) + { + binOp = BO::Sub; + } + else if(op->op == OP_SHL) + { + // convert shl to mul + return shlToMul(value, op); + // TODO: shr, asr + } + else + { + // If op is neither add nor sub, return value as-is. 
+ return std::make_shared(value, true); + } + + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); + return makeValueBinaryOpFromLocal(left, binOp, right); } - else + // mul, div + else if(auto op = dynamic_cast(inst)) { - // If op is neither add nor sub, return value as-is. - return std::make_shared(value, true); - } + if(op->opCode == "mul") + { + binOp = BO::Mul; + } + else if(op->opCode == "div") + { + binOp = BO::Div; + } + else + { + // If op is neither add nor sub, return value as-is. + return std::make_shared(value, true); + } - auto left = op->getFirstArg(); - auto right = *op->getSecondArg(); - return makeValueBinaryOpFromLocal(left, binOp, right); - } + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); + return makeValueBinaryOpFromLocal(left, binOp, right); + } - return std::make_shared(value, true); + return std::make_shared(value, true); } std::shared_ptr calcValueExpr(std::shared_ptr expr) { - using BO = ValueBinaryOp::BinaryOp; + using BO = ValueBinaryOp::BinaryOp; - ValueExpr::ExpandedExprs expanded; - expr->expand(expanded); + ValueExpr::ExpandedExprs expanded; + expr->expand(expanded); - for (auto &p : expanded) logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; - logging::debug() << logging::endl; + for(auto& p : expanded) + logging::debug() << (p.first ? 
"+" : "-") << p.second->to_string() << " "; + logging::debug() << logging::endl; - for (auto p = expanded.begin(); p != expanded.end(); ) - { - auto comp = std::find_if(expanded.begin(), expanded.end(), - [&p](const std::pair>& other) { - return p->first != other.first && *p->second == *other.second; - }); - if (comp != expanded.end()) + for(auto p = expanded.begin(); p != expanded.end();) { - expanded.erase(comp); - p = expanded.erase(p); - } - else - { - p++; + auto comp = std::find_if( + expanded.begin(), expanded.end(), [&p](const std::pair>& other) { + return p->first != other.first && *p->second == *other.second; + }); + if(comp != expanded.end()) + { + expanded.erase(comp); + p = expanded.erase(p); + } + else + { + p++; + } } - } - for (auto &p : expanded) logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; - logging::debug() << logging::endl; + for(auto& p : expanded) + logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; + logging::debug() << logging::endl; - auto result = expanded[0].second; - for (size_t i = 1; i < expanded.size(); i++) - { - auto p = expanded[i]; - result = std::make_shared(result, p.first ? BO::Add : BO::Sub, p.second); - } + auto result = expanded[0].second; + for(size_t i = 1; i < expanded.size(); i++) + { + auto p = expanded[i]; + result = std::make_shared(result, p.first ? 
BO::Add : BO::Sub, p.second); + } - return result; + return result; } void combineDMALoads(const Module& module, Method& method, const Configuration& config) { - // vload16(unsigned int, unsigned char*) - const std::string VLOAD16_METHOD_NAME = "_Z7vload16jPU3AS1Kh"; + // vload16(unsigned int, unsigned char*) + const std::string VLOAD16_METHOD_NAME = "_Z7vload16jPU3AS1Kh"; - std::vector addrValues; + std::vector addrValues; - auto it = method.walkAllInstructions(); - while(!it.isEndOfMethod()) - { - // Find all method calls - if(auto call = it.get()) + auto it = method.walkAllInstructions(); + while(!it.isEndOfMethod()) { - if (call->methodName == VLOAD16_METHOD_NAME) - { - auto addr = *call->getArgument(0); - logging::debug() << "method call = " << call->to_string() << ", " << call->methodName << ", " << addr.to_string() << logging::endl; - addrValues.push_back(addr); - } - } + // Find all method calls + if(auto call = it.get()) + { + if(call->methodName == VLOAD16_METHOD_NAME) + { + auto addr = *call->getArgument(0); + logging::debug() << "method call = " << call->to_string() << ", " << call->methodName << ", " + << addr.to_string() << logging::endl; + addrValues.push_back(addr); + } + } - it.nextInMethod(); - } + it.nextInMethod(); + } - std::vector>> addrExprs; + std::vector>> addrExprs; - for (auto &addrValue : addrValues) - { - if(auto loc = addrValue.checkLocal()) + for(auto& addrValue : addrValues) { - if (auto writer = loc->getSingleWriter()) - { - addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); - } + if(auto loc = addrValue.checkLocal()) + { + if(auto writer = loc->getSingleWriter()) + { + addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); + } + } + else + { + addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue, true))); + } } - else + + for(auto& p : addrExprs) { - addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue, true))); + logging::debug() << 
p.first.to_string() << " -> " << p.second->to_string() << logging::endl; } - } - - for (auto &p : addrExprs) - { - logging::debug() << p.first.to_string() << " -> " << p.second->to_string() << logging::endl; - } - for (auto ¤t : addrExprs) - { - for (auto &other : addrExprs) + for(auto& current : addrExprs) { - auto replaced = current.second->replaceLocal(other.first, other.second); - current.second = replaced; + for(auto& other : addrExprs) + { + auto replaced = current.second->replaceLocal(other.first, other.second); + current.second = replaced; + } } - } - for (auto &pair : addrExprs) - { - logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; - } + for(auto& pair : addrExprs) + { + logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; + } - std::shared_ptr diff = nullptr; - bool eqDiff = true; - for (size_t i = 1; i < addrExprs.size(); i++) - { - auto x = addrExprs[i - 1].second; - auto y = addrExprs[i].second; - auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); + std::shared_ptr diff = nullptr; + bool eqDiff = true; + for(size_t i = 1; i < addrExprs.size(); i++) + { + auto x = addrExprs[i - 1].second; + auto y = addrExprs[i].second; + auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); - auto currentDiff = calcValueExpr(diffExpr); + auto currentDiff = calcValueExpr(diffExpr); - logging::debug() << i << ": " << currentDiff->to_string() << logging::endl; + logging::debug() << i << ": " << currentDiff->to_string() << logging::endl; - if (diff == nullptr) - { - diff = currentDiff; - } - if (*currentDiff != *diff) - { - eqDiff = false; - break; + if(diff == nullptr) + { + diff = currentDiff; + } + if(*currentDiff != *diff) + { + eqDiff = false; + break; + } } - } - logging::debug() << "all loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; + logging::debug() << "all loads are " << (eqDiff ? 
"" : "not ") << "equal difference" << logging::endl; } void Normalizer::normalize(Module& module) const @@ -628,26 +643,28 @@ void Normalizer::normalize(Module& module) const } { - auto kernels = module.getKernels(); - for(Method* kernelFunc : kernels) - { - combineDMALoads(module, *kernelFunc, config); - } + auto kernels = module.getKernels(); + for(Method* kernelFunc : kernels) + { + combineDMALoads(module, *kernelFunc, config); + } } { - logging::info() << "=====================================" << __FILE__ << " : " << __LINE__ << logging::endl; - for (auto &method : module) { - auto it = method->walkAllInstructions(); - while (!it.isEndOfMethod()) { - auto ii = it.get(); - logging::info() << ii->to_string() << logging::endl; - it = it.nextInMethod(); + logging::info() << "=====================================" << __FILE__ << " : " << __LINE__ << logging::endl; + for(auto& method : module) + { + auto it = method->walkAllInstructions(); + while(!it.isEndOfMethod()) + { + auto ii = it.get(); + logging::info() << ii->to_string() << logging::endl; + it = it.nextInMethod(); + } } - } - std::string a; - std::cin >> a; + std::string a; + std::cin >> a; } auto kernels = module.getKernels(); From 76952cfd9b20a91c876ba480aded31d2f94c4bdc Mon Sep 17 00:00:00 2001 From: long-long-float Date: Mon, 25 May 2020 16:10:52 +0900 Subject: [PATCH 04/30] Add implementation for other cases --- src/normalization/Normalizer.cpp | 242 +++++++++++++++++++++---------- 1 file changed, 162 insertions(+), 80 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index b77c7138..f0215d96 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -257,6 +257,8 @@ class ValueExpr // e.g. 
(a + b) * c = a * c + b * c virtual void expand(ExpandedExprs& exprs) = 0; + virtual Optional getInteger() const = 0; + virtual std::string to_string() const = 0; }; @@ -277,21 +279,122 @@ class ValueBinaryOp : public ValueExpr { } + bool operator==(const ValueExpr& other) const override; + + std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) override; + + void expand(ExpandedExprs& exprs) override; + + Optional getInteger() const override; + + std::string to_string() const override; + + std::shared_ptr left; + BinaryOp op; + std::shared_ptr right; +}; + +class ValueTerm : public ValueExpr +{ +public: + // TODO: remove the sign parameter + ValueTerm(Value& value, bool sign) : value(value) {} + bool operator==(const ValueExpr& other) const override { - if(auto otherOp = dynamic_cast(&other)) - { - return op == otherOp->op && *right == *otherOp->right && *left == *otherOp->left; - } + if(auto otherTerm = dynamic_cast(&other)) + return value == otherTerm->value; return false; } - std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) override + std::shared_ptr replaceLocal(Value& from, std::shared_ptr expr) override { - return std::make_shared(left->replaceLocal(value, expr), op, right->replaceLocal(value, expr)); + if(auto fromLocal = from.checkLocal()) + { + if(auto valueLocal = value.checkLocal()) + { + if(*fromLocal == *valueLocal) + { + return expr; + } + } + } + return std::make_shared(value, true); } void expand(ExpandedExprs& exprs) override + { + exprs.push_back(std::make_pair(true, std::make_shared(value, true))); + } + + Optional getInteger() const override + { + if(auto lit = value.checkLiteral()) + { + return Optional(lit->signedInt()); + } + else if(auto imm = value.checkImmediate()) + { + return imm->getIntegerValue(); + } + return Optional(); + } + + std::string to_string() const override + { + return value.to_string(); + } + + Value value; +}; + +bool ValueBinaryOp::operator==(const ValueExpr& other) const +{ + if(auto otherOp 
= dynamic_cast(&other)) + { + return op == otherOp->op && *right == *otherOp->right && *left == *otherOp->left; + } + return false; +} + +std::shared_ptr ValueBinaryOp::replaceLocal(Value& value, std::shared_ptr expr) +{ + return std::make_shared(left->replaceLocal(value, expr), op, right->replaceLocal(value, expr)); +} + +void ValueBinaryOp::expand(ExpandedExprs& exprs) +{ + auto leftNum = left->getInteger(); + auto rightNum = right->getInteger(); + if(leftNum && rightNum) + { + int l = leftNum.value_or(0); + int r = rightNum.value_or(0); + int num = 0; + switch(op) + { + case BinaryOp::Add: + num = l + r; + break; + case BinaryOp::Sub: + num = l - r; + break; + case BinaryOp::Mul: + num = l * r; + break; + case BinaryOp::Div: + num = l / r; + break; + case BinaryOp::Other: + break; + } + + int sign = num >= 0; + auto value = Value(Literal(std::abs(num)), TYPE_INT32); + std::shared_ptr expr = std::make_shared(value, true); + exprs.push_back(std::make_pair(sign, expr)); + } + else { switch(op) { @@ -316,7 +419,29 @@ class ValueBinaryOp : public ValueExpr } case BinaryOp::Mul: { - exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); + if(leftNum || rightNum) + { + int num = 0; + std::shared_ptr expr = nullptr; + if(leftNum) + { + num = leftNum.value_or(0); + expr = right; + } + else + { + num = rightNum.value_or(0); + expr = left; + } + for(int i = 0; i < num; i++) + { + exprs.push_back(std::make_pair(true, expr)); + } + } + else + { + exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); + } break; } case BinaryOp::Div: @@ -328,82 +453,37 @@ class ValueBinaryOp : public ValueExpr break; } } +} - std::string to_string() const override - { - std::string opStr; - switch(op) - { - case BinaryOp::Add: - opStr = "+"; - break; - case BinaryOp::Sub: - opStr = "-"; - break; - case BinaryOp::Mul: - opStr = "*"; - break; - case BinaryOp::Div: - opStr = "/"; - break; - case BinaryOp::Other: - opStr = "other"; - break; - } - - return "(" 
+ left->to_string() + " " + opStr + " " + right->to_string() + ")"; - } - - // std::shared_ptr as_shared() const - // { - // return std::make_shared(left, op, right); - // } - - std::shared_ptr left; - BinaryOp op; - std::shared_ptr right; -}; - -class ValueTerm : public ValueExpr +Optional ValueBinaryOp::getInteger() const { -public: - // TODO: remove the sign parameter - ValueTerm(Value& value, bool sign) : value(value) {} - - std::shared_ptr replaceLocal(Value& from, std::shared_ptr expr) override - { - if(auto fromLocal = from.checkLocal()) - { - if(auto valueLocal = value.checkLocal()) - { - if(*fromLocal == *valueLocal) - { - return expr; - } - } - } - return std::make_shared(value, true); - } - - void expand(ExpandedExprs& exprs) override - { - exprs.push_back(std::make_pair(true, std::make_shared(value, true))); - } - - bool operator==(const ValueExpr& other) const override - { - if(auto otherTerm = dynamic_cast(&other)) - return value == otherTerm->value; - return false; - } - - std::string to_string() const override - { - return value.to_string(); - } + return Optional(); +} - Value value; -}; +std::string ValueBinaryOp::to_string() const +{ + std::string opStr; + switch(op) + { + case BinaryOp::Add: + opStr = "+"; + break; + case BinaryOp::Sub: + opStr = "-"; + break; + case BinaryOp::Mul: + opStr = "*"; + break; + case BinaryOp::Div: + opStr = "/"; + break; + case BinaryOp::Other: + opStr = "other"; + break; + } + + return "(" + left->to_string() + " " + opStr + " " + right->to_string() + ")"; +} std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) { @@ -606,6 +686,8 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); auto currentDiff = calcValueExpr(diffExpr); + // Apply calcValueExpr again for integer literals. 
+ currentDiff = calcValueExpr(currentDiff); logging::debug() << i << ": " << currentDiff->to_string() << logging::endl; From a5fd0d34de7fb65f3a19edd75ca05916c7c135fe Mon Sep 17 00:00:00 2001 From: long-long-float Date: Mon, 25 May 2020 16:27:23 +0900 Subject: [PATCH 05/30] Fix to process at each blocks --- src/normalization/Normalizer.cpp | 119 +++++++++++++++---------------- 1 file changed, 56 insertions(+), 63 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index f0215d96..e3323fb2 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -602,10 +602,6 @@ std::shared_ptr calcValueExpr(std::shared_ptr expr) } } - for(auto& p : expanded) - logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; - logging::debug() << logging::endl; - auto result = expanded[0].second; for(size_t i = 1; i < expanded.size(); i++) { @@ -621,88 +617,85 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& // vload16(unsigned int, unsigned char*) const std::string VLOAD16_METHOD_NAME = "_Z7vload16jPU3AS1Kh"; - std::vector addrValues; - - auto it = method.walkAllInstructions(); - while(!it.isEndOfMethod()) + for(auto& bb : method) { - // Find all method calls - if(auto call = it.get()) + std::vector addrValues; + for(auto& it : bb) { - if(call->methodName == VLOAD16_METHOD_NAME) + // Find all method calls + if(auto call = dynamic_cast(it.get())) { - auto addr = *call->getArgument(0); - logging::debug() << "method call = " << call->to_string() << ", " << call->methodName << ", " - << addr.to_string() << logging::endl; - addrValues.push_back(addr); + if(call->methodName == VLOAD16_METHOD_NAME) + { + auto addr = *call->getArgument(0); + logging::debug() << "method call = " << call->to_string() << ", " << call->methodName << ", " + << addr.to_string() << logging::endl; + addrValues.push_back(addr); + } } } - it.nextInMethod(); - } + if(addrValues.size() == 0) + continue; 
- std::vector>> addrExprs; + std::vector>> addrExprs; - for(auto& addrValue : addrValues) - { - if(auto loc = addrValue.checkLocal()) + for(auto& addrValue : addrValues) { - if(auto writer = loc->getSingleWriter()) + if(auto loc = addrValue.checkLocal()) { - addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); + if(auto writer = loc->getSingleWriter()) + { + addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); + } + } + else + { + addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue, true))); } } - else + + for(auto& current : addrExprs) { - addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue, true))); + for(auto& other : addrExprs) + { + auto replaced = current.second->replaceLocal(other.first, other.second); + current.second = replaced; + } } - } - for(auto& p : addrExprs) - { - logging::debug() << p.first.to_string() << " -> " << p.second->to_string() << logging::endl; - } - - for(auto& current : addrExprs) - { - for(auto& other : addrExprs) + for(auto& pair : addrExprs) { - auto replaced = current.second->replaceLocal(other.first, other.second); - current.second = replaced; + logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; } - } - for(auto& pair : addrExprs) - { - logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; - } - - std::shared_ptr diff = nullptr; - bool eqDiff = true; - for(size_t i = 1; i < addrExprs.size(); i++) - { - auto x = addrExprs[i - 1].second; - auto y = addrExprs[i].second; - auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); + std::shared_ptr diff = nullptr; + bool eqDiff = true; + for(size_t i = 1; i < addrExprs.size(); i++) + { + auto x = addrExprs[i - 1].second; + auto y = addrExprs[i].second; + auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); - auto currentDiff = calcValueExpr(diffExpr); - // Apply calcValueExpr 
again for integer literals. - currentDiff = calcValueExpr(currentDiff); + auto currentDiff = calcValueExpr(diffExpr); + // Apply calcValueExpr again for integer literals. + currentDiff = calcValueExpr(currentDiff); - logging::debug() << i << ": " << currentDiff->to_string() << logging::endl; + logging::debug() << i << ": " << currentDiff->to_string() << logging::endl; - if(diff == nullptr) - { - diff = currentDiff; - } - if(*currentDiff != *diff) - { - eqDiff = false; - break; + if(diff == nullptr) + { + diff = currentDiff; + } + if(*currentDiff != *diff) + { + eqDiff = false; + break; + } } - } - logging::debug() << "all loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; + logging::debug() << "all loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; + } } void Normalizer::normalize(Module& module) const From 57f4bffc945fe5becbf5f437af81634933881c57 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sat, 13 Jun 2020 16:30:58 +0900 Subject: [PATCH 06/30] Fix a little --- src/normalization/Normalizer.cpp | 69 +++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index e3323fb2..16a98d78 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -297,8 +297,7 @@ class ValueBinaryOp : public ValueExpr class ValueTerm : public ValueExpr { public: - // TODO: remove the sign parameter - ValueTerm(Value& value, bool sign) : value(value) {} + ValueTerm(Value& value) : value(value) {} bool operator==(const ValueExpr& other) const override { @@ -319,12 +318,12 @@ class ValueTerm : public ValueExpr } } } - return std::make_shared(value, true); + return std::make_shared(value); } void expand(ExpandedExprs& exprs) override { - exprs.push_back(std::make_pair(true, std::make_shared(value, true))); + exprs.push_back(std::make_pair(true, std::make_shared(value))); } Optional getInteger() const 
override @@ -391,7 +390,7 @@ void ValueBinaryOp::expand(ExpandedExprs& exprs) int sign = num >= 0; auto value = Value(Literal(std::abs(num)), TYPE_INT32); - std::shared_ptr expr = std::make_shared(value, true); + std::shared_ptr expr = std::make_shared(value); exprs.push_back(std::make_pair(sign, expr)); } else @@ -488,7 +487,7 @@ std::string ValueBinaryOp::to_string() const std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) { return std::make_shared( - std::make_shared(left, true), binOp, std::make_shared(right, true)); + std::make_shared(left), binOp, std::make_shared(right)); } // try to convert shl to mul and return it as ValueExpr @@ -513,7 +512,7 @@ std::shared_ptr shlToMul(Value& value, const intermediate::Operation* } else { - return std::make_shared(value, true); + return std::make_shared(value); } } @@ -542,7 +541,7 @@ std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) else { // If op is neither add nor sub, return value as-is. - return std::make_shared(value, true); + return std::make_shared(value); } auto left = op->getFirstArg(); @@ -563,7 +562,7 @@ std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) else { // If op is neither add nor sub, return value as-is. 
- return std::make_shared(value, true); + return std::make_shared(value); } auto left = op->getFirstArg(); @@ -571,7 +570,7 @@ std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) return makeValueBinaryOpFromLocal(left, binOp, right); } - return std::make_shared(value, true); + return std::make_shared(value); } std::shared_ptr calcValueExpr(std::shared_ptr expr) @@ -619,6 +618,7 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& for(auto& bb : method) { + std::vector loadInstrs; std::vector addrValues; for(auto& it : bb) { @@ -631,6 +631,7 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& logging::debug() << "method call = " << call->to_string() << ", " << call->methodName << ", " << addr.to_string() << logging::endl; addrValues.push_back(addr); + loadInstrs.push_back(call); } } } @@ -651,7 +652,7 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& } else { - addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue, true))); + addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue))); } } @@ -695,6 +696,35 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& } logging::debug() << "all loads are " << (eqDiff ? 
"" : "not ") << "equal difference" << logging::endl; + + if (eqDiff) + { + auto it = bb.walk(); + bool firstCall = true; + while (!it.isEndOfBlock()) + { + auto call = it.get(); + if (call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) + { + if (firstCall) + { + firstCall = false; + + // TODO: limit loadInstrs.size() + // it.reset(DMA load); + } + else + { + // it.reset(VPM load); + it.erase(); + } + } + else + { + it.nextInBlock(); + } + } + } } } @@ -717,6 +747,23 @@ void Normalizer::normalize(Module& module) const method->countInstructions(), vc4c::profiler::COUNTER_NORMALIZATION + 1); } + { + logging::info() << "=====================================" << __FILE__ << " : " << __LINE__ << logging::endl; + for(auto& method : module) + { + auto it = method->walkAllInstructions(); + while(!it.isEndOfMethod()) + { + auto ii = it.get(); + logging::info() << ii->to_string() << logging::endl; + it = it.nextInMethod(); + } + } + + std::string a; + std::cin >> a; + } + { auto kernels = module.getKernels(); for(Method* kernelFunc : kernels) From a5a1397f1aac88c85f77220252f1378403cbc2ad Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sat, 20 Jun 2020 20:22:33 +0900 Subject: [PATCH 07/30] Use memory pitch from the difference --- src/normalization/Normalizer.cpp | 66 ++++++++++++++++++++++---------- src/periphery/VPM.cpp | 5 ++- src/periphery/VPM.h | 2 +- 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index 16a98d78..5ee8e9a9 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -16,6 +16,7 @@ #include "../optimization/ControlFlow.h" #include "../optimization/Eliminator.h" #include "../optimization/Reordering.h" +#include "../periphery/VPM.h" #include "../spirv/SPIRVBuiltins.h" #include "Inliner.h" #include "LiteralValues.h" @@ -32,6 +33,7 @@ using namespace vc4c; using namespace vc4c::normalization; +using namespace 
vc4c::periphery; static bool checkWorkGroupUniform(const Value& arg) { @@ -389,6 +391,7 @@ void ValueBinaryOp::expand(ExpandedExprs& exprs) } int sign = num >= 0; + // TODO: Care other types auto value = Value(Literal(std::abs(num)), TYPE_INT32); std::shared_ptr expr = std::make_shared(value); exprs.push_back(std::make_pair(sign, expr)); @@ -627,7 +630,7 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& { if(call->methodName == VLOAD16_METHOD_NAME) { - auto addr = *call->getArgument(0); + auto addr = call->assertArgument(0); logging::debug() << "method call = " << call->to_string() << ", " << call->methodName << ", " << addr.to_string() << logging::endl; addrValues.push_back(addr); @@ -697,32 +700,53 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& logging::debug() << "all loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; - if (eqDiff) + if(eqDiff) { - auto it = bb.walk(); - bool firstCall = true; - while (!it.isEndOfBlock()) + logging::debug() << "diff: " << diff->to_string() << logging::endl; + + if (auto term = std::dynamic_pointer_cast(diff)) { - auto call = it.get(); - if (call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) + if (auto mpValue = term->value.getConstantValue()) { - if (firstCall) - { - firstCall = false; - - // TODO: limit loadInstrs.size() - // it.reset(DMA load); - } - else + if (auto mpLiteral = mpValue->getLiteralValue()) { - // it.reset(VPM load); - it.erase(); + if (mpLiteral->unsignedInt() < 1u << 12) + { + uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()); + + auto it = bb.walk(); + bool firstCall = true; + while(!it.isEndOfBlock()) + { + auto call = it.get(); + if(call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) + { + it.erase(); + + if(firstCall) + { + firstCall = false; + + // TODO: limit loadInstrs.size() + + uint64_t rows = loadInstrs.size(); + VPMArea 
area(VPMUsage::SCRATCH, 0, static_cast(rows)); + auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); + it = method.vpm->insertReadRAM(method, it, call->assertArgument(1), TYPE_INT32, &area, + true, INT_ZERO, entries, Optional(memoryPitch)); + } + + auto output = *call->getOutput(); + it = method.vpm->insertReadVPM(method, it, output); + } + else + { + it.nextInBlock(); + } + } + } } } - else - { - it.nextInBlock(); - } } } } diff --git a/src/periphery/VPM.cpp b/src/periphery/VPM.cpp index 52507e19..76681d43 100644 --- a/src/periphery/VPM.cpp +++ b/src/periphery/VPM.cpp @@ -675,7 +675,7 @@ InstructionWalker VPM::insertWriteVPM(Method& method, InstructionWalker it, cons } InstructionWalker VPM::insertReadRAM(Method& method, InstructionWalker it, const Value& memoryAddress, DataType type, - const VPMArea* area, bool useMutex, const Value& inAreaOffset, const Value& numEntries) + const VPMArea* area, bool useMutex, const Value& inAreaOffset, const Value& numEntries, Optional memoryPitch) { if(area != nullptr) // FIXME this needs to have the numEntries added and the correct type!!! @@ -744,7 +744,8 @@ InstructionWalker VPM::insertReadRAM(Method& method, InstructionWalker it, const if(numEntries != INT_ONE) // NOTE: This for read the pitch (start-to-start) and for write the stride (end-to-start) is set, we need to set // this to the data size, but not required for write setup! 
- strideSetup.strideSetup = VPRStrideSetup(static_cast(type.getInMemoryWidth())); + // strideSetup.strideSetup = VPRStrideSetup(static_cast(type.getInMemoryWidth())); + strideSetup.strideSetup = VPRStrideSetup(static_cast(memoryPitch.value_or(type.getInMemoryWidth()))); it.emplace(new LoadImmediate(VPM_IN_SETUP_REGISTER, Literal(strideSetup.value))); it->addDecorations(InstructionDecorations::VPM_READ_CONFIGURATION); it.nextInBlock(); diff --git a/src/periphery/VPM.h b/src/periphery/VPM.h index be6af3ea..44039eea 100644 --- a/src/periphery/VPM.h +++ b/src/periphery/VPM.h @@ -866,7 +866,7 @@ namespace vc4c */ NODISCARD InstructionWalker insertReadRAM(Method& method, InstructionWalker it, const Value& memoryAddress, DataType type, const VPMArea* area = nullptr, bool useMutex = true, - const Value& inAreaOffset = INT_ZERO, const Value& numEntries = INT_ONE); + const Value& inAreaOffset = INT_ZERO, const Value& numEntries = INT_ONE, Optional memoryPitch = {}); /* * Inserts a write from VPM into RAM via DMA */ From 408477a3dc06c7fbfbbf797c6fe2ac054ef072e3 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 28 Jun 2020 21:49:13 +0900 Subject: [PATCH 08/30] Fix a little --- src/normalization/Normalizer.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index 5ee8e9a9..c9ef6b0b 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -16,6 +16,7 @@ #include "../optimization/ControlFlow.h" #include "../optimization/Eliminator.h" #include "../optimization/Reordering.h" +#include "../intermediate/operators.h" #include "../periphery/VPM.h" #include "../spirv/SPIRVBuiltins.h" #include "Inliner.h" @@ -34,6 +35,7 @@ using namespace vc4c; using namespace vc4c::normalization; using namespace vc4c::periphery; +using namespace vc4c::operators; static bool checkWorkGroupUniform(const Value& arg) { @@ -729,10 +731,15 @@ void combineDMALoads(const Module& 
module, Method& method, const Configuration& // TODO: limit loadInstrs.size() + Value offset = assign(it, TYPE_INT32) = addrValues[0] << 4_val; + Value addr = assign(it, TYPE_INT32) = offset + call->assertArgument(1); + + DataType TYPE_UCHAR16{DataType::BYTE, 16, false}; + uint64_t rows = loadInstrs.size(); VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); - it = method.vpm->insertReadRAM(method, it, call->assertArgument(1), TYPE_INT32, &area, + it = method.vpm->insertReadRAM(method, it, addr, TYPE_UCHAR16, &area, true, INT_ZERO, entries, Optional(memoryPitch)); } From 0a462903047b963ea89908c3e6fe646fa1677c76 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 5 Jul 2020 19:43:01 +0900 Subject: [PATCH 09/30] Finished implementation --- src/normalization/Normalizer.cpp | 90 ++++++++++---------------------- src/periphery/VPM.cpp | 8 +-- src/periphery/VPM.h | 8 +-- 3 files changed, 38 insertions(+), 68 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index c9ef6b0b..f1912efa 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -301,7 +301,7 @@ class ValueBinaryOp : public ValueExpr class ValueTerm : public ValueExpr { public: - ValueTerm(Value& value) : value(value) {} + ValueTerm(const Value& value) : value(value) {} bool operator==(const ValueExpr& other) const override { @@ -348,7 +348,7 @@ class ValueTerm : public ValueExpr return value.to_string(); } - Value value; + const Value value; }; bool ValueBinaryOp::operator==(const ValueExpr& other) const @@ -585,9 +585,9 @@ std::shared_ptr calcValueExpr(std::shared_ptr expr) ValueExpr::ExpandedExprs expanded; expr->expand(expanded); - for(auto& p : expanded) - logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; - logging::debug() << logging::endl; + // for(auto& p : expanded) + // logging::debug() << (p.first ? 
"+" : "-") << p.second->to_string() << " "; + // logging::debug() << logging::endl; for(auto p = expanded.begin(); p != expanded.end();) { @@ -606,10 +606,9 @@ std::shared_ptr calcValueExpr(std::shared_ptr expr) } } - auto result = expanded[0].second; - for(size_t i = 1; i < expanded.size(); i++) + std::shared_ptr result = std::make_shared(INT_ZERO); + for(auto& p : expanded) { - auto p = expanded[i]; result = std::make_shared(result, p.first ? BO::Add : BO::Sub, p.second); } @@ -633,8 +632,6 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& if(call->methodName == VLOAD16_METHOD_NAME) { auto addr = call->assertArgument(0); - logging::debug() << "method call = " << call->to_string() << ", " << call->methodName << ", " - << addr.to_string() << logging::endl; addrValues.push_back(addr); loadInstrs.push_back(call); } @@ -654,10 +651,10 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& { addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); } - } - else - { - addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue))); + else + { + addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue))); + } } } @@ -670,10 +667,10 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& } } - for(auto& pair : addrExprs) + /*for(auto& pair : addrExprs) { logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; - } + }*/ std::shared_ptr diff = nullptr; bool eqDiff = true; @@ -687,8 +684,6 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& // Apply calcValueExpr again for integer literals. 
currentDiff = calcValueExpr(currentDiff); - logging::debug() << i << ": " << currentDiff->to_string() << logging::endl; - if(diff == nullptr) { diff = currentDiff; @@ -700,21 +695,20 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& } } - logging::debug() << "all loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; + // logging::debug() << "all loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; if(eqDiff) { - logging::debug() << "diff: " << diff->to_string() << logging::endl; - if (auto term = std::dynamic_pointer_cast(diff)) { if (auto mpValue = term->value.getConstantValue()) { if (auto mpLiteral = mpValue->getLiteralValue()) { - if (mpLiteral->unsignedInt() < 1u << 12) + if (mpLiteral->unsignedInt() < (1u << 12)) { - uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()); + // TODO: cover types other than uchar + uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * 1 * 16; auto it = bb.walk(); bool firstCall = true; @@ -725,6 +719,7 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& { it.erase(); + auto output = *call->getOutput(); if(firstCall) { firstCall = false; @@ -739,12 +734,18 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& uint64_t rows = loadInstrs.size(); VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); - it = method.vpm->insertReadRAM(method, it, addr, TYPE_UCHAR16, &area, + it = method.vpm->insertReadRAM(method, it, addr, TYPE_UCHAR16,/* &area */ nullptr, true, INT_ZERO, entries, Optional(memoryPitch)); - } - auto output = *call->getOutput(); - it = method.vpm->insertReadVPM(method, it, output); + // const VPMArea* area = nullptr, bool useMutex = true, const Value& inAreaOffset = INT_ZERO); + it = method.vpm->insertReadVPM(method, it, output, &area, true); + } + else { + // TODO: gather these instructions in one mutex 
lock + it = method.vpm->insertLockMutex(it, true); + assign(it, output) = VPM_IO_REGISTER; + it = method.vpm->insertUnlockMutex(it, true); + } } else { @@ -779,23 +780,7 @@ void Normalizer::normalize(Module& module) const } { - logging::info() << "=====================================" << __FILE__ << " : " << __LINE__ << logging::endl; - for(auto& method : module) - { - auto it = method->walkAllInstructions(); - while(!it.isEndOfMethod()) - { - auto ii = it.get(); - logging::info() << ii->to_string() << logging::endl; - it = it.nextInMethod(); - } - } - - std::string a; - std::cin >> a; - } - - { + // TODO: move this optimization to appropriate location auto kernels = module.getKernels(); for(Method* kernelFunc : kernels) { @@ -803,23 +788,6 @@ void Normalizer::normalize(Module& module) const } } - { - logging::info() << "=====================================" << __FILE__ << " : " << __LINE__ << logging::endl; - for(auto& method : module) - { - auto it = method->walkAllInstructions(); - while(!it.isEndOfMethod()) - { - auto ii = it.get(); - logging::info() << ii->to_string() << logging::endl; - it = it.nextInMethod(); - } - } - - std::string a; - std::cin >> a; - } - auto kernels = module.getKernels(); // 2. inline kernel-functions for(Method* kernelFunc : kernels) diff --git a/src/periphery/VPM.cpp b/src/periphery/VPM.cpp index 76681d43..f459dd7d 100644 --- a/src/periphery/VPM.cpp +++ b/src/periphery/VPM.cpp @@ -1170,15 +1170,17 @@ VPWDMASetup VPMArea::toWriteDMASetup(DataType elementType, uint8_t numRows) cons return setup; } -VPRGenericSetup VPMArea::toReadSetup(DataType elementType, uint8_t numRows) const +VPRGenericSetup VPMArea::toReadSetup(DataType elementType/*, uint8_t numRows*/) const { + uint8_t numRows_ = numRows; + elementType = simplifyComplexTypes(elementType); DataType type = elementType.isUnknown() ? 
getElementType() : elementType; if(type.getScalarBitCount() > 32) { // 64-bit integer vectors are stored as 2 rows of 32-bit integer vectors in VPM type = DataType{32, type.getVectorWidth(), type.isFloatingType()}; - numRows = 2 * numRows; + numRows_ = 2 * numRows_; } if(type.isUnknown()) throw CompilationError( @@ -1187,7 +1189,7 @@ VPRGenericSetup VPMArea::toReadSetup(DataType elementType, uint8_t numRows) cons // if we can pack into a single row, do so. Otherwise set stride to beginning of next row const uint8_t stride = canBePackedIntoRow() ? 1 : static_cast(TYPE_INT32.getScalarBitCount() / type.getScalarBitCount()); - VPRGenericSetup setup(getVPMSize(type), stride, numRows, calculateQPUSideAddress(type, rowOffset, 0)); + VPRGenericSetup setup(getVPMSize(type), stride, numRows_, calculateQPUSideAddress(type, rowOffset, 0)); setup.setHorizontal(IS_HORIZONTAL); setup.setLaned(!IS_PACKED); return setup; diff --git a/src/periphery/VPM.h b/src/periphery/VPM.h index 44039eea..6ffee180 100644 --- a/src/periphery/VPM.h +++ b/src/periphery/VPM.h @@ -801,7 +801,7 @@ namespace vc4c * * If the data-type is set to unknown, the default element-type of this area is used */ - VPRGenericSetup toReadSetup(DataType elementType, uint8_t numRows = 1) const; + VPRGenericSetup toReadSetup(DataType elementType/*, uint8_t numRows = 1*/) const; /* * Generates a RAM-to-VPM DMA read setup for loading the contents of a memory address into this VPM area @@ -909,12 +909,12 @@ namespace vc4c */ void dumpUsage() const; + InstructionWalker insertLockMutex(InstructionWalker it, bool useMutex) const; + InstructionWalker insertUnlockMutex(InstructionWalker it, bool useMutex) const; + private: const unsigned maximumVPMSize; std::vector> areas; - - InstructionWalker insertLockMutex(InstructionWalker it, bool useMutex) const; - InstructionWalker insertUnlockMutex(InstructionWalker it, bool useMutex) const; }; /* From 7a31e860f739bde1ff90ca693d096baae0c71c6e Mon Sep 17 00:00:00 2001 From: 
long-long-float Date: Sun, 12 Jul 2020 19:34:48 +0900 Subject: [PATCH 10/30] Fixed a little --- src/normalization/Normalizer.cpp | 23 +++++++++++++++-------- src/periphery/VPM.cpp | 3 +++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index f1912efa..6d252fac 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -30,8 +30,6 @@ #include #include -#include - using namespace vc4c; using namespace vc4c::normalization; using namespace vc4c::periphery; @@ -623,7 +621,8 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& for(auto& bb : method) { std::vector loadInstrs; - std::vector addrValues; + std::vector offsetValues; + Optional addrValue; for(auto& it : bb) { // Find all method calls @@ -631,19 +630,27 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& { if(call->methodName == VLOAD16_METHOD_NAME) { - auto addr = call->assertArgument(0); - addrValues.push_back(addr); + offsetValues.push_back(call->assertArgument(0)); loadInstrs.push_back(call); + + if (!addrValue.has_value()) + { + addrValue = call->getArgument(1); + } + else if (addrValue == call->getArgument(1)) + { + continue; + } } } } - if(addrValues.size() == 0) + if(offsetValues.size() <= 1) continue; std::vector>> addrExprs; - for(auto& addrValue : addrValues) + for(auto& addrValue : offsetValues) { if(auto loc = addrValue.checkLocal()) { @@ -726,7 +733,7 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& // TODO: limit loadInstrs.size() - Value offset = assign(it, TYPE_INT32) = addrValues[0] << 4_val; + Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; Value addr = assign(it, TYPE_INT32) = offset + call->assertArgument(1); DataType TYPE_UCHAR16{DataType::BYTE, 16, false}; diff --git a/src/periphery/VPM.cpp b/src/periphery/VPM.cpp index f459dd7d..88c742c9 100644 --- 
a/src/periphery/VPM.cpp +++ b/src/periphery/VPM.cpp @@ -1189,6 +1189,9 @@ VPRGenericSetup VPMArea::toReadSetup(DataType elementType/*, uint8_t numRows*/) // if we can pack into a single row, do so. Otherwise set stride to beginning of next row const uint8_t stride = canBePackedIntoRow() ? 1 : static_cast(TYPE_INT32.getScalarBitCount() / type.getScalarBitCount()); + + if (numRows_ >= 16) numRows_ = 1; + VPRGenericSetup setup(getVPMSize(type), stride, numRows_, calculateQPUSideAddress(type, rowOffset, 0)); setup.setHorizontal(IS_HORIZONTAL); setup.setLaned(!IS_PACKED); From 70d1d85b0447f38308427071a6ff3a336af10807 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 19 Jul 2020 22:06:39 +0900 Subject: [PATCH 11/30] Care types other than uchar --- src/normalization/Normalizer.cpp | 77 ++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index 6d252fac..0343741f 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -25,6 +25,10 @@ #include "MemoryAccess.h" #include "Rewrite.h" +#ifdef __GNUC__ +#include +#endif + #include "log.h" #include @@ -390,11 +394,10 @@ void ValueBinaryOp::expand(ExpandedExprs& exprs) break; } - int sign = num >= 0; // TODO: Care other types auto value = Value(Literal(std::abs(num)), TYPE_INT32); std::shared_ptr expr = std::make_shared(value); - exprs.push_back(std::make_pair(sign, expr)); + exprs.push_back(std::make_pair(true, expr)); } else { @@ -615,9 +618,6 @@ std::shared_ptr calcValueExpr(std::shared_ptr expr) void combineDMALoads(const Module& module, Method& method, const Configuration& config) { - // vload16(unsigned int, unsigned char*) - const std::string VLOAD16_METHOD_NAME = "_Z7vload16jPU3AS1Kh"; - for(auto& bb : method) { std::vector loadInstrs; @@ -628,19 +628,43 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& // Find all method calls if(auto call = 
dynamic_cast(it.get())) { - if(call->methodName == VLOAD16_METHOD_NAME) - { - offsetValues.push_back(call->assertArgument(0)); - loadInstrs.push_back(call); + auto name = call->methodName; + +#ifdef __GNUC__ + // Copied from src/spirv/SPIRVHelper.cpp + // TODO: Move these codes to the new helper file. + int status; + char* real_name = abi::__cxa_demangle(name.data(), nullptr, nullptr, &status); + std::string result = name; + + if(status == 0) + { + // if demangling is successful, output the demangled function name + result = real_name; + // the demangled name contains the arguments, so we need ignore them + result = result.substr(0, result.find('(')); + } + free(real_name); + auto isVload16 = result == "vload16"; +#else + auto isVload16 = name.find("vload16") != std::string::npos; +#endif + + // TODO: Check whether all second argument values are equal. + if(isVload16) + { if (!addrValue.has_value()) { addrValue = call->getArgument(1); } - else if (addrValue == call->getArgument(1)) + else if (addrValue != call->getArgument(1)) { continue; } + + offsetValues.push_back(call->assertArgument(0)); + loadInstrs.push_back(call); } } } @@ -648,6 +672,11 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& if(offsetValues.size() <= 1) continue; + for(auto& inst : loadInstrs) + { + logging::debug() << inst->to_string() << logging::endl; + } + std::vector>> addrExprs; for(auto& addrValue : offsetValues) @@ -674,10 +703,10 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& } } - /*for(auto& pair : addrExprs) + for(auto& pair : addrExprs) { logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; - }*/ + } std::shared_ptr diff = nullptr; bool eqDiff = true; @@ -702,10 +731,17 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& } } - // logging::debug() << "all loads are " << (eqDiff ? 
"" : "not ") << "equal difference" << logging::endl; + logging::debug() << addrExprs.size() << " loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; if(eqDiff) { + // The form of diff should be "0 (+/-) expressions...", then remove the value 0 at most right. + ValueExpr::ExpandedExprs expanded; + diff->expand(expanded); + diff = expanded[0].second; + + logging::debug() << "diff = " << diff->to_string() << logging::endl; + if (auto term = std::dynamic_pointer_cast(diff)) { if (auto mpValue = term->value.getConstantValue()) @@ -714,8 +750,6 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& { if (mpLiteral->unsignedInt() < (1u << 12)) { - // TODO: cover types other than uchar - uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * 1 * 16; auto it = bb.walk(); bool firstCall = true; @@ -731,17 +765,22 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& { firstCall = false; - // TODO: limit loadInstrs.size() + auto addrArg = call->assertArgument(1); + // TODO: limit loadInstrs.size() Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; - Value addr = assign(it, TYPE_INT32) = offset + call->assertArgument(1); + Value addr = assign(it, TYPE_INT32) = offset + addrArg; + + auto elemType = addrArg.type.getElementType(); + uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * elemType.getInMemoryWidth() * 16; - DataType TYPE_UCHAR16{DataType::BYTE, 16, false}; + // TODO: cover types other than uchar + DataType TYPE16{elemType.getInMemoryWidth() * DataType::BYTE, 16, false}; uint64_t rows = loadInstrs.size(); VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); - it = method.vpm->insertReadRAM(method, it, addr, TYPE_UCHAR16,/* &area */ nullptr, + it = method.vpm->insertReadRAM(method, it, addr, TYPE16,/* &area */ nullptr, true, INT_ZERO, entries, Optional(memoryPitch)); // const VPMArea* area = 
nullptr, bool useMutex = true, const Value& inAreaOffset = INT_ZERO); From fd9fed0fdec6bb0e83045749ab9f9a53d8ca67c9 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 26 Jul 2020 22:54:03 +0900 Subject: [PATCH 12/30] Fix --- src/normalization/Normalizer.cpp | 106 +++++++++++++++++-------------- 1 file changed, 58 insertions(+), 48 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index 0343741f..3125b3a3 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -738,65 +738,75 @@ void combineDMALoads(const Module& module, Method& method, const Configuration& // The form of diff should be "0 (+/-) expressions...", then remove the value 0 at most right. ValueExpr::ExpandedExprs expanded; diff->expand(expanded); - diff = expanded[0].second; + if (expanded.size() == 1) { + diff = expanded[0].second; - logging::debug() << "diff = " << diff->to_string() << logging::endl; + // logging::debug() << "diff = " << diff->to_string() << logging::endl; - if (auto term = std::dynamic_pointer_cast(diff)) - { - if (auto mpValue = term->value.getConstantValue()) + if (auto term = std::dynamic_pointer_cast(diff)) { - if (auto mpLiteral = mpValue->getLiteralValue()) + // logging::debug() << "term = " << term->to_string() << logging::endl; + + if (auto mpValue = term->value.getConstantValue()) { - if (mpLiteral->unsignedInt() < (1u << 12)) - { + // logging::debug() << "mpValue = " << mpValue->to_string() << logging::endl; - auto it = bb.walk(); - bool firstCall = true; - while(!it.isEndOfBlock()) + if (auto mpLiteral = mpValue->getLiteralValue()) + { + if (mpLiteral->unsignedInt() < (1u << 12)) { - auto call = it.get(); - if(call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) + auto it = bb.walk(); + bool firstCall = true; + while(!it.isEndOfBlock()) { - it.erase(); - - auto output = *call->getOutput(); - if(firstCall) + auto call = it.get(); + if(call && 
std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) { - firstCall = false; - - auto addrArg = call->assertArgument(1); - - // TODO: limit loadInstrs.size() - Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; - Value addr = assign(it, TYPE_INT32) = offset + addrArg; - - auto elemType = addrArg.type.getElementType(); - uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * elemType.getInMemoryWidth() * 16; - - // TODO: cover types other than uchar - DataType TYPE16{elemType.getInMemoryWidth() * DataType::BYTE, 16, false}; - - uint64_t rows = loadInstrs.size(); - VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); - auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); - it = method.vpm->insertReadRAM(method, it, addr, TYPE16,/* &area */ nullptr, - true, INT_ZERO, entries, Optional(memoryPitch)); - - // const VPMArea* area = nullptr, bool useMutex = true, const Value& inAreaOffset = INT_ZERO); - it = method.vpm->insertReadVPM(method, it, output, &area, true); + it.erase(); + + auto output = *call->getOutput(); + if(firstCall) + { + firstCall = false; + + auto addrArg = call->assertArgument(1); + + auto elemType = addrArg.type.getElementType(); + auto vectorSize = elemType.getInMemoryWidth() * 16; + + // TODO: limit loadInstrs.size() + Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; + // Value offset = assign(it, TYPE_INT32) = offsetValues[0] * Literal(vectorSize); + Value addr = assign(it, TYPE_INT32) = offset + addrArg; + + uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * vectorSize; + + // TODO: cover types other than vector16 + DataType TYPE16{elemType.getInMemoryWidth() * DataType::BYTE, 16, false}; + + uint64_t rows = loadInstrs.size(); + VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); + auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); + it = method.vpm->insertReadRAM(method, it, addr, TYPE16,/* &area */ nullptr, + true, INT_ZERO, entries, 
Optional(memoryPitch)); + + // const VPMArea* area = nullptr, bool useMutex = true, const Value& inAreaOffset = INT_ZERO); + it = method.vpm->insertReadVPM(method, it, output, &area, true); + } + else { + // TODO: gather these instructions in one mutex lock + it = method.vpm->insertLockMutex(it, true); + assign(it, output) = VPM_IO_REGISTER; + it = method.vpm->insertUnlockMutex(it, true); + } } - else { - // TODO: gather these instructions in one mutex lock - it = method.vpm->insertLockMutex(it, true); - assign(it, output) = VPM_IO_REGISTER; - it = method.vpm->insertUnlockMutex(it, true); + else + { + it.nextInBlock(); } } - else - { - it.nextInBlock(); - } + + logging::debug() << loadInstrs.size() << " loads are combined" << logging::endl; } } } From c39687c9f101bfb1fa1cc4ce43187661fdaa697f Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sat, 15 Aug 2020 16:32:29 +0900 Subject: [PATCH 13/30] Create new files of ValueExpr --- src/normalization/Normalizer.cpp | 251 +------------------------------ src/optimization/ValueExpr.cpp | 195 ++++++++++++++++++++++++ src/optimization/ValueExpr.h | 96 ++++++++++++ src/optimization/sources.list | 3 +- 4 files changed, 297 insertions(+), 248 deletions(-) create mode 100644 src/optimization/ValueExpr.cpp create mode 100644 src/optimization/ValueExpr.h diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index 3125b3a3..d532b56f 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -25,6 +25,8 @@ #include "MemoryAccess.h" #include "Rewrite.h" +#include "../optimization/ValueExpr.h" + #ifdef __GNUC__ #include #endif @@ -39,6 +41,8 @@ using namespace vc4c::normalization; using namespace vc4c::periphery; using namespace vc4c::operators; +using namespace vc4c::optimization; + static bool checkWorkGroupUniform(const Value& arg) { if(arg.checkRegister()) @@ -243,253 +247,6 @@ static void runNormalizationStep( } } -class ValueExpr -{ -public: - // signed : value - using 
ExpandedExprs = std::vector>>; - - virtual ~ValueExpr() = default; - - virtual bool operator==(const ValueExpr& other) const = 0; - inline bool operator!=(const ValueExpr& other) const - { - return !(*this == other); - } - - virtual std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) = 0; - - // expand value expr as liner combination - // e.g. (a + b) * c = a * c + b * c - virtual void expand(ExpandedExprs& exprs) = 0; - - virtual Optional getInteger() const = 0; - - virtual std::string to_string() const = 0; -}; - -class ValueBinaryOp : public ValueExpr -{ -public: - enum class BinaryOp - { - Add, - Sub, - Mul, - Div, - Other, - }; - - ValueBinaryOp(std::shared_ptr left, BinaryOp op, std::shared_ptr right) : - left(left), op(op), right(right) - { - } - - bool operator==(const ValueExpr& other) const override; - - std::shared_ptr replaceLocal(Value& value, std::shared_ptr expr) override; - - void expand(ExpandedExprs& exprs) override; - - Optional getInteger() const override; - - std::string to_string() const override; - - std::shared_ptr left; - BinaryOp op; - std::shared_ptr right; -}; - -class ValueTerm : public ValueExpr -{ -public: - ValueTerm(const Value& value) : value(value) {} - - bool operator==(const ValueExpr& other) const override - { - if(auto otherTerm = dynamic_cast(&other)) - return value == otherTerm->value; - return false; - } - - std::shared_ptr replaceLocal(Value& from, std::shared_ptr expr) override - { - if(auto fromLocal = from.checkLocal()) - { - if(auto valueLocal = value.checkLocal()) - { - if(*fromLocal == *valueLocal) - { - return expr; - } - } - } - return std::make_shared(value); - } - - void expand(ExpandedExprs& exprs) override - { - exprs.push_back(std::make_pair(true, std::make_shared(value))); - } - - Optional getInteger() const override - { - if(auto lit = value.checkLiteral()) - { - return Optional(lit->signedInt()); - } - else if(auto imm = value.checkImmediate()) - { - return imm->getIntegerValue(); - } - return 
Optional(); - } - - std::string to_string() const override - { - return value.to_string(); - } - - const Value value; -}; - -bool ValueBinaryOp::operator==(const ValueExpr& other) const -{ - if(auto otherOp = dynamic_cast(&other)) - { - return op == otherOp->op && *right == *otherOp->right && *left == *otherOp->left; - } - return false; -} - -std::shared_ptr ValueBinaryOp::replaceLocal(Value& value, std::shared_ptr expr) -{ - return std::make_shared(left->replaceLocal(value, expr), op, right->replaceLocal(value, expr)); -} - -void ValueBinaryOp::expand(ExpandedExprs& exprs) -{ - auto leftNum = left->getInteger(); - auto rightNum = right->getInteger(); - if(leftNum && rightNum) - { - int l = leftNum.value_or(0); - int r = rightNum.value_or(0); - int num = 0; - switch(op) - { - case BinaryOp::Add: - num = l + r; - break; - case BinaryOp::Sub: - num = l - r; - break; - case BinaryOp::Mul: - num = l * r; - break; - case BinaryOp::Div: - num = l / r; - break; - case BinaryOp::Other: - break; - } - - // TODO: Care other types - auto value = Value(Literal(std::abs(num)), TYPE_INT32); - std::shared_ptr expr = std::make_shared(value); - exprs.push_back(std::make_pair(true, expr)); - } - else - { - switch(op) - { - case BinaryOp::Add: - { - left->expand(exprs); - right->expand(exprs); - break; - } - case BinaryOp::Sub: - { - left->expand(exprs); - - ExpandedExprs temp; - right->expand(temp); - for(auto& e : temp) - { - e.first = !e.first; - } - exprs.insert(exprs.end(), temp.begin(), temp.end()); - break; - } - case BinaryOp::Mul: - { - if(leftNum || rightNum) - { - int num = 0; - std::shared_ptr expr = nullptr; - if(leftNum) - { - num = leftNum.value_or(0); - expr = right; - } - else - { - num = rightNum.value_or(0); - expr = left; - } - for(int i = 0; i < num; i++) - { - exprs.push_back(std::make_pair(true, expr)); - } - } - else - { - exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); - } - break; - } - case BinaryOp::Div: - { - 
exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); - break; - } - case BinaryOp::Other: - break; - } - } -} - -Optional ValueBinaryOp::getInteger() const -{ - return Optional(); -} - -std::string ValueBinaryOp::to_string() const -{ - std::string opStr; - switch(op) - { - case BinaryOp::Add: - opStr = "+"; - break; - case BinaryOp::Sub: - opStr = "-"; - break; - case BinaryOp::Mul: - opStr = "*"; - break; - case BinaryOp::Div: - opStr = "/"; - break; - case BinaryOp::Other: - opStr = "other"; - break; - } - - return "(" + left->to_string() + " " + opStr + " " + right->to_string() + ")"; -} - std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) { return std::make_shared( diff --git a/src/optimization/ValueExpr.cpp b/src/optimization/ValueExpr.cpp new file mode 100644 index 00000000..57d51979 --- /dev/null +++ b/src/optimization/ValueExpr.cpp @@ -0,0 +1,195 @@ +/* + * Author: doe300 + * + * See the file "LICENSE" for the full license governing this code. 
+ */ + +#include "ValueExpr.h" + +#include "../Locals.h" + +using namespace vc4c; +using namespace vc4c::optimization; + +bool ValueBinaryOp::operator==(const ValueExpr& other) const +{ + if(auto otherOp = dynamic_cast(&other)) + { + return op == otherOp->op && *right == *otherOp->right && *left == *otherOp->left; + } + return false; +} + +std::shared_ptr ValueBinaryOp::replaceLocal(const Value& value, std::shared_ptr expr) +{ + return std::make_shared(left->replaceLocal(value, expr), op, right->replaceLocal(value, expr)); +} + +void ValueBinaryOp::expand(ExpandedExprs& exprs) +{ + auto leftNum = left->getInteger(); + auto rightNum = right->getInteger(); + if(leftNum && rightNum) + { + int l = leftNum.value_or(0); + int r = rightNum.value_or(0); + int num = 0; + switch(op) + { + case BinaryOp::Add: + num = l + r; + break; + case BinaryOp::Sub: + num = l - r; + break; + case BinaryOp::Mul: + num = l * r; + break; + case BinaryOp::Div: + num = l / r; + break; + case BinaryOp::Other: + break; + } + + // TODO: Care other types + auto value = Value(Literal(std::abs(num)), TYPE_INT32); + std::shared_ptr expr = std::make_shared(value); + exprs.push_back(std::make_pair(true, expr)); + } + else + { + switch(op) + { + case BinaryOp::Add: + { + left->expand(exprs); + right->expand(exprs); + break; + } + case BinaryOp::Sub: + { + left->expand(exprs); + + ExpandedExprs temp; + right->expand(temp); + for(auto& e : temp) + { + e.first = !e.first; + } + exprs.insert(exprs.end(), temp.begin(), temp.end()); + break; + } + case BinaryOp::Mul: + { + if(leftNum || rightNum) + { + int num = 0; + std::shared_ptr expr = nullptr; + if(leftNum) + { + num = leftNum.value_or(0); + expr = right; + } + else + { + num = rightNum.value_or(0); + expr = left; + } + for(int i = 0; i < num; i++) + { + exprs.push_back(std::make_pair(true, expr)); + } + } + else + { + exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); + } + break; + } + case BinaryOp::Div: + { + 
exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); + break; + } + case BinaryOp::Other: + break; + } + } +} + +Optional ValueBinaryOp::getInteger() const +{ + return Optional(); +} + +std::string ValueBinaryOp::to_string() const +{ + std::string opStr; + switch(op) + { + case BinaryOp::Add: + opStr = "+"; + break; + case BinaryOp::Sub: + opStr = "-"; + break; + case BinaryOp::Mul: + opStr = "*"; + break; + case BinaryOp::Div: + opStr = "/"; + break; + case BinaryOp::Other: + opStr = "other"; + break; + } + + return "(" + left->to_string() + " " + opStr + " " + right->to_string() + ")"; +} + +bool ValueTerm::operator==(const ValueExpr& other) const +{ + if(auto otherTerm = dynamic_cast(&other)) + return value == otherTerm->value; + return false; +} + +std::shared_ptr ValueTerm::replaceLocal(const Value& from, std::shared_ptr expr) +{ + if(auto fromLocal = from.checkLocal()) + { + if(auto valueLocal = value.checkLocal()) + { + if(*fromLocal == *valueLocal) + { + return expr; + } + } + } + return std::make_shared(value); +} + +void ValueTerm::expand(ExpandedExprs& exprs) +{ + exprs.push_back(std::make_pair(true, std::make_shared(value))); +} + +Optional ValueTerm::getInteger() const +{ + if(auto lit = value.checkLiteral()) + { + return Optional(lit->signedInt()); + } + else if(auto imm = value.checkImmediate()) + { + return imm->getIntegerValue(); + } + return Optional(); +} + +std::string ValueTerm::to_string() const +{ + return value.to_string(); +} + diff --git a/src/optimization/ValueExpr.h b/src/optimization/ValueExpr.h new file mode 100644 index 00000000..8ece239e --- /dev/null +++ b/src/optimization/ValueExpr.h @@ -0,0 +1,96 @@ +/* + * Author: doe300 + * + * See the file "LICENSE" for the full license governing this code. 
+ */ +#ifndef VC4C_OPTIMIZATION_VALUEEXPR +#define VC4C_OPTIMIZATION_VALUEEXPR + +#include "../Values.h" + +#include +#include + +namespace vc4c +{ + namespace optimization + { + class ValueExpr + { + public: + // (signed, value) + using ExpandedExprs = std::vector>>; + + virtual ~ValueExpr() = default; + + virtual bool operator==(const ValueExpr& other) const = 0; + inline bool operator!=(const ValueExpr& other) const + { + return !(*this == other); + } + + virtual std::shared_ptr replaceLocal(const Value& value, std::shared_ptr expr) = 0; + + // expand value expr as liner combination + // e.g. (a + b) * c = a * c + b * c + virtual void expand(ExpandedExprs& exprs) = 0; + + virtual Optional getInteger() const = 0; + + virtual std::string to_string() const = 0; + }; + + class ValueBinaryOp : public ValueExpr + { + public: + enum class BinaryOp + { + Add, + Sub, + Mul, + Div, + Other, + }; + + ValueBinaryOp(std::shared_ptr left, BinaryOp op, std::shared_ptr right) : + left(left), op(op), right(right) + { + } + + bool operator==(const ValueExpr& other) const override; + + std::shared_ptr replaceLocal(const Value& value, std::shared_ptr expr) override; + + void expand(ExpandedExprs& exprs) override; + + Optional getInteger() const override; + + std::string to_string() const override; + + std::shared_ptr left; + BinaryOp op; + std::shared_ptr right; + }; + + class ValueTerm : public ValueExpr + { + public: + ValueTerm(const Value& value) : value(value) {} + + bool operator==(const ValueExpr& other) const override; + + std::shared_ptr replaceLocal(const Value& from, std::shared_ptr expr) override; + + void expand(ExpandedExprs& exprs) override; + + Optional getInteger() const override; + + std::string to_string() const override; + + const Value value; + }; + + } /* namespace optimizations */ +} /* namespace vc4c */ + +#endif /* VC4C_OPTIMIZATION_VALUEEXPR */ diff --git a/src/optimization/sources.list b/src/optimization/sources.list index 3a0fe012..ad9588fa 100644 --- 
a/src/optimization/sources.list +++ b/src/optimization/sources.list @@ -8,4 +8,5 @@ target_sources(${VC4C_LIBRARY_NAME} ${CMAKE_CURRENT_LIST_DIR}/Optimizer.cpp ${CMAKE_CURRENT_LIST_DIR}/Reordering.cpp ${CMAKE_CURRENT_LIST_DIR}/InstructionScheduler.cpp -) \ No newline at end of file + ${CMAKE_CURRENT_LIST_DIR}/ValueExpr.cpp +) From 2d18ead5646ea9dd4f083f112e1240f730dea9ed Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sat, 15 Aug 2020 17:29:22 +0900 Subject: [PATCH 14/30] Move combineDMALoads to Combiner --- src/normalization/Normalizer.cpp | 337 +------------------------------ src/optimization/Combiner.cpp | 332 ++++++++++++++++++++++++++++++ src/optimization/Combiner.h | 3 + src/optimization/ValueExpr.cpp | 2 +- src/optimization/ValueExpr.h | 2 +- 5 files changed, 339 insertions(+), 337 deletions(-) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index d532b56f..d5a79f19 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -17,7 +17,6 @@ #include "../optimization/Eliminator.h" #include "../optimization/Reordering.h" #include "../intermediate/operators.h" -#include "../periphery/VPM.h" #include "../spirv/SPIRVBuiltins.h" #include "Inliner.h" #include "LiteralValues.h" @@ -25,11 +24,7 @@ #include "MemoryAccess.h" #include "Rewrite.h" -#include "../optimization/ValueExpr.h" - -#ifdef __GNUC__ -#include -#endif +#include "../optimization/Combiner.h" #include "log.h" @@ -41,8 +36,6 @@ using namespace vc4c::normalization; using namespace vc4c::periphery; using namespace vc4c::operators; -using namespace vc4c::optimization; - static bool checkWorkGroupUniform(const Value& arg) { if(arg.checkRegister()) @@ -247,332 +240,6 @@ static void runNormalizationStep( } } -std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) -{ - return std::make_shared( - std::make_shared(left), binOp, std::make_shared(right)); -} - -// try to convert shl to mul and return it 
as ValueExpr -std::shared_ptr shlToMul(Value& value, const intermediate::Operation* op) -{ - auto left = op->getFirstArg(); - auto right = *op->getSecondArg(); - int shiftValue = 0; - if(auto lit = right.checkLiteral()) - { - shiftValue = lit->signedInt(); - } - else if(auto imm = right.checkImmediate()) - { - shiftValue = imm->getIntegerValue().value_or(0); - } - - if(shiftValue > 0) - { - auto right = Value(Literal(1 << shiftValue), TYPE_INT32); - return makeValueBinaryOpFromLocal(left, ValueBinaryOp::BinaryOp::Mul, right); - } - else - { - return std::make_shared(value); - } -} - -std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) -{ - using BO = ValueBinaryOp::BinaryOp; - BO binOp = BO::Other; - - // add, sub, shr, shl, asr - if(auto op = dynamic_cast(inst)) - { - if(op->op == OP_ADD) - { - binOp = BO::Add; - } - else if(op->op == OP_SUB) - { - binOp = BO::Sub; - } - else if(op->op == OP_SHL) - { - // convert shl to mul - return shlToMul(value, op); - // TODO: shr, asr - } - else - { - // If op is neither add nor sub, return value as-is. - return std::make_shared(value); - } - - auto left = op->getFirstArg(); - auto right = *op->getSecondArg(); - return makeValueBinaryOpFromLocal(left, binOp, right); - } - // mul, div - else if(auto op = dynamic_cast(inst)) - { - if(op->opCode == "mul") - { - binOp = BO::Mul; - } - else if(op->opCode == "div") - { - binOp = BO::Div; - } - else - { - // If op is neither add nor sub, return value as-is. - return std::make_shared(value); - } - - auto left = op->getFirstArg(); - auto right = *op->getSecondArg(); - return makeValueBinaryOpFromLocal(left, binOp, right); - } - - return std::make_shared(value); -} - -std::shared_ptr calcValueExpr(std::shared_ptr expr) -{ - using BO = ValueBinaryOp::BinaryOp; - - ValueExpr::ExpandedExprs expanded; - expr->expand(expanded); - - // for(auto& p : expanded) - // logging::debug() << (p.first ? 
"+" : "-") << p.second->to_string() << " "; - // logging::debug() << logging::endl; - - for(auto p = expanded.begin(); p != expanded.end();) - { - auto comp = std::find_if( - expanded.begin(), expanded.end(), [&p](const std::pair>& other) { - return p->first != other.first && *p->second == *other.second; - }); - if(comp != expanded.end()) - { - expanded.erase(comp); - p = expanded.erase(p); - } - else - { - p++; - } - } - - std::shared_ptr result = std::make_shared(INT_ZERO); - for(auto& p : expanded) - { - result = std::make_shared(result, p.first ? BO::Add : BO::Sub, p.second); - } - - return result; -} - -void combineDMALoads(const Module& module, Method& method, const Configuration& config) -{ - for(auto& bb : method) - { - std::vector loadInstrs; - std::vector offsetValues; - Optional addrValue; - for(auto& it : bb) - { - // Find all method calls - if(auto call = dynamic_cast(it.get())) - { - - auto name = call->methodName; - -#ifdef __GNUC__ - // Copied from src/spirv/SPIRVHelper.cpp - // TODO: Move these codes to the new helper file. - int status; - char* real_name = abi::__cxa_demangle(name.data(), nullptr, nullptr, &status); - std::string result = name; - - if(status == 0) - { - // if demangling is successful, output the demangled function name - result = real_name; - // the demangled name contains the arguments, so we need ignore them - result = result.substr(0, result.find('(')); - } - free(real_name); - auto isVload16 = result == "vload16"; -#else - auto isVload16 = name.find("vload16") != std::string::npos; -#endif - - // TODO: Check whether all second argument values are equal. 
- if(isVload16) - { - if (!addrValue.has_value()) - { - addrValue = call->getArgument(1); - } - else if (addrValue != call->getArgument(1)) - { - continue; - } - - offsetValues.push_back(call->assertArgument(0)); - loadInstrs.push_back(call); - } - } - } - - if(offsetValues.size() <= 1) - continue; - - for(auto& inst : loadInstrs) - { - logging::debug() << inst->to_string() << logging::endl; - } - - std::vector>> addrExprs; - - for(auto& addrValue : offsetValues) - { - if(auto loc = addrValue.checkLocal()) - { - if(auto writer = loc->getSingleWriter()) - { - addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); - } - else - { - addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue))); - } - } - } - - for(auto& current : addrExprs) - { - for(auto& other : addrExprs) - { - auto replaced = current.second->replaceLocal(other.first, other.second); - current.second = replaced; - } - } - - for(auto& pair : addrExprs) - { - logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; - } - - std::shared_ptr diff = nullptr; - bool eqDiff = true; - for(size_t i = 1; i < addrExprs.size(); i++) - { - auto x = addrExprs[i - 1].second; - auto y = addrExprs[i].second; - auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); - - auto currentDiff = calcValueExpr(diffExpr); - // Apply calcValueExpr again for integer literals. - currentDiff = calcValueExpr(currentDiff); - - if(diff == nullptr) - { - diff = currentDiff; - } - if(*currentDiff != *diff) - { - eqDiff = false; - break; - } - } - - logging::debug() << addrExprs.size() << " loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; - - if(eqDiff) - { - // The form of diff should be "0 (+/-) expressions...", then remove the value 0 at most right. 
- ValueExpr::ExpandedExprs expanded; - diff->expand(expanded); - if (expanded.size() == 1) { - diff = expanded[0].second; - - // logging::debug() << "diff = " << diff->to_string() << logging::endl; - - if (auto term = std::dynamic_pointer_cast(diff)) - { - // logging::debug() << "term = " << term->to_string() << logging::endl; - - if (auto mpValue = term->value.getConstantValue()) - { - // logging::debug() << "mpValue = " << mpValue->to_string() << logging::endl; - - if (auto mpLiteral = mpValue->getLiteralValue()) - { - if (mpLiteral->unsignedInt() < (1u << 12)) - { - auto it = bb.walk(); - bool firstCall = true; - while(!it.isEndOfBlock()) - { - auto call = it.get(); - if(call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) - { - it.erase(); - - auto output = *call->getOutput(); - if(firstCall) - { - firstCall = false; - - auto addrArg = call->assertArgument(1); - - auto elemType = addrArg.type.getElementType(); - auto vectorSize = elemType.getInMemoryWidth() * 16; - - // TODO: limit loadInstrs.size() - Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; - // Value offset = assign(it, TYPE_INT32) = offsetValues[0] * Literal(vectorSize); - Value addr = assign(it, TYPE_INT32) = offset + addrArg; - - uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * vectorSize; - - // TODO: cover types other than vector16 - DataType TYPE16{elemType.getInMemoryWidth() * DataType::BYTE, 16, false}; - - uint64_t rows = loadInstrs.size(); - VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); - auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); - it = method.vpm->insertReadRAM(method, it, addr, TYPE16,/* &area */ nullptr, - true, INT_ZERO, entries, Optional(memoryPitch)); - - // const VPMArea* area = nullptr, bool useMutex = true, const Value& inAreaOffset = INT_ZERO); - it = method.vpm->insertReadVPM(method, it, output, &area, true); - } - else { - // TODO: gather these instructions in one mutex lock - it = 
method.vpm->insertLockMutex(it, true); - assign(it, output) = VPM_IO_REGISTER; - it = method.vpm->insertUnlockMutex(it, true); - } - } - else - { - it.nextInBlock(); - } - } - - logging::debug() << loadInstrs.size() << " loads are combined" << logging::endl; - } - } - } - } - } - } - } -} - void Normalizer::normalize(Module& module) const { // 1. eliminate phi on all methods @@ -597,7 +264,7 @@ void Normalizer::normalize(Module& module) const auto kernels = module.getKernels(); for(Method* kernelFunc : kernels) { - combineDMALoads(module, *kernelFunc, config); + optimizations::combineDMALoads(module, *kernelFunc, config); } } diff --git a/src/optimization/Combiner.cpp b/src/optimization/Combiner.cpp index d1f3e28b..0d2a37de 100644 --- a/src/optimization/Combiner.cpp +++ b/src/optimization/Combiner.cpp @@ -11,6 +11,7 @@ #include "../intermediate/Helper.h" #include "../intermediate/operators.h" #include "../periphery/VPM.h" +#include "../optimization/ValueExpr.h" #include "Eliminator.h" #include "log.h" @@ -18,6 +19,10 @@ #include #include +#ifdef __GNUC__ +#include +#endif + // TODO combine y = (x >> n) << n with and // same for y = (x << n) >> n (at least of n constant) // TODO for "exact" shifts, y == (y >> n) << n! 
@@ -27,6 +32,7 @@ using namespace vc4c; using namespace vc4c::optimizations; using namespace vc4c::intermediate; using namespace vc4c::operators; +using namespace vc4c::periphery; // Taken from https://stackoverflow.com/questions/2835469/how-to-perform-rotate-shift-in-c?noredirect=1&lq=1 constexpr static uint32_t rotate_left_halfword(uint32_t value, uint8_t shift) noexcept @@ -1121,6 +1127,332 @@ InstructionWalker optimizations::combineArithmeticOperations( return it; } +std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) +{ + return std::make_shared( + std::make_shared(left), binOp, std::make_shared(right)); +} + +// try to convert shl to mul and return it as ValueExpr +std::shared_ptr shlToMul(Value& value, const intermediate::Operation* op) +{ + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); + int shiftValue = 0; + if(auto lit = right.checkLiteral()) + { + shiftValue = lit->signedInt(); + } + else if(auto imm = right.checkImmediate()) + { + shiftValue = imm->getIntegerValue().value_or(0); + } + + if(shiftValue > 0) + { + auto right = Value(Literal(1 << shiftValue), TYPE_INT32); + return makeValueBinaryOpFromLocal(left, ValueBinaryOp::BinaryOp::Mul, right); + } + else + { + return std::make_shared(value); + } +} + +std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) +{ + using BO = ValueBinaryOp::BinaryOp; + BO binOp = BO::Other; + + // add, sub, shr, shl, asr + if(auto op = dynamic_cast(inst)) + { + if(op->op == OP_ADD) + { + binOp = BO::Add; + } + else if(op->op == OP_SUB) + { + binOp = BO::Sub; + } + else if(op->op == OP_SHL) + { + // convert shl to mul + return shlToMul(value, op); + // TODO: shr, asr + } + else + { + // If op is neither add nor sub, return value as-is. 
+ return std::make_shared(value); + } + + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); + return makeValueBinaryOpFromLocal(left, binOp, right); + } + // mul, div + else if(auto op = dynamic_cast(inst)) + { + if(op->opCode == "mul") + { + binOp = BO::Mul; + } + else if(op->opCode == "div") + { + binOp = BO::Div; + } + else + { + // If op is neither add nor sub, return value as-is. + return std::make_shared(value); + } + + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); + return makeValueBinaryOpFromLocal(left, binOp, right); + } + + return std::make_shared(value); +} + +std::shared_ptr calcValueExpr(std::shared_ptr expr) +{ + using BO = ValueBinaryOp::BinaryOp; + + ValueExpr::ExpandedExprs expanded; + expr->expand(expanded); + + // for(auto& p : expanded) + // logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; + // logging::debug() << logging::endl; + + for(auto p = expanded.begin(); p != expanded.end();) + { + auto comp = std::find_if( + expanded.begin(), expanded.end(), [&p](const std::pair>& other) { + return p->first != other.first && *p->second == *other.second; + }); + if(comp != expanded.end()) + { + expanded.erase(comp); + p = expanded.erase(p); + } + else + { + p++; + } + } + + std::shared_ptr result = std::make_shared(INT_ZERO); + for(auto& p : expanded) + { + result = std::make_shared(result, p.first ? BO::Add : BO::Sub, p.second); + } + + return result; +} + +void optimizations::combineDMALoads(const Module& module, Method& method, const Configuration& config) +{ + for(auto& bb : method) + { + std::vector loadInstrs; + std::vector offsetValues; + Optional addrValue; + for(auto& it : bb) + { + // Find all method calls + if(auto call = dynamic_cast(it.get())) + { + + auto name = call->methodName; + +#ifdef __GNUC__ + // Copied from src/spirv/SPIRVHelper.cpp + // TODO: Move these codes to the new helper file. 
+ int status; + char* real_name = abi::__cxa_demangle(name.data(), nullptr, nullptr, &status); + std::string result = name; + + if(status == 0) + { + // if demangling is successful, output the demangled function name + result = real_name; + // the demangled name contains the arguments, so we need ignore them + result = result.substr(0, result.find('(')); + } + free(real_name); + auto isVload16 = result == "vload16"; +#else + auto isVload16 = name.find("vload16") != std::string::npos; +#endif + + // TODO: Check whether all second argument values are equal. + if(isVload16) + { + if (!addrValue.has_value()) + { + addrValue = call->getArgument(1); + } + else if (addrValue != call->getArgument(1)) + { + continue; + } + + offsetValues.push_back(call->assertArgument(0)); + loadInstrs.push_back(call); + } + } + } + + if(offsetValues.size() <= 1) + continue; + + for(auto& inst : loadInstrs) + { + logging::debug() << inst->to_string() << logging::endl; + } + + std::vector>> addrExprs; + + for(auto& addrValue : offsetValues) + { + if(auto loc = addrValue.checkLocal()) + { + if(auto writer = loc->getSingleWriter()) + { + addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); + } + else + { + addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue))); + } + } + } + + for(auto& current : addrExprs) + { + for(auto& other : addrExprs) + { + auto replaced = current.second->replaceLocal(other.first, other.second); + current.second = replaced; + } + } + + for(auto& pair : addrExprs) + { + logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; + } + + std::shared_ptr diff = nullptr; + bool eqDiff = true; + for(size_t i = 1; i < addrExprs.size(); i++) + { + auto x = addrExprs[i - 1].second; + auto y = addrExprs[i].second; + auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); + + auto currentDiff = calcValueExpr(diffExpr); + // Apply calcValueExpr again for integer literals. 
+ currentDiff = calcValueExpr(currentDiff); + + if(diff == nullptr) + { + diff = currentDiff; + } + if(*currentDiff != *diff) + { + eqDiff = false; + break; + } + } + + logging::debug() << addrExprs.size() << " loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; + + if(eqDiff) + { + // The form of diff should be "0 (+/-) expressions...", then remove the value 0 at most right. + ValueExpr::ExpandedExprs expanded; + diff->expand(expanded); + if (expanded.size() == 1) { + diff = expanded[0].second; + + // logging::debug() << "diff = " << diff->to_string() << logging::endl; + + if (auto term = std::dynamic_pointer_cast(diff)) + { + // logging::debug() << "term = " << term->to_string() << logging::endl; + + if (auto mpValue = term->value.getConstantValue()) + { + // logging::debug() << "mpValue = " << mpValue->to_string() << logging::endl; + + if (auto mpLiteral = mpValue->getLiteralValue()) + { + if (mpLiteral->unsignedInt() < (1u << 12)) + { + auto it = bb.walk(); + bool firstCall = true; + while(!it.isEndOfBlock()) + { + auto call = it.get(); + if(call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) + { + it.erase(); + + auto output = *call->getOutput(); + if(firstCall) + { + firstCall = false; + + auto addrArg = call->assertArgument(1); + + auto elemType = addrArg.type.getElementType(); + auto vectorSize = elemType.getInMemoryWidth() * 16; + + // TODO: limit loadInstrs.size() + Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; + // Value offset = assign(it, TYPE_INT32) = offsetValues[0] * Literal(vectorSize); + Value addr = assign(it, TYPE_INT32) = offset + addrArg; + + uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * vectorSize; + + // TODO: cover types other than vector16 + DataType TYPE16{elemType.getInMemoryWidth() * DataType::BYTE, 16, false}; + + uint64_t rows = loadInstrs.size(); + VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); + auto entries = 
Value(Literal(static_cast(rows)), TYPE_INT32); + it = method.vpm->insertReadRAM(method, it, addr, TYPE16,/* &area */ nullptr, + true, INT_ZERO, entries, Optional(memoryPitch)); + + // const VPMArea* area = nullptr, bool useMutex = true, const Value& inAreaOffset = INT_ZERO); + it = method.vpm->insertReadVPM(method, it, output, &area, true); + } + else { + // TODO: gather these instructions in one mutex lock + it = method.vpm->insertLockMutex(it, true); + assign(it, output) = VPM_IO_REGISTER; + it = method.vpm->insertUnlockMutex(it, true); + } + } + else + { + it.nextInBlock(); + } + } + + logging::debug() << loadInstrs.size() << " loads are combined" << logging::endl; + } + } + } + } + } + } + } +} + static Optional> combineAdditions( Method& method, InstructionWalker referenceIt, FastMap& addedValues) { diff --git a/src/optimization/Combiner.h b/src/optimization/Combiner.h index bac4ca98..e0c38c5c 100644 --- a/src/optimization/Combiner.h +++ b/src/optimization/Combiner.h @@ -154,6 +154,9 @@ namespace vc4c InstructionWalker combineArithmeticOperations( const Module& module, Method& method, InstructionWalker it, const Configuration& config); + // TODO documentation + void combineDMALoads(const Module& module, Method& method, const Configuration& config); + // TODO documentation, TODO move somewhere else?! 
bool cacheWorkGroupDMAAccess(const Module& module, Method& method, const Configuration& config); } // namespace optimizations diff --git a/src/optimization/ValueExpr.cpp b/src/optimization/ValueExpr.cpp index 57d51979..5e4058b6 100644 --- a/src/optimization/ValueExpr.cpp +++ b/src/optimization/ValueExpr.cpp @@ -9,7 +9,7 @@ #include "../Locals.h" using namespace vc4c; -using namespace vc4c::optimization; +using namespace vc4c::optimizations; bool ValueBinaryOp::operator==(const ValueExpr& other) const { diff --git a/src/optimization/ValueExpr.h b/src/optimization/ValueExpr.h index 8ece239e..81499506 100644 --- a/src/optimization/ValueExpr.h +++ b/src/optimization/ValueExpr.h @@ -13,7 +13,7 @@ namespace vc4c { - namespace optimization + namespace optimizations { class ValueExpr { From c7358c30e8d149866e8a3861e2a9f6822a487265 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Tue, 18 Aug 2020 18:35:13 +0900 Subject: [PATCH 15/30] Add a test of CombineDMALoads --- src/periphery/VPM.h | 4 +- test/TestOptimizationSteps.cpp | 92 ++++++++++++++++++++++++++++++++++ test/TestOptimizationSteps.h | 1 + 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/src/periphery/VPM.h b/src/periphery/VPM.h index 6ffee180..04d2cb8b 100644 --- a/src/periphery/VPM.h +++ b/src/periphery/VPM.h @@ -408,7 +408,9 @@ namespace vc4c * * see Broadcom spec, table 36 */ - class VPRDMASetup : private Bitfield + // class VPRDMASetup : private Bitfield + // TODO: Changed to public, is it ok? 
+ class VPRDMASetup : public Bitfield { public: VPRDMASetup( diff --git a/test/TestOptimizationSteps.cpp b/test/TestOptimizationSteps.cpp index 5dcd4682..a067b1a0 100644 --- a/test/TestOptimizationSteps.cpp +++ b/test/TestOptimizationSteps.cpp @@ -8,15 +8,19 @@ #include "Expression.h" #include "Method.h" #include "Module.h" +#include "Bitfield.h" #include "intermediate/Helper.h" #include "intermediate/operators.h" #include "optimization/Combiner.h" #include "optimization/ControlFlow.h" #include "optimization/Eliminator.h" #include "optimization/Flags.h" +#include "periphery/VPM.h" #include +#include "log.h" + using namespace vc4c; using namespace vc4c::optimizations; using namespace vc4c::operators; @@ -35,6 +39,7 @@ TestOptimizationSteps::TestOptimizationSteps() TEST_ADD(TestOptimizationSteps::testEliminateBitOperations); TEST_ADD(TestOptimizationSteps::testCombineRotations); TEST_ADD(TestOptimizationSteps::testLoopInvariantCodeMotion); + TEST_ADD(TestOptimizationSteps::testCombineDMALoads); } static bool checkEquals( @@ -1989,3 +1994,90 @@ void TestOptimizationSteps::testLoopInvariantCodeMotion() it.nextInMethod(); TEST_ASSERT(!!it.get()); } + +void TestOptimizationSteps::testCombineDMALoads() +{ + using namespace vc4c::intermediate; + Configuration config{}; + Module module{config}; + Method inputMethod(module); + + auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); + + auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; + + // TODO: Add a case that the first argument of vload16 is a variable. 
+ + DataType TYPE16{DataType::WORD, 16, true}; + + const Local* createLocal(DataType type, const std::string& name) __attribute__((returns_nonnull)); + auto res1 = inputMethod.addNewLocal(TYPE16); + auto res2 = inputMethod.addNewLocal(TYPE16); + auto res3 = inputMethod.addNewLocal(TYPE16); + inIt.emplace((new intermediate::MethodCall(std::move(res1), "vload16", {0_val, in}))); + inIt.emplace((new intermediate::MethodCall(std::move(res2), "vload16", {1_val, in}))); + inIt.emplace((new intermediate::MethodCall(std::move(res3), "vload16", {2_val, in}))); + + const int numOfLoads = 3; + periphery::VPRDMASetup expectedDMASetup(0, 0, numOfLoads, 1, 0); + + combineDMALoads(module, inputMethod, config); + + for(auto& bb : inputMethod) + { + int numOfDMASetup = 0; + int numOfStrideSetup = 0; + int numOfVPMSetup = 0; + int numOfVPMRead = 0; + + for(auto& it : bb) + { + if(auto move = dynamic_cast(it.get())) + { + auto source = move->getSource(); + if(source.getLiteralValue() && + (move->getOutput()->hasRegister(REG_VPM_IN_SETUP) || + has_flag(move->decoration, InstructionDecorations::VPM_READ_CONFIGURATION))) + { + auto dmaSetup = periphery::VPRSetup::fromLiteral(source.getLiteralValue()->unsignedInt()).dmaSetup; + TEST_ASSERT_EQUALS(expectedDMASetup, dmaSetup); + + numOfDMASetup++; + } + else if (auto reg = source.checkRegister()) + { + // VPM Read + if (reg->file != RegisterFile::ACCUMULATOR && reg->num == 48) + { + numOfVPMRead++; + } + } + } + else if (auto load = dynamic_cast(it.get())) + { + if (load->type == LoadType::REPLICATE_INT32 && + (load->getOutput()->hasRegister(REG_VPM_IN_SETUP) || + has_flag(load->decoration, InstructionDecorations::VPM_READ_CONFIGURATION))) + { + auto vpr = periphery::VPRSetup::fromLiteral(load->getImmediate().unsignedInt()); + if (vpr.isStrideSetup()) + { + TEST_ASSERT_EQUALS(64, vpr.strideSetup.getPitch()); + numOfStrideSetup++; + } + if (vpr.isGenericSetup()) + { + auto vpmSetup = vpr.genericSetup; + TEST_ASSERT_EQUALS(numOfLoads, 
vpmSetup.getNumber()); + numOfVPMSetup++; + } + } + } + } + + TEST_ASSERT_EQUALS(1, numOfDMASetup); + TEST_ASSERT_EQUALS(1, numOfStrideSetup); + TEST_ASSERT_EQUALS(1, numOfVPMSetup); + TEST_ASSERT_EQUALS(numOfLoads, numOfVPMRead); + } +} diff --git a/test/TestOptimizationSteps.h b/test/TestOptimizationSteps.h index 531c8f2d..2118dc9b 100644 --- a/test/TestOptimizationSteps.h +++ b/test/TestOptimizationSteps.h @@ -32,6 +32,7 @@ class TestOptimizationSteps : public Test::Suite void testEliminateMoves(); void testEliminateDeadCode(); void testLoopInvariantCodeMotion(); + void testCombineDMALoads(); private: void testMethodsEquals(vc4c::Method& m1, vc4c::Method& m2); From a055fbec14ea68d679c94168c1772332ffcd60db Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 23 Aug 2020 16:52:11 +0900 Subject: [PATCH 16/30] Move makeValueBinaryOpFromLocal to ValueExpr.* --- src/optimization/Combiner.cpp | 6 ------ src/optimization/ValueExpr.cpp | 6 ++++++ src/optimization/ValueExpr.h | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/optimization/Combiner.cpp b/src/optimization/Combiner.cpp index 0d2a37de..46481fdd 100644 --- a/src/optimization/Combiner.cpp +++ b/src/optimization/Combiner.cpp @@ -1127,12 +1127,6 @@ InstructionWalker optimizations::combineArithmeticOperations( return it; } -std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) -{ - return std::make_shared( - std::make_shared(left), binOp, std::make_shared(right)); -} - // try to convert shl to mul and return it as ValueExpr std::shared_ptr shlToMul(Value& value, const intermediate::Operation* op) { diff --git a/src/optimization/ValueExpr.cpp b/src/optimization/ValueExpr.cpp index 5e4058b6..f661710f 100644 --- a/src/optimization/ValueExpr.cpp +++ b/src/optimization/ValueExpr.cpp @@ -148,6 +148,12 @@ std::string ValueBinaryOp::to_string() const return "(" + left->to_string() + " " + opStr + " " + right->to_string() + ")"; } +std::shared_ptr 
optimizations::makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) +{ + return std::make_shared( + std::make_shared(left), binOp, std::make_shared(right)); +} + bool ValueTerm::operator==(const ValueExpr& other) const { if(auto otherTerm = dynamic_cast(&other)) diff --git a/src/optimization/ValueExpr.h b/src/optimization/ValueExpr.h index 81499506..e1f2ce9d 100644 --- a/src/optimization/ValueExpr.h +++ b/src/optimization/ValueExpr.h @@ -72,6 +72,8 @@ namespace vc4c std::shared_ptr right; }; + std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right); + class ValueTerm : public ValueExpr { public: From 6ca4768c12eecebae9e15bcd7de763e741b241b5 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Mon, 24 Aug 2020 17:13:23 +0900 Subject: [PATCH 17/30] Fix a test to check multiple types of vloadn --- test/TestOptimizationSteps.cpp | 157 +++++++++++++++++++-------------- 1 file changed, 93 insertions(+), 64 deletions(-) diff --git a/test/TestOptimizationSteps.cpp b/test/TestOptimizationSteps.cpp index a067b1a0..64aa5e13 100644 --- a/test/TestOptimizationSteps.cpp +++ b/test/TestOptimizationSteps.cpp @@ -5,10 +5,10 @@ */ #include "TestOptimizationSteps.h" +#include "Bitfield.h" #include "Expression.h" #include "Method.h" #include "Module.h" -#include "Bitfield.h" #include "intermediate/Helper.h" #include "intermediate/operators.h" #include "optimization/Combiner.h" @@ -1998,86 +1998,115 @@ void TestOptimizationSteps::testLoopInvariantCodeMotion() void TestOptimizationSteps::testCombineDMALoads() { using namespace vc4c::intermediate; - Configuration config{}; - Module module{config}; - Method inputMethod(module); - - auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); - - auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; - - // TODO: Add a case that the first argument of vload16 is a variable. 
- - DataType TYPE16{DataType::WORD, 16, true}; - const Local* createLocal(DataType type, const std::string& name) __attribute__((returns_nonnull)); - auto res1 = inputMethod.addNewLocal(TYPE16); - auto res2 = inputMethod.addNewLocal(TYPE16); - auto res3 = inputMethod.addNewLocal(TYPE16); - inIt.emplace((new intermediate::MethodCall(std::move(res1), "vload16", {0_val, in}))); - inIt.emplace((new intermediate::MethodCall(std::move(res2), "vload16", {1_val, in}))); - inIt.emplace((new intermediate::MethodCall(std::move(res3), "vload16", {2_val, in}))); + auto testCombineDMALoadsSub = [&](Module& module, Method& inputMethod, Configuration& config, DataType vectorType) { + // TODO: Add a case that the first argument of vload16 is a variable. - const int numOfLoads = 3; - periphery::VPRDMASetup expectedDMASetup(0, 0, numOfLoads, 1, 0); + const int numOfLoads = 3; + periphery::VPRDMASetup expectedDMASetup(0, vectorType.getVectorWidth() % 16, numOfLoads, 1, 0); - combineDMALoads(module, inputMethod, config); + combineDMALoads(module, inputMethod, config); - for(auto& bb : inputMethod) - { - int numOfDMASetup = 0; - int numOfStrideSetup = 0; - int numOfVPMSetup = 0; - int numOfVPMRead = 0; - - for(auto& it : bb) + for(auto& bb : inputMethod) { - if(auto move = dynamic_cast(it.get())) + int numOfDMASetup = 0; + int numOfStrideSetup = 0; + int numOfVPMSetup = 0; + int numOfVPMRead = 0; + + for(auto& it : bb) { - auto source = move->getSource(); - if(source.getLiteralValue() && - (move->getOutput()->hasRegister(REG_VPM_IN_SETUP) || - has_flag(move->decoration, InstructionDecorations::VPM_READ_CONFIGURATION))) + if(auto move = dynamic_cast(it.get())) { - auto dmaSetup = periphery::VPRSetup::fromLiteral(source.getLiteralValue()->unsignedInt()).dmaSetup; - TEST_ASSERT_EQUALS(expectedDMASetup, dmaSetup); + auto source = move->getSource(); + if(source.getLiteralValue() && + (move->getOutput()->hasRegister(REG_VPM_IN_SETUP) || + has_flag(move->decoration, 
InstructionDecorations::VPM_READ_CONFIGURATION))) + { + auto dmaSetup = + periphery::VPRSetup::fromLiteral(source.getLiteralValue()->unsignedInt()).dmaSetup; + TEST_ASSERT_EQUALS(expectedDMASetup, dmaSetup); - numOfDMASetup++; - } - else if (auto reg = source.checkRegister()) - { - // VPM Read - if (reg->file != RegisterFile::ACCUMULATOR && reg->num == 48) + numOfDMASetup++; + } + else if(auto reg = source.checkRegister()) { - numOfVPMRead++; + // VPM Read + if(reg->file != RegisterFile::ACCUMULATOR && reg->num == 48) + { + numOfVPMRead++; + } } } - } - else if (auto load = dynamic_cast(it.get())) - { - if (load->type == LoadType::REPLICATE_INT32 && - (load->getOutput()->hasRegister(REG_VPM_IN_SETUP) || - has_flag(load->decoration, InstructionDecorations::VPM_READ_CONFIGURATION))) + else if(auto load = dynamic_cast(it.get())) { - auto vpr = periphery::VPRSetup::fromLiteral(load->getImmediate().unsignedInt()); - if (vpr.isStrideSetup()) - { - TEST_ASSERT_EQUALS(64, vpr.strideSetup.getPitch()); - numOfStrideSetup++; - } - if (vpr.isGenericSetup()) + if(load->type == LoadType::REPLICATE_INT32 && + (load->getOutput()->hasRegister(REG_VPM_IN_SETUP) || + has_flag(load->decoration, InstructionDecorations::VPM_READ_CONFIGURATION))) { - auto vpmSetup = vpr.genericSetup; - TEST_ASSERT_EQUALS(numOfLoads, vpmSetup.getNumber()); - numOfVPMSetup++; + auto vpr = periphery::VPRSetup::fromLiteral(load->getImmediate().unsignedInt()); + if(vpr.isStrideSetup()) + { + TEST_ASSERT_EQUALS(64, vpr.strideSetup.getPitch()); + numOfStrideSetup++; + } + if(vpr.isGenericSetup()) + { + auto vpmSetup = vpr.genericSetup; + TEST_ASSERT_EQUALS(numOfLoads, vpmSetup.getNumber()); + numOfVPMSetup++; + } } } } + + TEST_ASSERT_EQUALS(1, numOfDMASetup); + TEST_ASSERT_EQUALS(1, numOfStrideSetup); + TEST_ASSERT_EQUALS(1, numOfVPMSetup); + TEST_ASSERT_EQUALS(numOfLoads, numOfVPMRead); } + }; + + auto putMethodCall = [](Method& inputMethod, InstructionWalker& inIt, const DataType& vectorType, std::string 
funcName, std::vector&& args) { + auto res = inputMethod.addNewLocal(vectorType); + inIt.emplace((new intermediate::MethodCall(std::move(res), std::move(funcName), std::move(args)))); + }; + + const DataType Float16{DataType::WORD, 16, true}; + const DataType Float8 {DataType::WORD, 8, true}; + + // float16 vload16(size_t, const float*) + const std::string vload16f = "_Z7vload16jPU3AS1Kf"; + // float8 vload8(size_t, const float*) + const std::string vload8f = "_Z6vload8jPU3AS1Kf"; + + Configuration config{}; + + { + Module module{config}; + Method inputMethod(module); + + auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); + auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; + + putMethodCall(inputMethod, inIt, Float16, vload16f, {0_val, in}); + putMethodCall(inputMethod, inIt, Float16, vload16f, {1_val, in}); + putMethodCall(inputMethod, inIt, Float16, vload16f, {2_val, in}); + + testCombineDMALoadsSub(module, inputMethod, config, Float16); + } + + { + Module module{config}; + Method inputMethod(module); + + auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); + auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; + + putMethodCall(inputMethod, inIt, Float8, vload8f, {0_val, in}); + putMethodCall(inputMethod, inIt, Float8, vload8f, {1_val, in}); + putMethodCall(inputMethod, inIt, Float8, vload8f, {2_val, in}); - TEST_ASSERT_EQUALS(1, numOfDMASetup); - TEST_ASSERT_EQUALS(1, numOfStrideSetup); - TEST_ASSERT_EQUALS(1, numOfVPMSetup); - TEST_ASSERT_EQUALS(numOfLoads, numOfVPMRead); + testCombineDMALoadsSub(module, inputMethod, config, Float8); } } From 4bb82e9e5da209ce086792b93b081725908783b4 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Mon, 24 Aug 2020 18:49:00 +0900 Subject: [PATCH 18/30] Support vloadn other than vload16 --- src/optimization/Combiner.cpp | 271 +++++++++++++++++---------------- src/spirv/SPIRVHelper.cpp | 70 ++++++--- src/spirv/SPIRVHelper.h | 8 +- 
test/TestOptimizationSteps.cpp | 33 +++- 4 files changed, 223 insertions(+), 159 deletions(-) diff --git a/src/optimization/Combiner.cpp b/src/optimization/Combiner.cpp index 46481fdd..3cf77f56 100644 --- a/src/optimization/Combiner.cpp +++ b/src/optimization/Combiner.cpp @@ -12,16 +12,14 @@ #include "../intermediate/operators.h" #include "../periphery/VPM.h" #include "../optimization/ValueExpr.h" +#include "../spirv/SPIRVHelper.h" #include "Eliminator.h" #include "log.h" #include #include #include - -#ifdef __GNUC__ -#include -#endif +#include // TODO combine y = (x >> n) << n with and // same for y = (x << n) >> n (at least of n constant) @@ -1249,42 +1247,33 @@ std::shared_ptr calcValueExpr(std::shared_ptr expr) void optimizations::combineDMALoads(const Module& module, Method& method, const Configuration& config) { + using namespace std; + + const std::regex vloadReg("vload(2|3|4|8|16)"); + for(auto& bb : method) { - std::vector loadInstrs; - std::vector offsetValues; - Optional addrValue; + // loadInstrs, offsetValues, addrValue + map, vector, Optional>> vloads; + for(auto& it : bb) { - // Find all method calls + // Find all vloadn calls if(auto call = dynamic_cast(it.get())) { + auto name = vc4c::spirv::demangleFunctionName(call->methodName); - auto name = call->methodName; + std::smatch m; + if (std::regex_search(name, m, vloadReg)) { + int n = std::stoi(m.str(1)); -#ifdef __GNUC__ - // Copied from src/spirv/SPIRVHelper.cpp - // TODO: Move these codes to the new helper file. - int status; - char* real_name = abi::__cxa_demangle(name.data(), nullptr, nullptr, &status); - std::string result = name; + // TODO: Check whether all second argument values are equal. 
+ + auto& vload = vloads[n]; + auto& loadInstrs = get<0>(vload); + auto& offsetValues = get<1>(vload); + auto& addrValue = get<2>(vload); - if(status == 0) - { - // if demangling is successful, output the demangled function name - result = real_name; - // the demangled name contains the arguments, so we need ignore them - result = result.substr(0, result.find('(')); - } - free(real_name); - auto isVload16 = result == "vload16"; -#else - auto isVload16 = name.find("vload16") != std::string::npos; -#endif - - // TODO: Check whether all second argument values are equal. - if(isVload16) - { if (!addrValue.has_value()) { addrValue = call->getArgument(1); @@ -1300,144 +1289,156 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const } } - if(offsetValues.size() <= 1) - continue; + for (auto &p : vloads) { + auto vectorLength = p.first; + auto& vload = p.second; + auto& loadInstrs = get<0>(vload); + auto& offsetValues = get<1>(vload); + auto& addrValue = get<2>(vload); - for(auto& inst : loadInstrs) - { - logging::debug() << inst->to_string() << logging::endl; - } + if(offsetValues.size() <= 1) + continue; - std::vector>> addrExprs; + for(auto& inst : loadInstrs) + { + logging::debug() << inst->to_string() << logging::endl; + } - for(auto& addrValue : offsetValues) - { - if(auto loc = addrValue.checkLocal()) + std::vector>> addrExprs; + + for(auto& addrValue : offsetValues) { - if(auto writer = loc->getSingleWriter()) + if(auto loc = addrValue.checkLocal()) { - addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); + if(auto writer = loc->getSingleWriter()) + { + addrExprs.push_back(std::make_pair(addrValue, iiToExpr(addrValue, writer))); + } + else + { + addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue))); + } } else { + // TODO: is it ok? 
addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue))); } } - } - for(auto& current : addrExprs) - { - for(auto& other : addrExprs) + for(auto& current : addrExprs) { - auto replaced = current.second->replaceLocal(other.first, other.second); - current.second = replaced; + for(auto& other : addrExprs) + { + auto replaced = current.second->replaceLocal(other.first, other.second); + current.second = replaced; + } } - } - - for(auto& pair : addrExprs) - { - logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; - } - std::shared_ptr diff = nullptr; - bool eqDiff = true; - for(size_t i = 1; i < addrExprs.size(); i++) - { - auto x = addrExprs[i - 1].second; - auto y = addrExprs[i].second; - auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); - - auto currentDiff = calcValueExpr(diffExpr); - // Apply calcValueExpr again for integer literals. - currentDiff = calcValueExpr(currentDiff); - - if(diff == nullptr) + for(auto& pair : addrExprs) { - diff = currentDiff; + logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; } - if(*currentDiff != *diff) + + std::shared_ptr diff = nullptr; + bool eqDiff = true; + for(size_t i = 1; i < addrExprs.size(); i++) { - eqDiff = false; - break; - } - } + auto x = addrExprs[i - 1].second; + auto y = addrExprs[i].second; + auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); - logging::debug() << addrExprs.size() << " loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; + auto currentDiff = calcValueExpr(diffExpr); + // Apply calcValueExpr again for integer literals. + currentDiff = calcValueExpr(currentDiff); - if(eqDiff) - { - // The form of diff should be "0 (+/-) expressions...", then remove the value 0 at most right. 
- ValueExpr::ExpandedExprs expanded; - diff->expand(expanded); - if (expanded.size() == 1) { - diff = expanded[0].second; + if(diff == nullptr) + { + diff = currentDiff; + } + if(*currentDiff != *diff) + { + eqDiff = false; + break; + } + } - // logging::debug() << "diff = " << diff->to_string() << logging::endl; + logging::debug() << addrExprs.size() << " loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; - if (auto term = std::dynamic_pointer_cast(diff)) - { - // logging::debug() << "term = " << term->to_string() << logging::endl; + if(eqDiff) + { + // The form of diff should be "0 (+/-) expressions...", then remove the value 0 at most right. + ValueExpr::ExpandedExprs expanded; + diff->expand(expanded); + if (expanded.size() == 1) { + diff = expanded[0].second; + + // logging::debug() << "diff = " << diff->to_string() << logging::endl; - if (auto mpValue = term->value.getConstantValue()) + if (auto term = std::dynamic_pointer_cast(diff)) { - // logging::debug() << "mpValue = " << mpValue->to_string() << logging::endl; + // logging::debug() << "term = " << term->to_string() << logging::endl; - if (auto mpLiteral = mpValue->getLiteralValue()) + if (auto mpValue = term->value.getConstantValue()) { - if (mpLiteral->unsignedInt() < (1u << 12)) + // logging::debug() << "mpValue = " << mpValue->to_string() << logging::endl; + + if (auto mpLiteral = mpValue->getLiteralValue()) { - auto it = bb.walk(); - bool firstCall = true; - while(!it.isEndOfBlock()) + if (mpLiteral->unsignedInt() < (1u << 12)) { - auto call = it.get(); - if(call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) + auto it = bb.walk(); + bool firstCall = true; + while(!it.isEndOfBlock()) { - it.erase(); - - auto output = *call->getOutput(); - if(firstCall) + auto call = it.get(); + if(call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) { - firstCall = false; - - auto addrArg = call->assertArgument(1); - - auto 
elemType = addrArg.type.getElementType(); - auto vectorSize = elemType.getInMemoryWidth() * 16; - - // TODO: limit loadInstrs.size() - Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; - // Value offset = assign(it, TYPE_INT32) = offsetValues[0] * Literal(vectorSize); - Value addr = assign(it, TYPE_INT32) = offset + addrArg; - - uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * vectorSize; - - // TODO: cover types other than vector16 - DataType TYPE16{elemType.getInMemoryWidth() * DataType::BYTE, 16, false}; - - uint64_t rows = loadInstrs.size(); - VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); - auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); - it = method.vpm->insertReadRAM(method, it, addr, TYPE16,/* &area */ nullptr, - true, INT_ZERO, entries, Optional(memoryPitch)); - - // const VPMArea* area = nullptr, bool useMutex = true, const Value& inAreaOffset = INT_ZERO); - it = method.vpm->insertReadVPM(method, it, output, &area, true); + it.erase(); + + auto output = *call->getOutput(); + if(firstCall) + { + firstCall = false; + + auto addrArg = call->assertArgument(1); + + auto elemType = addrArg.type.getElementType(); + auto vectorSize = elemType.getInMemoryWidth() * vectorLength; + + // TODO: limit loadInstrs.size() + Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; + // Value offset = assign(it, TYPE_INT32) = offsetValues[0] * Literal(vectorSize); + Value addr = assign(it, TYPE_INT32) = offset + addrArg; + + uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * vectorSize; + + DataType VectorType{elemType.getInMemoryWidth() * DataType::BYTE, vectorLength, false}; + + uint64_t rows = loadInstrs.size(); + VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); + auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); + it = method.vpm->insertReadRAM(method, it, addr, VectorType,/* &area */ nullptr, + true, INT_ZERO, entries, Optional(memoryPitch)); + + // const VPMArea* area = 
nullptr, bool useMutex = true, const Value& inAreaOffset = INT_ZERO); + it = method.vpm->insertReadVPM(method, it, output, &area, true); + } + else { + // TODO: gather these instructions in one mutex lock + it = method.vpm->insertLockMutex(it, true); + assign(it, output) = VPM_IO_REGISTER; + it = method.vpm->insertUnlockMutex(it, true); + } } - else { - // TODO: gather these instructions in one mutex lock - it = method.vpm->insertLockMutex(it, true); - assign(it, output) = VPM_IO_REGISTER; - it = method.vpm->insertUnlockMutex(it, true); + else + { + it.nextInBlock(); } } - else - { - it.nextInBlock(); - } - } - logging::debug() << loadInstrs.size() << " loads are combined" << logging::endl; + logging::debug() << loadInstrs.size() << " loads are combined" << logging::endl; + } } } } diff --git a/src/spirv/SPIRVHelper.cpp b/src/spirv/SPIRVHelper.cpp index b8f18ad6..77419b14 100644 --- a/src/spirv/SPIRVHelper.cpp +++ b/src/spirv/SPIRVHelper.cpp @@ -711,29 +711,36 @@ std::vector spirv::readStreamOfWords(std::istream* in) return words; } -std::string spirv::demangleFunctionName(const std::string& name) +void spirv::linkSPIRVModules(const std::vector& inputModules, std::ostream& output) { - if(name.find("_Z") != 0) - return name; +#ifndef SPIRV_FRONTEND + throw CompilationError(CompilationStep::LINKER, "SPIRV-Tools linker is not available!"); +#else + std::vector> binaries; + binaries.reserve(inputModules.size()); + std::transform(inputModules.begin(), inputModules.end(), std::back_inserter(binaries), readStreamOfWords); -#ifdef __GNUC__ - int status; - char* real_name = abi::__cxa_demangle(name.data(), nullptr, nullptr, &status); - std::string result = name; + spvtools::LinkerOptions options; + options.SetCreateLibrary(false); + options.SetVerifyIds(true); + // the VC4CL intrinsics are not provided by any input module + options.SetAllowPartialLinkage(true); - if(status == 0) + spvtools::Context spvContext(SPV_ENV_OPENCL_EMBEDDED_1_2); + + std::vector linkedModules; + 
spv_result_t result = spvtools::Link(spvContext, binaries, &linkedModules, options); + + if(result != SPV_SUCCESS) + throw CompilationError(CompilationStep::PARSER, getErrorMessage(result)); + + for(const uint32_t u : linkedModules) { - // if demangling is successful, output the demangled function name - result = real_name; - // the demangled name contains the arguments, so we need ignore them - result = result.substr(0, result.find('(')); - CPPLOG_LAZY( - logging::Level::DEBUG, log << "Demangled function name '" << name << "' to: " << result << logging::endl); + output.write(reinterpret_cast(&u), sizeof(uint32_t)); } - free(real_name); - return result; -#else - return name; + CPPLOG_LAZY(logging::Level::DEBUG, + log << "Linked " << inputModules.size() << " modules into a single module with " << linkedModules.size() + << " words of data." << logging::endl); #endif } @@ -761,3 +768,30 @@ void spirv::addFunctionAliases(Module& module) module.functionAliases.emplace("atomic_or", "atom_or"); module.functionAliases.emplace("atomic_xor", "atom_xor"); } + +std::string spirv::demangleFunctionName(const std::string& name) +{ + if(name.find("_Z") != 0) + return name; + +#ifdef __GNUC__ + int status; + char* real_name = abi::__cxa_demangle(name.data(), nullptr, nullptr, &status); + std::string result = name; + + if(status == 0) + { + // if demangling is successful, output the demangled function name + result = real_name; + // the demangled name contains the arguments, so we need ignore them + result = result.substr(0, result.find('(')); + CPPLOG_LAZY( + logging::Level::DEBUG, log << "Demangled function name '" << name << "' to: " << result << logging::endl); + } + free(real_name); + return result; +#else + return name; +#endif +} + diff --git a/src/spirv/SPIRVHelper.h b/src/spirv/SPIRVHelper.h index c7c3122d..5901f7f9 100644 --- a/src/spirv/SPIRVHelper.h +++ b/src/spirv/SPIRVHelper.h @@ -14,6 +14,10 @@ #include +#endif + +#include + namespace vc4c { class Module; @@ -42,11 
+46,9 @@ namespace vc4c std::vector readStreamOfWords(std::istream* in); - std::string demangleFunctionName(const std::string& name); - void addFunctionAliases(Module& module); + std::string demangleFunctionName(const std::string& name); } // namespace spirv } // namespace vc4c -#endif /* SPIRVHELPER_H */ diff --git a/test/TestOptimizationSteps.cpp b/test/TestOptimizationSteps.cpp index 64aa5e13..72cb39ac 100644 --- a/test/TestOptimizationSteps.cpp +++ b/test/TestOptimizationSteps.cpp @@ -2005,8 +2005,12 @@ void TestOptimizationSteps::testCombineDMALoads() const int numOfLoads = 3; periphery::VPRDMASetup expectedDMASetup(0, vectorType.getVectorWidth() % 16, numOfLoads, 1, 0); + inputMethod.dumpInstructions(); + combineDMALoads(module, inputMethod, config); + inputMethod.dumpInstructions(); + for(auto& bb : inputMethod) { int numOfDMASetup = 0; @@ -2047,7 +2051,7 @@ void TestOptimizationSteps::testCombineDMALoads() auto vpr = periphery::VPRSetup::fromLiteral(load->getImmediate().unsignedInt()); if(vpr.isStrideSetup()) { - TEST_ASSERT_EQUALS(64, vpr.strideSetup.getPitch()); + TEST_ASSERT_EQUALS(vectorType.getInMemoryWidth(), vpr.strideSetup.getPitch()); numOfStrideSetup++; } if(vpr.isGenericSetup()) @@ -2074,15 +2078,20 @@ void TestOptimizationSteps::testCombineDMALoads() const DataType Float16{DataType::WORD, 16, true}; const DataType Float8 {DataType::WORD, 8, true}; + const DataType Uchar16{DataType::BYTE, 16, false}; - // float16 vload16(size_t, const float*) + // vload16(size_t, const float*) const std::string vload16f = "_Z7vload16jPU3AS1Kf"; - // float8 vload8(size_t, const float*) + // vload8(size_t, const float*) const std::string vload8f = "_Z6vload8jPU3AS1Kf"; + // vload16(size_t, const float*) + const std::string vload16uc = "_Z7vload16jPU3AS1Kh"; Configuration config{}; { + // vload16f * 3 + Module module{config}; Method inputMethod(module); @@ -2097,6 +2106,8 @@ void TestOptimizationSteps::testCombineDMALoads() } { + // vload8f * 3 + Module 
module{config}; Method inputMethod(module); @@ -2109,4 +2120,20 @@ void TestOptimizationSteps::testCombineDMALoads() testCombineDMALoadsSub(module, inputMethod, config, Float8); } + + { + // vload16uc * 3 + + Module module{config}; + Method inputMethod(module); + + auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); + auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; + + putMethodCall(inputMethod, inIt, Uchar16, vload16uc, {0_val, in}); + putMethodCall(inputMethod, inIt, Uchar16, vload16uc, {1_val, in}); + putMethodCall(inputMethod, inIt, Uchar16, vload16uc, {2_val, in}); + + testCombineDMALoadsSub(module, inputMethod, config, Uchar16); + } } From 86cf23b93b406a00c0abf0f914007ee8a72e420d Mon Sep 17 00:00:00 2001 From: long-long-float Date: Mon, 24 Aug 2020 19:17:20 +0900 Subject: [PATCH 19/30] Remove deep nests --- src/optimization/Combiner.cpp | 140 +++++++++++++++++----------------- 1 file changed, 72 insertions(+), 68 deletions(-) diff --git a/src/optimization/Combiner.cpp b/src/optimization/Combiner.cpp index 3cf77f56..549412e8 100644 --- a/src/optimization/Combiner.cpp +++ b/src/optimization/Combiner.cpp @@ -10,8 +10,8 @@ #include "../analysis/MemoryAnalysis.h" #include "../intermediate/Helper.h" #include "../intermediate/operators.h" -#include "../periphery/VPM.h" #include "../optimization/ValueExpr.h" +#include "../periphery/VPM.h" #include "../spirv/SPIRVHelper.h" #include "Eliminator.h" #include "log.h" @@ -1264,21 +1264,22 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const auto name = vc4c::spirv::demangleFunctionName(call->methodName); std::smatch m; - if (std::regex_search(name, m, vloadReg)) { + if(std::regex_search(name, m, vloadReg)) + { int n = std::stoi(m.str(1)); // TODO: Check whether all second argument values are equal. 
auto& vload = vloads[n]; - auto& loadInstrs = get<0>(vload); + auto& loadInstrs = get<0>(vload); auto& offsetValues = get<1>(vload); - auto& addrValue = get<2>(vload); + auto& addrValue = get<2>(vload); - if (!addrValue.has_value()) + if(!addrValue.has_value()) { addrValue = call->getArgument(1); } - else if (addrValue != call->getArgument(1)) + else if(addrValue != call->getArgument(1)) { continue; } @@ -1289,12 +1290,13 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const } } - for (auto &p : vloads) { + for(auto& p : vloads) + { auto vectorLength = p.first; auto& vload = p.second; - auto& loadInstrs = get<0>(vload); + auto& loadInstrs = get<0>(vload); auto& offsetValues = get<1>(vload); - auto& addrValue = get<2>(vload); + auto& addrValue = get<2>(vload); if(offsetValues.size() <= 1) continue; @@ -1363,83 +1365,85 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const } } - logging::debug() << addrExprs.size() << " loads are " << (eqDiff ? "" : "not ") << "equal difference" << logging::endl; + logging::debug() << addrExprs.size() << " loads are " << (eqDiff ? "" : "not ") << "equal difference" + << logging::endl; if(eqDiff) { // The form of diff should be "0 (+/-) expressions...", then remove the value 0 at most right. ValueExpr::ExpandedExprs expanded; diff->expand(expanded); - if (expanded.size() == 1) { + if(expanded.size() == 1) + { diff = expanded[0].second; // logging::debug() << "diff = " << diff->to_string() << logging::endl; - if (auto term = std::dynamic_pointer_cast(diff)) - { - // logging::debug() << "term = " << term->to_string() << logging::endl; + auto term = std::dynamic_pointer_cast(diff); + auto mpValue = (term != nullptr) ? term->value.getConstantValue() : Optional{}; + auto mpLiteral = mpValue.has_value() ? 
mpValue->getLiteralValue() : Optional{}; - if (auto mpValue = term->value.getConstantValue()) + if(mpLiteral) + { + if(mpLiteral->unsignedInt() < (1u << 12)) { - // logging::debug() << "mpValue = " << mpValue->to_string() << logging::endl; - - if (auto mpLiteral = mpValue->getLiteralValue()) + auto it = bb.walk(); + bool firstCall = true; + while(!it.isEndOfBlock()) { - if (mpLiteral->unsignedInt() < (1u << 12)) + auto call = it.get(); + if(call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) { - auto it = bb.walk(); - bool firstCall = true; - while(!it.isEndOfBlock()) + it.erase(); + + auto output = *call->getOutput(); + if(firstCall) { - auto call = it.get(); - if(call && std::find(loadInstrs.begin(), loadInstrs.end(), call) != loadInstrs.end()) - { - it.erase(); - - auto output = *call->getOutput(); - if(firstCall) - { - firstCall = false; - - auto addrArg = call->assertArgument(1); - - auto elemType = addrArg.type.getElementType(); - auto vectorSize = elemType.getInMemoryWidth() * vectorLength; - - // TODO: limit loadInstrs.size() - Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; - // Value offset = assign(it, TYPE_INT32) = offsetValues[0] * Literal(vectorSize); - Value addr = assign(it, TYPE_INT32) = offset + addrArg; - - uint16_t memoryPitch = static_cast(mpLiteral->unsignedInt()) * vectorSize; - - DataType VectorType{elemType.getInMemoryWidth() * DataType::BYTE, vectorLength, false}; - - uint64_t rows = loadInstrs.size(); - VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); - auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); - it = method.vpm->insertReadRAM(method, it, addr, VectorType,/* &area */ nullptr, - true, INT_ZERO, entries, Optional(memoryPitch)); - - // const VPMArea* area = nullptr, bool useMutex = true, const Value& inAreaOffset = INT_ZERO); - it = method.vpm->insertReadVPM(method, it, output, &area, true); - } - else { - // TODO: gather these instructions in one mutex lock - it = 
method.vpm->insertLockMutex(it, true); - assign(it, output) = VPM_IO_REGISTER; - it = method.vpm->insertUnlockMutex(it, true); - } - } - else - { - it.nextInBlock(); - } - } + firstCall = false; + + auto addrArg = call->assertArgument(1); + + auto elemType = addrArg.type.getElementType(); + auto vectorSize = elemType.getInMemoryWidth() * vectorLength; - logging::debug() << loadInstrs.size() << " loads are combined" << logging::endl; + // TODO: limit loadInstrs.size() + Value offset = assign(it, TYPE_INT32) = offsetValues[0] << 4_val; + // Value offset = assign(it, TYPE_INT32) = offsetValues[0] * + // Literal(vectorSize); + Value addr = assign(it, TYPE_INT32) = offset + addrArg; + + uint16_t memoryPitch = + static_cast(mpLiteral->unsignedInt()) * vectorSize; + + DataType VectorType{ + elemType.getInMemoryWidth() * DataType::BYTE, vectorLength, false}; + + uint64_t rows = loadInstrs.size(); + VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); + auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); + it = + method.vpm->insertReadRAM(method, it, addr, VectorType, /* &area */ nullptr, + true, INT_ZERO, entries, Optional(memoryPitch)); + + // const VPMArea* area = nullptr, bool useMutex = true, const Value& + // inAreaOffset = INT_ZERO); + it = method.vpm->insertReadVPM(method, it, output, &area, true); + } + else + { + // TODO: gather these instructions in one mutex lock + it = method.vpm->insertLockMutex(it, true); + assign(it, output) = VPM_IO_REGISTER; + it = method.vpm->insertUnlockMutex(it, true); + } + } + else + { + it.nextInBlock(); } } + + logging::debug() << loadInstrs.size() << " loads are combined" << logging::endl; } } } From efebd92d3762c25c7e710d9bdd84a6479ff79210 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sat, 29 Aug 2020 19:12:31 +0900 Subject: [PATCH 20/30] Add test cases and fix --- src/optimization/ValueExpr.cpp | 46 ++++++++++++++++++++++------------ src/optimization/ValueExpr.h | 2 +- test/TestOptimizationSteps.cpp | 43 
+++++++++++++++++++++++++++++-- 3 files changed, 72 insertions(+), 19 deletions(-) diff --git a/src/optimization/ValueExpr.cpp b/src/optimization/ValueExpr.cpp index f661710f..6fab1c08 100644 --- a/src/optimization/ValueExpr.cpp +++ b/src/optimization/ValueExpr.cpp @@ -27,8 +27,24 @@ std::shared_ptr ValueBinaryOp::replaceLocal(const Value& value, std:: void ValueBinaryOp::expand(ExpandedExprs& exprs) { - auto leftNum = left->getInteger(); - auto rightNum = right->getInteger(); + ExpandedExprs leftEE, rightEE; + left->expand(leftEE); + right->expand(rightEE); + + auto getInteger = [](const std::pair> &v) { + std::function(const int&)> addSign = [&](const int& num) { + return make_optional(v.first ? num : -num); + }; + return v.second->getInteger() & addSign; + }; + + auto leftNum = (leftEE.size() == 1) ? getInteger(leftEE[0]) : Optional(); + auto rightNum = (rightEE.size() == 1) ? getInteger(rightEE[0]) : Optional(); + + auto append = [](ExpandedExprs &ee1, ExpandedExprs &ee2) { + ee1.insert(ee1.end(), ee2.begin(), ee2.end()); + }; + if(leftNum && rightNum) { int l = leftNum.value_or(0); @@ -63,21 +79,19 @@ void ValueBinaryOp::expand(ExpandedExprs& exprs) { case BinaryOp::Add: { - left->expand(exprs); - right->expand(exprs); + append(exprs, leftEE); + append(exprs, rightEE); break; } case BinaryOp::Sub: { - left->expand(exprs); + append(exprs, leftEE); - ExpandedExprs temp; - right->expand(temp); - for(auto& e : temp) + for(auto& e : rightEE) { e.first = !e.first; } - exprs.insert(exprs.end(), temp.begin(), temp.end()); + append(exprs, rightEE); break; } case BinaryOp::Mul: @@ -85,20 +99,20 @@ void ValueBinaryOp::expand(ExpandedExprs& exprs) if(leftNum || rightNum) { int num = 0; - std::shared_ptr expr = nullptr; + ExpandedExprs *ee = nullptr; if(leftNum) { num = leftNum.value_or(0); - expr = right; + ee = &rightEE; } else { num = rightNum.value_or(0); - expr = left; + ee = &leftEE; } for(int i = 0; i < num; i++) { - exprs.push_back(std::make_pair(true, expr)); + 
append(exprs, *ee); } } else @@ -148,10 +162,11 @@ std::string ValueBinaryOp::to_string() const return "(" + left->to_string() + " " + opStr + " " + right->to_string() + ")"; } -std::shared_ptr optimizations::makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) +std::shared_ptr optimizations::makeValueBinaryOpFromLocal( + Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) { return std::make_shared( - std::make_shared(left), binOp, std::make_shared(right)); + std::make_shared(left), binOp, std::make_shared(right)); } bool ValueTerm::operator==(const ValueExpr& other) const @@ -198,4 +213,3 @@ std::string ValueTerm::to_string() const { return value.to_string(); } - diff --git a/src/optimization/ValueExpr.h b/src/optimization/ValueExpr.h index e1f2ce9d..5561b733 100644 --- a/src/optimization/ValueExpr.h +++ b/src/optimization/ValueExpr.h @@ -18,7 +18,7 @@ namespace vc4c class ValueExpr { public: - // (signed, value) + // signed, value using ExpandedExprs = std::vector>>; virtual ~ValueExpr() = default; diff --git a/test/TestOptimizationSteps.cpp b/test/TestOptimizationSteps.cpp index 72cb39ac..95366cb9 100644 --- a/test/TestOptimizationSteps.cpp +++ b/test/TestOptimizationSteps.cpp @@ -17,6 +17,8 @@ #include "optimization/Flags.h" #include "periphery/VPM.h" +#include "optimization/ValueExpr.h" + #include #include "log.h" @@ -2000,7 +2002,6 @@ void TestOptimizationSteps::testCombineDMALoads() using namespace vc4c::intermediate; auto testCombineDMALoadsSub = [&](Module& module, Method& inputMethod, Configuration& config, DataType vectorType) { - // TODO: Add a case that the first argument of vload16 is a variable. 
const int numOfLoads = 3; periphery::VPRDMASetup expectedDMASetup(0, vectorType.getVectorWidth() % 16, numOfLoads, 1, 0); @@ -2084,7 +2085,7 @@ void TestOptimizationSteps::testCombineDMALoads() const std::string vload16f = "_Z7vload16jPU3AS1Kf"; // vload8(size_t, const float*) const std::string vload8f = "_Z6vload8jPU3AS1Kf"; - // vload16(size_t, const float*) + // vload16(size_t, const uchar*) const std::string vload16uc = "_Z7vload16jPU3AS1Kh"; Configuration config{}; @@ -2136,4 +2137,42 @@ void TestOptimizationSteps::testCombineDMALoads() testCombineDMALoadsSub(module, inputMethod, config, Uchar16); } + + { + // vload16f * 3 + + Module module{config}; + Method inputMethod(module); + + auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); + auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; + auto offset1 = assign(inIt, TYPE_INT32, "%offset1") = 42_val; + auto offset2 = assign(inIt, TYPE_INT32, "%offset2") = offset1 + 1_val; + auto offset3 = assign(inIt, TYPE_INT32, "%offset3") = offset1 + 2_val; + + putMethodCall(inputMethod, inIt, Float16, vload16f, {offset3, in}); + putMethodCall(inputMethod, inIt, Float16, vload16f, {offset2, in}); + putMethodCall(inputMethod, inIt, Float16, vload16f, {offset1, in}); + + testCombineDMALoadsSub(module, inputMethod, config, Float16); + } + + { + // ValueExpr::expand + + Literal l(2); + Value a(l, TYPE_INT32); + Value b = 3_val; + std::shared_ptr expr(new ValueBinaryOp( + makeValueBinaryOpFromLocal(a, ValueBinaryOp::BinaryOp::Add, b), + ValueBinaryOp::BinaryOp::Sub, + std::make_shared(1_val))); + ValueExpr::ExpandedExprs expanded; + expr->expand(expanded); + + TEST_ASSERT_EQUALS(1, expanded.size()); + + auto n = expanded[0].second->getInteger(); + TEST_ASSERT_EQUALS(4, n.value_or(0)); + } } From 59cd141612df0711cbcb4450e1f4752092f55fe4 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 13 Sep 2020 16:07:09 +0900 Subject: [PATCH 21/30] Fix test codes --- 
test/TestOptimizationSteps.cpp | 38 +++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/test/TestOptimizationSteps.cpp b/test/TestOptimizationSteps.cpp index 95366cb9..99e3a10e 100644 --- a/test/TestOptimizationSteps.cpp +++ b/test/TestOptimizationSteps.cpp @@ -2003,8 +2003,27 @@ void TestOptimizationSteps::testCombineDMALoads() auto testCombineDMALoadsSub = [&](Module& module, Method& inputMethod, Configuration& config, DataType vectorType) { + uint8_t elementBitCount = vectorType.getElementType().getScalarBitCount(); + uint8_t dmaSetupMode = 0; + uint8_t vpitch = 1; + switch(elementBitCount) + { + case 8: + dmaSetupMode = 4; + vpitch = 4; + break; + case 16: + dmaSetupMode = 2; + vpitch = 2; + break; + case 32: + dmaSetupMode = 0; + vpitch = 1; + break; + } + const int numOfLoads = 3; - periphery::VPRDMASetup expectedDMASetup(0, vectorType.getVectorWidth() % 16, numOfLoads, 1, 0); + periphery::VPRDMASetup expectedDMASetup(dmaSetupMode, vectorType.getVectorWidth() % 16, numOfLoads, vpitch, 0); inputMethod.dumpInstructions(); @@ -2096,8 +2115,10 @@ void TestOptimizationSteps::testCombineDMALoads() Module module{config}; Method inputMethod(module); + const DataType FloatPtr = inputMethod.createPointerType(TYPE_FLOAT); + auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); - auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; + auto in = assign(inIt, FloatPtr, "%in") = UNIFORM_REGISTER; putMethodCall(inputMethod, inIt, Float16, vload16f, {0_val, in}); putMethodCall(inputMethod, inIt, Float16, vload16f, {1_val, in}); @@ -2112,8 +2133,10 @@ void TestOptimizationSteps::testCombineDMALoads() Module module{config}; Method inputMethod(module); + const DataType FloatPtr = inputMethod.createPointerType(TYPE_FLOAT); + auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); - auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; + auto in = 
assign(inIt, FloatPtr, "%in") = UNIFORM_REGISTER; putMethodCall(inputMethod, inIt, Float8, vload8f, {0_val, in}); putMethodCall(inputMethod, inIt, Float8, vload8f, {1_val, in}); @@ -2128,8 +2151,10 @@ void TestOptimizationSteps::testCombineDMALoads() Module module{config}; Method inputMethod(module); + const DataType Int8Ptr = inputMethod.createPointerType(TYPE_INT8); + auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); - auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; + auto in = assign(inIt, Int8Ptr, "%in") = UNIFORM_REGISTER; putMethodCall(inputMethod, inIt, Uchar16, vload16uc, {0_val, in}); putMethodCall(inputMethod, inIt, Uchar16, vload16uc, {1_val, in}); @@ -2144,8 +2169,11 @@ void TestOptimizationSteps::testCombineDMALoads() Module module{config}; Method inputMethod(module); + const DataType FloatPtr = inputMethod.createPointerType(TYPE_FLOAT); + auto inIt = inputMethod.createAndInsertNewBlock(inputMethod.end(), "%dummy").walkEnd(); - auto in = assign(inIt, TYPE_INT32, "%in") = UNIFORM_REGISTER; + auto in = assign(inIt, FloatPtr, "%in") = UNIFORM_REGISTER; + auto offset1 = assign(inIt, TYPE_INT32, "%offset1") = 42_val; auto offset2 = assign(inIt, TYPE_INT32, "%offset2") = offset1 + 1_val; auto offset3 = assign(inIt, TYPE_INT32, "%offset3") = offset1 + 2_val; From 28de130c464ccaec40d00315f8fa732cde29c5e9 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Wed, 23 Sep 2020 15:43:17 +0900 Subject: [PATCH 22/30] Fix offset of vloadn --- src/optimization/Combiner.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/optimization/Combiner.cpp b/src/optimization/Combiner.cpp index 549412e8..e69f3126 100644 --- a/src/optimization/Combiner.cpp +++ b/src/optimization/Combiner.cpp @@ -1407,9 +1407,8 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const auto vectorSize = elemType.getInMemoryWidth() * vectorLength; // TODO: limit loadInstrs.size() - Value offset = 
assign(it, TYPE_INT32) = offsetValues[0] << 4_val; - // Value offset = assign(it, TYPE_INT32) = offsetValues[0] * - // Literal(vectorSize); + Value offset = assign(it, TYPE_INT32) = + offsetValues[0] * Literal(vectorLength * elemType.getInMemoryWidth()); Value addr = assign(it, TYPE_INT32) = offset + addrArg; uint16_t memoryPitch = @@ -1425,8 +1424,6 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const method.vpm->insertReadRAM(method, it, addr, VectorType, /* &area */ nullptr, true, INT_ZERO, entries, Optional(memoryPitch)); - // const VPMArea* area = nullptr, bool useMutex = true, const Value& - // inAreaOffset = INT_ZERO); it = method.vpm->insertReadVPM(method, it, output, &area, true); } else From 3b3bfb742d4ef33528f4f5be391e8907226caa09 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Wed, 23 Sep 2020 16:17:18 +0900 Subject: [PATCH 23/30] Fix a test to check the generic setup --- src/periphery/VPM.h | 3 +- test/TestOptimizationSteps.cpp | 54 +++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/periphery/VPM.h b/src/periphery/VPM.h index 04d2cb8b..4057c782 100644 --- a/src/periphery/VPM.h +++ b/src/periphery/VPM.h @@ -328,7 +328,8 @@ namespace vc4c * * see Broadcom spec, table 33 */ - class VPRGenericSetup : private Bitfield + // class VPRGenericSetup : private Bitfield + class VPRGenericSetup : public Bitfield { public: VPRGenericSetup(uint8_t size, uint8_t stride, uint8_t numVectors = 1, uint8_t address = 0) : Bitfield(0) diff --git a/test/TestOptimizationSteps.cpp b/test/TestOptimizationSteps.cpp index 99e3a10e..a9417ddf 100644 --- a/test/TestOptimizationSteps.cpp +++ b/test/TestOptimizationSteps.cpp @@ -2006,24 +2006,33 @@ void TestOptimizationSteps::testCombineDMALoads() uint8_t elementBitCount = vectorType.getElementType().getScalarBitCount(); uint8_t dmaSetupMode = 0; uint8_t vpitch = 1; + uint8_t vprSize = 0; + uint8_t vprStride = 0; switch(elementBitCount) { - case 8: - 
dmaSetupMode = 4; - vpitch = 4; - break; - case 16: - dmaSetupMode = 2; - vpitch = 2; - break; - case 32: - dmaSetupMode = 0; - vpitch = 1; - break; + case 8: + dmaSetupMode = 4; + vpitch = 4; + vprSize = 0; + vprStride = 4; + break; + case 16: + dmaSetupMode = 2; + vpitch = 2; + vprSize = 1; + vprStride = 2; + break; + case 32: + dmaSetupMode = 0; + vpitch = 1; + vprSize = 2; + vprStride = 1; + break; } const int numOfLoads = 3; periphery::VPRDMASetup expectedDMASetup(dmaSetupMode, vectorType.getVectorWidth() % 16, numOfLoads, vpitch, 0); + periphery::VPRGenericSetup expectedVPRSetup(vprSize, vprStride, numOfLoads, 0); inputMethod.dumpInstructions(); @@ -2035,7 +2044,7 @@ void TestOptimizationSteps::testCombineDMALoads() { int numOfDMASetup = 0; int numOfStrideSetup = 0; - int numOfVPMSetup = 0; + int numOfVPRSetup = 0; int numOfVPMRead = 0; for(auto& it : bb) @@ -2076,9 +2085,8 @@ void TestOptimizationSteps::testCombineDMALoads() } if(vpr.isGenericSetup()) { - auto vpmSetup = vpr.genericSetup; - TEST_ASSERT_EQUALS(numOfLoads, vpmSetup.getNumber()); - numOfVPMSetup++; + TEST_ASSERT_EQUALS(expectedVPRSetup, vpr.genericSetup); + numOfVPRSetup++; } } } @@ -2086,18 +2094,19 @@ void TestOptimizationSteps::testCombineDMALoads() TEST_ASSERT_EQUALS(1, numOfDMASetup); TEST_ASSERT_EQUALS(1, numOfStrideSetup); - TEST_ASSERT_EQUALS(1, numOfVPMSetup); + TEST_ASSERT_EQUALS(1, numOfVPRSetup); TEST_ASSERT_EQUALS(numOfLoads, numOfVPMRead); } }; - auto putMethodCall = [](Method& inputMethod, InstructionWalker& inIt, const DataType& vectorType, std::string funcName, std::vector&& args) { + auto putMethodCall = [](Method& inputMethod, InstructionWalker& inIt, const DataType& vectorType, + std::string funcName, std::vector&& args) { auto res = inputMethod.addNewLocal(vectorType); inIt.emplace((new intermediate::MethodCall(std::move(res), std::move(funcName), std::move(args)))); }; const DataType Float16{DataType::WORD, 16, true}; - const DataType Float8 {DataType::WORD, 8, true}; + 
const DataType Float8{DataType::WORD, 8, true}; const DataType Uchar16{DataType::BYTE, 16, false}; // vload16(size_t, const float*) @@ -2191,10 +2200,9 @@ void TestOptimizationSteps::testCombineDMALoads() Literal l(2); Value a(l, TYPE_INT32); Value b = 3_val; - std::shared_ptr expr(new ValueBinaryOp( - makeValueBinaryOpFromLocal(a, ValueBinaryOp::BinaryOp::Add, b), - ValueBinaryOp::BinaryOp::Sub, - std::make_shared(1_val))); + std::shared_ptr expr( + new ValueBinaryOp(makeValueBinaryOpFromLocal(a, ValueBinaryOp::BinaryOp::Add, b), + ValueBinaryOp::BinaryOp::Sub, std::make_shared(1_val))); ValueExpr::ExpandedExprs expanded; expr->expand(expanded); From e96cd41aa3b657b1a8d21d50e4fe53df0b76df02 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Wed, 30 Sep 2020 16:36:47 +0900 Subject: [PATCH 24/30] Remove unnecessary method --- src/spirv/SPIRVHelper.cpp | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/src/spirv/SPIRVHelper.cpp b/src/spirv/SPIRVHelper.cpp index 77419b14..c7567943 100644 --- a/src/spirv/SPIRVHelper.cpp +++ b/src/spirv/SPIRVHelper.cpp @@ -711,39 +711,6 @@ std::vector spirv::readStreamOfWords(std::istream* in) return words; } -void spirv::linkSPIRVModules(const std::vector& inputModules, std::ostream& output) -{ -#ifndef SPIRV_FRONTEND - throw CompilationError(CompilationStep::LINKER, "SPIRV-Tools linker is not available!"); -#else - std::vector> binaries; - binaries.reserve(inputModules.size()); - std::transform(inputModules.begin(), inputModules.end(), std::back_inserter(binaries), readStreamOfWords); - - spvtools::LinkerOptions options; - options.SetCreateLibrary(false); - options.SetVerifyIds(true); - // the VC4CL intrinsics are not provided by any input module - options.SetAllowPartialLinkage(true); - - spvtools::Context spvContext(SPV_ENV_OPENCL_EMBEDDED_1_2); - - std::vector linkedModules; - spv_result_t result = spvtools::Link(spvContext, binaries, &linkedModules, options); - - if(result != SPV_SUCCESS) - throw 
CompilationError(CompilationStep::PARSER, getErrorMessage(result)); - - for(const uint32_t u : linkedModules) - { - output.write(reinterpret_cast(&u), sizeof(uint32_t)); - } - CPPLOG_LAZY(logging::Level::DEBUG, - log << "Linked " << inputModules.size() << " modules into a single module with " << linkedModules.size() - << " words of data." << logging::endl); -#endif -} - void spirv::addFunctionAliases(Module& module) { /* From 99961e70801542298cb87ee57eaf559701e4cf23 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Wed, 30 Sep 2020 17:04:10 +0900 Subject: [PATCH 25/30] Revert SPIRVHelper.h and .cpp --- src/spirv/SPIRVHelper.cpp | 49 +++++++++++++++++++-------------------- src/spirv/SPIRVHelper.h | 8 +++---- 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/src/spirv/SPIRVHelper.cpp b/src/spirv/SPIRVHelper.cpp index c7567943..b8f18ad6 100644 --- a/src/spirv/SPIRVHelper.cpp +++ b/src/spirv/SPIRVHelper.cpp @@ -711,31 +711,6 @@ std::vector spirv::readStreamOfWords(std::istream* in) return words; } -void spirv::addFunctionAliases(Module& module) -{ - /* - * Add function aliases - * - * This is required, since SPIR-V maps e.g. OpenCL 1.2 standard function atomic_inc(x) as well as OpenCL 1.0 - * extension function atom_inc(x) to an OpAtomicIIncrement instruction. Since we can only map this instruction - * to one call-site, we need to alias the other one. - * - * So we map e.g. OpAtomicIIncrement to a call to "atomic_inc". But in case this does not exist (i.e. originally, - * "atom_inc" was called), we add an alias back to the old OpenCL 1.0 extension function. 
- */ - module.functionAliases.emplace("atomic_add", "atom_add"); - module.functionAliases.emplace("atomic_sub", "atom_sub"); - module.functionAliases.emplace("atomic_xchg", "atom_xchg"); - module.functionAliases.emplace("atomic_inc", "atom_inc"); - module.functionAliases.emplace("atomic_dec", "atom_dec"); - module.functionAliases.emplace("atomic_cmpxchg", "atom_cmpxchg"); - module.functionAliases.emplace("atomic_min", "atom_min"); - module.functionAliases.emplace("atomic_max", "atom_max"); - module.functionAliases.emplace("atomic_and", "atom_and"); - module.functionAliases.emplace("atomic_or", "atom_or"); - module.functionAliases.emplace("atomic_xor", "atom_xor"); -} - std::string spirv::demangleFunctionName(const std::string& name) { if(name.find("_Z") != 0) @@ -762,3 +737,27 @@ std::string spirv::demangleFunctionName(const std::string& name) #endif } +void spirv::addFunctionAliases(Module& module) +{ + /* + * Add function aliases + * + * This is required, since SPIR-V maps e.g. OpenCL 1.2 standard function atomic_inc(x) as well as OpenCL 1.0 + * extension function atom_inc(x) to an OpAtomicIIncrement instruction. Since we can only map this instruction + * to one call-site, we need to alias the other one. + * + * So we map e.g. OpAtomicIIncrement to a call to "atomic_inc". But in case this does not exist (i.e. originally, + * "atom_inc" was called), we add an alias back to the old OpenCL 1.0 extension function. 
+ */ + module.functionAliases.emplace("atomic_add", "atom_add"); + module.functionAliases.emplace("atomic_sub", "atom_sub"); + module.functionAliases.emplace("atomic_xchg", "atom_xchg"); + module.functionAliases.emplace("atomic_inc", "atom_inc"); + module.functionAliases.emplace("atomic_dec", "atom_dec"); + module.functionAliases.emplace("atomic_cmpxchg", "atom_cmpxchg"); + module.functionAliases.emplace("atomic_min", "atom_min"); + module.functionAliases.emplace("atomic_max", "atom_max"); + module.functionAliases.emplace("atomic_and", "atom_and"); + module.functionAliases.emplace("atomic_or", "atom_or"); + module.functionAliases.emplace("atomic_xor", "atom_xor"); +} diff --git a/src/spirv/SPIRVHelper.h b/src/spirv/SPIRVHelper.h index 5901f7f9..c7c3122d 100644 --- a/src/spirv/SPIRVHelper.h +++ b/src/spirv/SPIRVHelper.h @@ -14,10 +14,6 @@ #include -#endif - -#include - namespace vc4c { class Module; @@ -46,9 +42,11 @@ namespace vc4c std::vector readStreamOfWords(std::istream* in); - void addFunctionAliases(Module& module); std::string demangleFunctionName(const std::string& name); + void addFunctionAliases(Module& module); + } // namespace spirv } // namespace vc4c +#endif /* SPIRVHELPER_H */ From 90c50ca94b2b619cbede9b0a58c03b0919def8a5 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Wed, 30 Sep 2020 22:22:04 +0900 Subject: [PATCH 26/30] Fix to use Expression instead of ValueExpr and remove it --- src/Expression.cpp | 2 + src/Expression.h | 3 + src/optimization/Combiner.cpp | 230 ++++++++++++++++++++++++++------- src/optimization/ValueExpr.cpp | 215 ------------------------------ src/optimization/ValueExpr.h | 98 -------------- src/optimization/sources.list | 1 - test/TestOptimizationSteps.cpp | 36 +++--- 7 files changed, 205 insertions(+), 380 deletions(-) delete mode 100644 src/optimization/ValueExpr.cpp delete mode 100644 src/optimization/ValueExpr.h diff --git a/src/Expression.cpp b/src/Expression.cpp index 9bdba743..4d46c947 100644 --- 
a/src/Expression.cpp +++ b/src/Expression.cpp @@ -6,6 +6,8 @@ using namespace vc4c; constexpr OpCode Expression::FAKEOP_UMUL; +constexpr OpCode Expression::FAKEOP_MUL; +constexpr OpCode Expression::FAKEOP_DIV; SubExpression::SubExpression(const Optional& val) : Base(VariantNamespace::monostate{}) { diff --git a/src/Expression.h b/src/Expression.h index 4459a7df..1f33bebe 100644 --- a/src/Expression.h +++ b/src/Expression.h @@ -109,6 +109,9 @@ namespace vc4c // A fake operation to indicate an unsigned multiplication static constexpr OpCode FAKEOP_UMUL{"umul", 132, 132, 2, false, false, FlagBehavior::NONE}; + static constexpr OpCode FAKEOP_MUL{"mul", 132, 132, 2, false, false, FlagBehavior::NONE}; + static constexpr OpCode FAKEOP_DIV{"div", 132, 132, 2, false, false, FlagBehavior::NONE}; + OpCode code; SubExpression arg0; SubExpression arg1{}; diff --git a/src/optimization/Combiner.cpp b/src/optimization/Combiner.cpp index e69f3126..77fed6f2 100644 --- a/src/optimization/Combiner.cpp +++ b/src/optimization/Combiner.cpp @@ -10,7 +10,7 @@ #include "../analysis/MemoryAnalysis.h" #include "../intermediate/Helper.h" #include "../intermediate/operators.h" -#include "../optimization/ValueExpr.h" +#include "../Expression.h" #include "../periphery/VPM.h" #include "../spirv/SPIRVHelper.h" #include "Eliminator.h" @@ -1125,8 +1125,13 @@ InstructionWalker optimizations::combineArithmeticOperations( return it; } +SubExpression makeValueBinaryOpFromLocal(Value& left, const OpCode& binOp, Value& right) +{ + return SubExpression(std::make_shared(binOp, SubExpression(left), SubExpression(right))); +} + // try to convert shl to mul and return it as ValueExpr -std::shared_ptr shlToMul(Value& value, const intermediate::Operation* op) +SubExpression shlToMul(const Value& value, const intermediate::Operation* op) { auto left = op->getFirstArg(); auto right = *op->getSecondArg(); @@ -1143,29 +1148,24 @@ std::shared_ptr shlToMul(Value& value, const intermediate::Operation* if(shiftValue > 0) 
{ auto right = Value(Literal(1 << shiftValue), TYPE_INT32); - return makeValueBinaryOpFromLocal(left, ValueBinaryOp::BinaryOp::Mul, right); + return makeValueBinaryOpFromLocal(left, OP_FMUL, right); } else { - return std::make_shared(value); + return SubExpression(value); } } -std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) +SubExpression iiToExpr(const Value& value, const LocalUser* inst) { - using BO = ValueBinaryOp::BinaryOp; - BO binOp = BO::Other; - // add, sub, shr, shl, asr if(auto op = dynamic_cast(inst)) { - if(op->op == OP_ADD) - { - binOp = BO::Add; - } - else if(op->op == OP_SUB) + if(op->op == OP_ADD || op->op == OP_SUB) { - binOp = BO::Sub; + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); + return makeValueBinaryOpFromLocal(left, op->op, right); } else if(op->op == OP_SHL) { @@ -1176,28 +1176,25 @@ std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) else { // If op is neither add nor sub, return value as-is. - return std::make_shared(value); + return SubExpression(value); } - - auto left = op->getFirstArg(); - auto right = *op->getSecondArg(); - return makeValueBinaryOpFromLocal(left, binOp, right); } // mul, div else if(auto op = dynamic_cast(inst)) { + OpCode binOp = OP_NOP; if(op->opCode == "mul") { - binOp = BO::Mul; + binOp = Expression::FAKEOP_MUL; } else if(op->opCode == "div") { - binOp = BO::Div; + binOp = Expression::FAKEOP_DIV; } else { // If op is neither add nor sub, return value as-is. 
- return std::make_shared(value); + return SubExpression(value); } auto left = op->getFirstArg(); @@ -1205,15 +1202,150 @@ std::shared_ptr iiToExpr(Value& value, const LocalUser* inst) return makeValueBinaryOpFromLocal(left, binOp, right); } - return std::make_shared(value); + return SubExpression(value); } -std::shared_ptr calcValueExpr(std::shared_ptr expr) +Optional getIntegerFromExpression(const SubExpression& expr) { - using BO = ValueBinaryOp::BinaryOp; + if(auto value = expr.checkValue()) + { + if(auto lit = value->checkLiteral()) + { + return Optional(lit->signedInt()); + } + else if(auto imm = value->checkImmediate()) + { + return imm->getIntegerValue(); + } + } + return Optional(); +} + +// signed, value +using ExpandedExprs = std::vector>; - ValueExpr::ExpandedExprs expanded; - expr->expand(expanded); +void expandExpression(const SubExpression& subExpr, ExpandedExprs& expanded) +{ + if(auto expr = subExpr.checkExpression()) + { + ExpandedExprs leftEE, rightEE; + auto& left = expr->arg0; + auto& right = expr->arg1; + auto& op = expr->code; + + expandExpression(left, leftEE); + expandExpression(right, rightEE); + + auto getInteger = [](const std::pair& v) { + std::function(const int&)> addSign = [&](const int& num) { + return make_optional(v.first ? num : -num); + }; + return getIntegerFromExpression(v.second) & addSign; + }; + + auto leftNum = (leftEE.size() == 1) ? getInteger(leftEE[0]) : Optional(); + auto rightNum = (rightEE.size() == 1) ? 
getInteger(rightEE[0]) : Optional(); + + auto append = [](ExpandedExprs& ee1, ExpandedExprs& ee2) { ee1.insert(ee1.end(), ee2.begin(), ee2.end()); }; + + if(leftNum && rightNum) + { + int l = leftNum.value_or(0); + int r = rightNum.value_or(0); + int num = 0; + + if(op == OP_ADD) + { + num = l + r; + } + else if(op == OP_SUB) + { + num = l - r; + } + else if(op == Expression::FAKEOP_MUL) + { + num = l * r; + } + else if(op == Expression::FAKEOP_DIV) + { + num = l / r; + } + else + { + throw CompilationError(CompilationStep::OPTIMIZER, "Unknown operation", op.name); + } + + // TODO: Care other types + auto value = Value(Literal(std::abs(num)), TYPE_INT32); + SubExpression foldedExpr(value); + expanded.push_back(std::make_pair(true, foldedExpr)); + } + else + { + if(op == OP_ADD) + { + append(expanded, leftEE); + append(expanded, rightEE); + } + else if(op == OP_SUB) + { + append(expanded, leftEE); + + for(auto& e : rightEE) + { + e.first = !e.first; + } + append(expanded, rightEE); + } + else if(op == Expression::FAKEOP_MUL) + { + if(leftNum || rightNum) + { + int num = 0; + ExpandedExprs* ee = nullptr; + if(leftNum) + { + num = leftNum.value_or(0); + ee = &rightEE; + } + else + { + num = rightNum.value_or(0); + ee = &leftEE; + } + for(int i = 0; i < num; i++) + { + append(expanded, *ee); + } + } + else + { + expanded.push_back(std::make_pair(true, SubExpression(std::make_shared(op, left, right)))); + } + } + else if(op == Expression::FAKEOP_DIV) + { + expanded.push_back(std::make_pair(true, SubExpression(std::make_shared(op, left, right)))); + } + else + { + throw CompilationError(CompilationStep::OPTIMIZER, "Unknown operation", op.name); + } + } + } + else if(auto value = subExpr.checkValue()) + { + expanded.push_back(std::make_pair(true, subExpr)); + } + else { + throw CompilationError(CompilationStep::OPTIMIZER, "Cannot expand expression", subExpr.to_string()); + } +} + +SubExpression calcValueExpr(const SubExpression& expr) +{ + ExpandedExprs expanded; + 
expandExpression(expr, expanded); // for(auto& p : expanded) // logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; @@ -1221,10 +1353,9 @@ std::shared_ptr calcValueExpr(std::shared_ptr expr) for(auto p = expanded.begin(); p != expanded.end();) { - auto comp = std::find_if( - expanded.begin(), expanded.end(), [&p](const std::pair>& other) { - return p->first != other.first && *p->second == *other.second; - }); + auto comp = std::find_if(expanded.begin(), expanded.end(), [&p](const std::pair& other) { + return p->first != other.first && p->second == other.second; + }); if(comp != expanded.end()) { expanded.erase(comp); @@ -1236,18 +1367,24 @@ std::shared_ptr calcValueExpr(std::shared_ptr expr) } } - std::shared_ptr result = std::make_shared(INT_ZERO); + SubExpression result(INT_ZERO); for(auto& p : expanded) { - result = std::make_shared(result, p.first ? BO::Add : BO::Sub, p.second); + result = SubExpression(std::make_shared(p.first ? OP_ADD : OP_SUB, result, p.second)); } return result; } +SubExpression replaceLocalToExpr(const SubExpression& expr, const Value& local, SubExpression newExpr) +{ + return expr; +} + void optimizations::combineDMALoads(const Module& module, Method& method, const Configuration& config) { using namespace std; + using namespace VariantNamespace; const std::regex vloadReg("vload(2|3|4|8|16)"); @@ -1306,7 +1443,7 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const logging::debug() << inst->to_string() << logging::endl; } - std::vector>> addrExprs; + std::vector> addrExprs; for(auto& addrValue : offsetValues) { @@ -1318,13 +1455,13 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const } else { - addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue))); + addrExprs.push_back(std::make_pair(addrValue, SubExpression(addrValue))); } } else { // TODO: is it ok? 
- addrExprs.push_back(std::make_pair(addrValue, std::make_shared(addrValue))); + addrExprs.push_back(std::make_pair(addrValue, SubExpression(addrValue))); } } @@ -1332,33 +1469,32 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const { for(auto& other : addrExprs) { - auto replaced = current.second->replaceLocal(other.first, other.second); - current.second = replaced; + current.second = replaceLocalToExpr(current.second, other.first, other.second); } } for(auto& pair : addrExprs) { - logging::debug() << pair.first.to_string() << " = " << pair.second->to_string() << logging::endl; + logging::debug() << pair.first.to_string() << " = " << pair.second.to_string() << logging::endl; } - std::shared_ptr diff = nullptr; + SubExpression diff; bool eqDiff = true; for(size_t i = 1; i < addrExprs.size(); i++) { auto x = addrExprs[i - 1].second; auto y = addrExprs[i].second; - auto diffExpr = std::make_shared(y, ValueBinaryOp::BinaryOp::Sub, x); + auto diffExpr = SubExpression(std::make_shared(OP_SUB, y, x)); auto currentDiff = calcValueExpr(diffExpr); // Apply calcValueExpr again for integer literals. currentDiff = calcValueExpr(currentDiff); - if(diff == nullptr) + if(!diff) { diff = currentDiff; } - if(*currentDiff != *diff) + if(currentDiff != diff) { eqDiff = false; break; @@ -1371,16 +1507,16 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const if(eqDiff) { // The form of diff should be "0 (+/-) expressions...", then remove the value 0 at most right. - ValueExpr::ExpandedExprs expanded; - diff->expand(expanded); + ExpandedExprs expanded; + expandExpression(diff, expanded); if(expanded.size() == 1) { diff = expanded[0].second; // logging::debug() << "diff = " << diff->to_string() << logging::endl; - auto term = std::dynamic_pointer_cast(diff); - auto mpValue = (term != nullptr) ? term->value.getConstantValue() : Optional{}; + auto term = diff.getConstantExpression(); + auto mpValue = term.has_value() ? 
term->getConstantValue() : Optional{}; auto mpLiteral = mpValue.has_value() ? mpValue->getLiteralValue() : Optional{}; if(mpLiteral) diff --git a/src/optimization/ValueExpr.cpp b/src/optimization/ValueExpr.cpp deleted file mode 100644 index 6fab1c08..00000000 --- a/src/optimization/ValueExpr.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Author: doe300 - * - * See the file "LICENSE" for the full license governing this code. - */ - -#include "ValueExpr.h" - -#include "../Locals.h" - -using namespace vc4c; -using namespace vc4c::optimizations; - -bool ValueBinaryOp::operator==(const ValueExpr& other) const -{ - if(auto otherOp = dynamic_cast(&other)) - { - return op == otherOp->op && *right == *otherOp->right && *left == *otherOp->left; - } - return false; -} - -std::shared_ptr ValueBinaryOp::replaceLocal(const Value& value, std::shared_ptr expr) -{ - return std::make_shared(left->replaceLocal(value, expr), op, right->replaceLocal(value, expr)); -} - -void ValueBinaryOp::expand(ExpandedExprs& exprs) -{ - ExpandedExprs leftEE, rightEE; - left->expand(leftEE); - right->expand(rightEE); - - auto getInteger = [](const std::pair> &v) { - std::function(const int&)> addSign = [&](const int& num) { - return make_optional(v.first ? num : -num); - }; - return v.second->getInteger() & addSign; - }; - - auto leftNum = (leftEE.size() == 1) ? getInteger(leftEE[0]) : Optional(); - auto rightNum = (rightEE.size() == 1) ? 
getInteger(rightEE[0]) : Optional(); - - auto append = [](ExpandedExprs &ee1, ExpandedExprs &ee2) { - ee1.insert(ee1.end(), ee2.begin(), ee2.end()); - }; - - if(leftNum && rightNum) - { - int l = leftNum.value_or(0); - int r = rightNum.value_or(0); - int num = 0; - switch(op) - { - case BinaryOp::Add: - num = l + r; - break; - case BinaryOp::Sub: - num = l - r; - break; - case BinaryOp::Mul: - num = l * r; - break; - case BinaryOp::Div: - num = l / r; - break; - case BinaryOp::Other: - break; - } - - // TODO: Care other types - auto value = Value(Literal(std::abs(num)), TYPE_INT32); - std::shared_ptr expr = std::make_shared(value); - exprs.push_back(std::make_pair(true, expr)); - } - else - { - switch(op) - { - case BinaryOp::Add: - { - append(exprs, leftEE); - append(exprs, rightEE); - break; - } - case BinaryOp::Sub: - { - append(exprs, leftEE); - - for(auto& e : rightEE) - { - e.first = !e.first; - } - append(exprs, rightEE); - break; - } - case BinaryOp::Mul: - { - if(leftNum || rightNum) - { - int num = 0; - ExpandedExprs *ee = nullptr; - if(leftNum) - { - num = leftNum.value_or(0); - ee = &rightEE; - } - else - { - num = rightNum.value_or(0); - ee = &leftEE; - } - for(int i = 0; i < num; i++) - { - append(exprs, *ee); - } - } - else - { - exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); - } - break; - } - case BinaryOp::Div: - { - exprs.push_back(std::make_pair(true, std::make_shared(left, op, right))); - break; - } - case BinaryOp::Other: - break; - } - } -} - -Optional ValueBinaryOp::getInteger() const -{ - return Optional(); -} - -std::string ValueBinaryOp::to_string() const -{ - std::string opStr; - switch(op) - { - case BinaryOp::Add: - opStr = "+"; - break; - case BinaryOp::Sub: - opStr = "-"; - break; - case BinaryOp::Mul: - opStr = "*"; - break; - case BinaryOp::Div: - opStr = "/"; - break; - case BinaryOp::Other: - opStr = "other"; - break; - } - - return "(" + left->to_string() + " " + opStr + " " + right->to_string() + ")"; 
-} - -std::shared_ptr optimizations::makeValueBinaryOpFromLocal( - Value& left, ValueBinaryOp::BinaryOp binOp, Value& right) -{ - return std::make_shared( - std::make_shared(left), binOp, std::make_shared(right)); -} - -bool ValueTerm::operator==(const ValueExpr& other) const -{ - if(auto otherTerm = dynamic_cast(&other)) - return value == otherTerm->value; - return false; -} - -std::shared_ptr ValueTerm::replaceLocal(const Value& from, std::shared_ptr expr) -{ - if(auto fromLocal = from.checkLocal()) - { - if(auto valueLocal = value.checkLocal()) - { - if(*fromLocal == *valueLocal) - { - return expr; - } - } - } - return std::make_shared(value); -} - -void ValueTerm::expand(ExpandedExprs& exprs) -{ - exprs.push_back(std::make_pair(true, std::make_shared(value))); -} - -Optional ValueTerm::getInteger() const -{ - if(auto lit = value.checkLiteral()) - { - return Optional(lit->signedInt()); - } - else if(auto imm = value.checkImmediate()) - { - return imm->getIntegerValue(); - } - return Optional(); -} - -std::string ValueTerm::to_string() const -{ - return value.to_string(); -} diff --git a/src/optimization/ValueExpr.h b/src/optimization/ValueExpr.h deleted file mode 100644 index 5561b733..00000000 --- a/src/optimization/ValueExpr.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Author: doe300 - * - * See the file "LICENSE" for the full license governing this code. - */ -#ifndef VC4C_OPTIMIZATION_VALUEEXPR -#define VC4C_OPTIMIZATION_VALUEEXPR - -#include "../Values.h" - -#include -#include - -namespace vc4c -{ - namespace optimizations - { - class ValueExpr - { - public: - // signed, value - using ExpandedExprs = std::vector>>; - - virtual ~ValueExpr() = default; - - virtual bool operator==(const ValueExpr& other) const = 0; - inline bool operator!=(const ValueExpr& other) const - { - return !(*this == other); - } - - virtual std::shared_ptr replaceLocal(const Value& value, std::shared_ptr expr) = 0; - - // expand value expr as liner combination - // e.g. 
(a + b) * c = a * c + b * c - virtual void expand(ExpandedExprs& exprs) = 0; - - virtual Optional getInteger() const = 0; - - virtual std::string to_string() const = 0; - }; - - class ValueBinaryOp : public ValueExpr - { - public: - enum class BinaryOp - { - Add, - Sub, - Mul, - Div, - Other, - }; - - ValueBinaryOp(std::shared_ptr left, BinaryOp op, std::shared_ptr right) : - left(left), op(op), right(right) - { - } - - bool operator==(const ValueExpr& other) const override; - - std::shared_ptr replaceLocal(const Value& value, std::shared_ptr expr) override; - - void expand(ExpandedExprs& exprs) override; - - Optional getInteger() const override; - - std::string to_string() const override; - - std::shared_ptr left; - BinaryOp op; - std::shared_ptr right; - }; - - std::shared_ptr makeValueBinaryOpFromLocal(Value& left, ValueBinaryOp::BinaryOp binOp, Value& right); - - class ValueTerm : public ValueExpr - { - public: - ValueTerm(const Value& value) : value(value) {} - - bool operator==(const ValueExpr& other) const override; - - std::shared_ptr replaceLocal(const Value& from, std::shared_ptr expr) override; - - void expand(ExpandedExprs& exprs) override; - - Optional getInteger() const override; - - std::string to_string() const override; - - const Value value; - }; - - } /* namespace optimizations */ -} /* namespace vc4c */ - -#endif /* VC4C_OPTIMIZATION_VALUEEXPR */ diff --git a/src/optimization/sources.list b/src/optimization/sources.list index ad9588fa..79821bc8 100644 --- a/src/optimization/sources.list +++ b/src/optimization/sources.list @@ -8,5 +8,4 @@ target_sources(${VC4C_LIBRARY_NAME} ${CMAKE_CURRENT_LIST_DIR}/Optimizer.cpp ${CMAKE_CURRENT_LIST_DIR}/Reordering.cpp ${CMAKE_CURRENT_LIST_DIR}/InstructionScheduler.cpp - ${CMAKE_CURRENT_LIST_DIR}/ValueExpr.cpp ) diff --git a/test/TestOptimizationSteps.cpp b/test/TestOptimizationSteps.cpp index a9417ddf..3dfa87ed 100644 --- a/test/TestOptimizationSteps.cpp +++ b/test/TestOptimizationSteps.cpp @@ -17,8 +17,6 @@ 
#include "optimization/Flags.h" #include "periphery/VPM.h" -#include "optimization/ValueExpr.h" - #include #include "log.h" @@ -2194,21 +2192,21 @@ void TestOptimizationSteps::testCombineDMALoads() testCombineDMALoadsSub(module, inputMethod, config, Float16); } - { - // ValueExpr::expand - - Literal l(2); - Value a(l, TYPE_INT32); - Value b = 3_val; - std::shared_ptr expr( - new ValueBinaryOp(makeValueBinaryOpFromLocal(a, ValueBinaryOp::BinaryOp::Add, b), - ValueBinaryOp::BinaryOp::Sub, std::make_shared(1_val))); - ValueExpr::ExpandedExprs expanded; - expr->expand(expanded); - - TEST_ASSERT_EQUALS(1, expanded.size()); - - auto n = expanded[0].second->getInteger(); - TEST_ASSERT_EQUALS(4, n.value_or(0)); - } + // { + // // expand + // + // Literal l(2); + // Value a(l, TYPE_INT32); + // Value b = 3_val; + // SubExpression expr( + // new ValueBinaryOp(makeValueBinaryOpFromLocal(a, ValueBinaryOp::BinaryOp::Add, b), + // ValueBinaryOp::BinaryOp::Sub, std::make_shared(1_val))); + // ValueExpr::ExpandedExprs expanded; + // expr->expand(expanded); + // + // TEST_ASSERT_EQUALS(1, expanded.size()); + // + // auto n = expanded[0].second->getInteger(); + // TEST_ASSERT_EQUALS(4, n.value_or(0)); + // } } From 6237397465c63d564e3231b6841e8d8127365a39 Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 11 Oct 2020 16:58:02 +0900 Subject: [PATCH 27/30] Fix calcValueExpr to take ExpandedExprs --- src/optimization/Combiner.cpp | 96 +++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 33 deletions(-) diff --git a/src/optimization/Combiner.cpp b/src/optimization/Combiner.cpp index 77fed6f2..12993dc3 100644 --- a/src/optimization/Combiner.cpp +++ b/src/optimization/Combiner.cpp @@ -6,11 +6,11 @@ #include "Combiner.h" +#include "../Expression.h" #include "../InstructionWalker.h" #include "../analysis/MemoryAnalysis.h" #include "../intermediate/Helper.h" #include "../intermediate/operators.h" -#include "../Expression.h" #include "../periphery/VPM.h" #include 
"../spirv/SPIRVHelper.h" #include "Eliminator.h" @@ -1221,8 +1221,20 @@ Optional getIntegerFromExpression(const SubExpression& expr) return Optional(); } -// signed, value -using ExpandedExprs = std::vector>; +// signed, value +class ExpandedExprs : public std::vector> +{ +public: + std::string to_string() const + { + std::stringstream ss; + for(auto& p : *this) + { + ss << (p.first ? "+" : "-") << p.second.to_string(); + } + return ss.str(); + } +}; void expandExpression(const SubExpression& subExpr, ExpandedExprs& expanded) { @@ -1320,7 +1332,8 @@ void expandExpression(const SubExpression& subExpr, ExpandedExprs& expanded) } else { - expanded.push_back(std::make_pair(true, SubExpression(std::make_shared(op, left, right)))); + expanded.push_back( + std::make_pair(true, SubExpression(std::make_shared(op, left, right)))); } } else if(op == Expression::FAKEOP_DIV) @@ -1337,15 +1350,16 @@ void expandExpression(const SubExpression& subExpr, ExpandedExprs& expanded) { expanded.push_back(std::make_pair(true, subExpr)); } - else { + else + { throw CompilationError(CompilationStep::OPTIMIZER, "Cannot expand expression", subExpr.to_string()); } } -SubExpression calcValueExpr(const SubExpression& expr) +void calcValueExpr(ExpandedExprs& expanded) { - ExpandedExprs expanded; - expandExpression(expr, expanded); + // ExpandedExprs expanded; + // expandExpression(expr, expanded); // for(auto& p : expanded) // logging::debug() << (p.first ? "+" : "-") << p.second->to_string() << " "; @@ -1367,13 +1381,13 @@ SubExpression calcValueExpr(const SubExpression& expr) } } - SubExpression result(INT_ZERO); - for(auto& p : expanded) - { - result = SubExpression(std::make_shared(p.first ? OP_ADD : OP_SUB, result, p.second)); - } - - return result; + // SubExpression result(INT_ZERO); + // for(auto& p : expanded) + // { + // result = SubExpression(std::make_shared(p.first ? 
OP_ADD : OP_SUB, result, p.second)); + // } + // + // return result; } SubExpression replaceLocalToExpr(const SubExpression& expr, const Value& local, SubExpression newExpr) @@ -1478,7 +1492,7 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const logging::debug() << pair.first.to_string() << " = " << pair.second.to_string() << logging::endl; } - SubExpression diff; + ExpandedExprs diff; bool eqDiff = true; for(size_t i = 1; i < addrExprs.size(); i++) { @@ -1486,36 +1500,53 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const auto y = addrExprs[i].second; auto diffExpr = SubExpression(std::make_shared(OP_SUB, y, x)); - auto currentDiff = calcValueExpr(diffExpr); + ExpandedExprs currentDiff; + expandExpression(diffExpr, currentDiff); + + calcValueExpr(currentDiff); + // Apply calcValueExpr again for integer literals. - currentDiff = calcValueExpr(currentDiff); + SubExpression currentExpr(INT_ZERO); + for(auto& p : currentDifft) + { + currentExpr = + SubExpression(std::make_shared(p.first ? OP_ADD : OP_SUB, currentExpr, p.second)); + } + currentDiff.clear(); + expandExpression(currentExpr, currentDiff); + calcValueExpr(currentDiff); + + // logging::debug() << currentDiff.to_string() << ", " << diff.to_string() << logging::endl; - if(!diff) + if(i == 1) { - diff = currentDiff; + diff = std::move(currentDiff); } - if(currentDiff != diff) + else if(currentDiff != diff) { eqDiff = false; break; } } - logging::debug() << addrExprs.size() << " loads are " << (eqDiff ? "" : "not ") << "equal difference" - << logging::endl; + logging::debug() << addrExprs.size() << " loads are " << (eqDiff ? "" : "not ") + << "equal difference: " << diff.to_string() << logging::endl; if(eqDiff) { // The form of diff should be "0 (+/-) expressions...", then remove the value 0 at most right. 
- ExpandedExprs expanded; - expandExpression(diff, expanded); - if(expanded.size() == 1) + // ExpandedExprs expanded; + // expandExpression(diff, expanded); + // for (auto& ex : expanded) { + // logging::debug() << "ex = " << ex.second.to_string() << logging::endl; + // } + if(diff.size() == 1) { - diff = expanded[0].second; + auto diffExpr = diff[0].second; - // logging::debug() << "diff = " << diff->to_string() << logging::endl; + // logging::debug() << "diff = " << diff.to_string() << logging::endl; - auto term = diff.getConstantExpression(); + auto term = diffExpr.getConstantExpression(); auto mpValue = term.has_value() ? term->getConstantValue() : Optional{}; auto mpLiteral = mpValue.has_value() ? mpValue->getLiteralValue() : Optional{}; @@ -1554,12 +1585,11 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const elemType.getInMemoryWidth() * DataType::BYTE, vectorLength, false}; uint64_t rows = loadInstrs.size(); - VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); auto entries = Value(Literal(static_cast(rows)), TYPE_INT32); - it = - method.vpm->insertReadRAM(method, it, addr, VectorType, /* &area */ nullptr, - true, INT_ZERO, entries, Optional(memoryPitch)); + it = method.vpm->insertReadRAM(method, it, addr, VectorType, nullptr, true, + INT_ZERO, entries, Optional(memoryPitch)); + VPMArea area(VPMUsage::SCRATCH, 0, static_cast(rows)); it = method.vpm->insertReadVPM(method, it, output, &area, true); } else From 998e468fd047de58375c28e2acbcbb8b29c1d91e Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 11 Oct 2020 17:20:29 +0900 Subject: [PATCH 28/30] Write the documentation of combineDMALoads --- src/optimization/Combiner.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/optimization/Combiner.h b/src/optimization/Combiner.h index e0c38c5c..e9b213c0 100644 --- a/src/optimization/Combiner.h +++ b/src/optimization/Combiner.h @@ -154,7 +154,34 @@ namespace vc4c 
InstructionWalker combineArithmeticOperations( const Module& module, Method& method, InstructionWalker it, const Configuration& config); - // TODO documentation + /* + * Combines multiple vloadn calls into one DMA/VPM load. This is available only for constant value offsets. + * + * Example: + * %call = _Z7vload16jPU3AS1Kf(i32 2, (g) f32* %in) ; vload16 + * %call2 = _Z7vload16jPU3AS1Kf(i32 3, (g) f32* %in) + * %call3 = _Z7vload16jPU3AS1Kf(i32 4, (g) f32* %in) + * + * becomes: + * %tmp.405 = add i32 128, (p) f32* %in + * mutex_acq + * register vpr_setup = vdr_setup(rows: 3, columns: 16 words, address: h32(0,0), vpitch: 1) + * register vpr_setup = loadi vdr_setup(memory pitch: 64 bytes) + * register vpr_addr = i32 %tmp.405 + * register - = register vpr_wait + * mutex_rel + * mutex_acq + * register vpr_setup = loadi vpm_setup(num: 3, size: 16 words, stride: 1 rows, address: h32(0)) + * %tmp.404 = register vpm + * mutex_rel + * mutex_acq + * %tmp.403 = register vpm + * mutex_rel + * mutex_acq + * %tmp.402 = register vpm + * mutex_rel + * + */ void combineDMALoads(const Module& module, Method& method, const Configuration& config); // TODO documentation, TODO move somewhere else?! 
From 6e5fafaf7028cd8b3d3d753d220c71121e7702ed Mon Sep 17 00:00:00 2001 From: long-long-float Date: Thu, 22 Oct 2020 18:08:05 +0900 Subject: [PATCH 29/30] Implement replaceLocalToExpr --- src/optimization/Combiner.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/optimization/Combiner.cpp b/src/optimization/Combiner.cpp index 12993dc3..089b268d 100644 --- a/src/optimization/Combiner.cpp +++ b/src/optimization/Combiner.cpp @@ -1390,9 +1390,25 @@ void calcValueExpr(ExpandedExprs& expanded) // return result; } -SubExpression replaceLocalToExpr(const SubExpression& expr, const Value& local, SubExpression newExpr) +SubExpression replaceLocalToExpr(const SubExpression& subExpr, const Value& local, SubExpression newExpr) { - return expr; + if(auto expr = subExpr.checkExpression()) + { + return SubExpression(std::make_shared(expr->code, + replaceLocalToExpr(expr->arg0, local, newExpr), + replaceLocalToExpr(expr->arg1, local, newExpr))); + + } + else if(auto replacee = subExpr.checkLocal()) + { + if (auto replacer = local.checkLocal()) { + if (*replacee == *replacer) { + return newExpr; + } + } + } + + return subExpr; } void optimizations::combineDMALoads(const Module& module, Method& method, const Configuration& config) @@ -1507,7 +1523,7 @@ void optimizations::combineDMALoads(const Module& module, Method& method, const // Apply calcValueExpr again for integer literals. SubExpression currentExpr(INT_ZERO); - for(auto& p : currentDifft) + for(auto& p : currentDiff) { currentExpr = SubExpression(std::make_shared(p.first ? 
OP_ADD : OP_SUB, currentExpr, p.second)); From 414e448afbbd02ff6ba2adfadf1a6741889913ed Mon Sep 17 00:00:00 2001 From: long-long-float Date: Sun, 1 Nov 2020 18:53:38 +0900 Subject: [PATCH 30/30] Treat `or` as `add` --- src/optimization/Combiner.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/optimization/Combiner.cpp b/src/optimization/Combiner.cpp index 089b268d..da209506 100644 --- a/src/optimization/Combiner.cpp +++ b/src/optimization/Combiner.cpp @@ -1167,6 +1167,12 @@ SubExpression iiToExpr(const Value& value, const LocalUser* inst) auto right = *op->getSecondArg(); return makeValueBinaryOpFromLocal(left, op->op, right); } + else if(op->op == OP_OR) // Treat `or` as `add` + { + auto left = op->getFirstArg(); + auto right = *op->getSecondArg(); + return makeValueBinaryOpFromLocal(left, OP_ADD, right); + } else if(op->op == OP_SHL) { // convert shl to mul