Skip to content

Commit

Permalink
Optimize collection of references in dataflow analyzer clearValues
Browse files Browse the repository at this point in the history
Previously, when clearing values in the data flow analyzer, the referencing variables were gathered by iterating over all variables to be cleared and checking, for each of them, whether it is contained in any of the recorded reference sets. These are stored in a data structure of the form map(variable -> set<variable>).

Now it is only checked whether the set of variables to clear has a non-empty intersection with each value of the aforementioned map. The check makes use of the fact that both collections are sorted.

In pathological cases like the chains.sol benchmark, this can bring down the compilation time by approx. 50%.

No functional changes, overall behavior stays the same.
  • Loading branch information
clonker committed Feb 21, 2025
1 parent 6c556c3 commit 5c0ba6c
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 20 deletions.
23 changes: 23 additions & 0 deletions libsolutil/CommonData.h
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,29 @@ void iterateReplacingWindow(std::vector<T>& _vector, F const& _f, std::index_seq

}

/// Tests whether two collections have at least one element in common.
/// Both collections are assumed to be sorted in ascending order.
/// @returns true as soon as an element of @a _collection1 compares equal to an
/// element of @a _collection2, false once either collection is exhausted.
template<typename Collection1, typename Collection2>
requires (
	std::forward_iterator<std::ranges::iterator_t<Collection1>> &&
	std::forward_iterator<std::ranges::iterator_t<Collection2>>
)
bool hasNonemptyIntersectionSorted(Collection1 const& _collection1, Collection2 const& _collection2)
{
	auto const firstEnd = std::ranges::end(_collection1);
	auto const secondEnd = std::ranges::end(_collection2);
	auto first = std::ranges::begin(_collection1);
	auto second = std::ranges::begin(_collection2);

	// Walk both ranges in lockstep, always advancing the side that currently
	// holds the smaller element; each element is therefore visited at most once.
	for (;;)
	{
		if (first == firstEnd || second == secondEnd)
			return false;
		if (*first == *second)
			return true;
		if (*first < *second)
			++first;
		else
			++second;
	}
}

/// Function that iterates over the vector @param _vector,
/// calling the function @param _f on sequences of @tparam N of its
/// elements. If @param _f returns a vector, these elements are replaced by
Expand Down
35 changes: 19 additions & 16 deletions libyul/optimiser/DataFlowAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,10 @@ void DataFlowAnalyzer::handleAssignment(std::set<YulName> const& _variables, Exp
}

auto const& referencedVariables = movableChecker.referencedVariables();
std::vector const referencedVariablesSorted(referencedVariables.begin(), referencedVariables.end());
for (auto const& name: _variables)
{
m_state.references[name] = referencedVariables;
m_state.sortedReferences[name] = referencedVariablesSorted;
if (!_isDeclaration)
{
// assignment to slot denoted by "name"
Expand Down Expand Up @@ -310,12 +311,12 @@ void DataFlowAnalyzer::popScope()
for (auto const& name: m_variableScopes.back().variables)
{
m_state.value.erase(name);
m_state.references.erase(name);
m_state.sortedReferences.erase(name);
}
m_variableScopes.pop_back();
}

void DataFlowAnalyzer::clearValues(std::set<YulName> _variables)
void DataFlowAnalyzer::clearValues(std::set<YulName> const& _variablesToClear)
{
// All variables that reference variables to be cleared also have to be
// cleared, but not recursively, since only the value of the original
Expand All @@ -333,30 +334,32 @@ void DataFlowAnalyzer::clearValues(std::set<YulName> _variables)
// First clear storage knowledge, because we do not have to clear
// storage knowledge of variables whose expression has changed,
// since the value is still unchanged.
auto eraseCondition = mapTuple([&_variables](auto&& key, auto&& value) {
return _variables.count(key) || _variables.count(value);
auto eraseCondition = mapTuple([&_variablesToClear](auto&& key, auto&& value) {
return _variablesToClear.count(key) || _variablesToClear.count(value);
});
std::erase_if(m_state.environment.storage, eraseCondition);
std::erase_if(m_state.environment.memory, eraseCondition);
std::erase_if(m_state.environment.keccak, [&_variables](auto&& _item) {
std::erase_if(m_state.environment.keccak, [&_variablesToClear](auto&& _item) {
return
_variables.count(_item.first.first) ||
_variables.count(_item.first.second) ||
_variables.count(_item.second);
_variablesToClear.count(_item.first.first) ||
_variablesToClear.count(_item.first.second) ||
_variablesToClear.count(_item.second);
});

// Also clear variables that reference variables to be cleared.
std::set<YulName> referencingVariables;
for (auto const& variableToClear: _variables)
for (auto const& [ref, names]: m_state.references)
if (names.count(variableToClear))
referencingVariables.emplace(ref);
std::set<YulName> referencingVariablesToClear;
std::vector const sortedVariablesToClear(_variablesToClear.begin(), _variablesToClear.end());
for (auto const& [referencingVariable, referencedVariables]: m_state.sortedReferences)
// instead of checking each variable in `referencedVariables`, we check if there is any intersection making use of the
// sortedness of the vectors, which can increase performance by up to 50% in pathological cases
if (hasNonemptyIntersectionSorted(referencedVariables, sortedVariablesToClear))
referencingVariablesToClear.emplace(referencingVariable);

// Clear the value and update the reference relation.
for (auto const& name: _variables + referencingVariables)
for (auto const& name: _variablesToClear + referencingVariablesToClear)
{
m_state.value.erase(name);
m_state.references.erase(name);
m_state.sortedReferences.erase(name);
}
}

Expand Down
7 changes: 4 additions & 3 deletions libyul/optimiser/DataFlowAnalyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class DataFlowAnalyzer: public ASTModifier

/// @returns the current value of the given variable, if known - always movable.
AssignedValue const* variableValue(YulName _variable) const { return util::valueOrNullptr(m_state.value, _variable); }
std::set<YulName> const* references(YulName _variable) const { return util::valueOrNullptr(m_state.references, _variable); }
std::vector<YulName> const* sortedReferences(YulName _variable) const { return util::valueOrNullptr(m_state.sortedReferences, _variable); }
std::map<YulName, AssignedValue> const& allValues() const { return m_state.value; }
std::optional<YulName> storageValue(YulName _key) const;
std::optional<YulName> memoryValue(YulName _key) const;
Expand All @@ -122,7 +122,7 @@ class DataFlowAnalyzer: public ASTModifier

/// Clears information about the values assigned to the given variables,
/// for example at points where control flow is merged.
void clearValues(std::set<YulName> _names);
void clearValues(std::set<YulName> const& _variablesToClear);

virtual void assignValue(YulName _variable, Expression const* _value);

Expand Down Expand Up @@ -180,7 +180,8 @@ class DataFlowAnalyzer: public ASTModifier
/// Current values of variables, always movable.
std::map<YulName, AssignedValue> value;
/// m_references[a].contains(b) <=> the current expression assigned to a references b
std::unordered_map<YulName, std::set<YulName>> references;
/// The mapped vectors _must always_ be sorted
std::unordered_map<YulName, std::vector<YulName>> sortedReferences;

Environment environment;
};
Expand Down
2 changes: 1 addition & 1 deletion libyul/optimiser/Rematerialiser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void Rematerialiser::visit(Expression& _e)
)
{
assertThrow(m_referenceCounts[name] > 0, OptimizerException, "");
auto variableReferences = references(name);
auto variableReferences = sortedReferences(name);
if (!variableReferences || ranges::all_of(*variableReferences, [&](auto const& ref) { return inScope(ref); }))
{
// update reference counts
Expand Down

0 comments on commit 5c0ba6c

Please sign in to comment.