From 0d80e9d40d13ce1df96ad8da84a263d28ae6e6ba Mon Sep 17 00:00:00 2001 From: konsti Date: Wed, 13 Mar 2024 18:55:48 +0100 Subject: [PATCH] perf: specialize range operations for better performances (#177) * Use binary search for `Range::contains` * Avoid cloning and dropping the term in prior_cause * Simplify contains * Specialize union code * remove a redundant check and reuse helper * Specialize is_disjoint * simplify and add tests for is_disjoint * Specialize subset_of * simplify and add tests for subset_of * Specialize range operations for better performance * add tests and use in more places * Fix clippy lints * replace a complement with is_disjoint * one fewer complement using intersection * use the symmetry of set functions --------- Co-authored-by: Jacob Finkelman --- src/internal/incompatibility.rs | 7 +- src/internal/partial_solution.rs | 2 +- src/internal/small_map.rs | 43 +++++ src/range.rs | 263 ++++++++++++++++++++++++++----- src/term.rs | 85 ++++++++-- src/version_set.rs | 10 ++ 6 files changed, 356 insertions(+), 54 deletions(-) diff --git a/src/internal/incompatibility.rs b/src/internal/incompatibility.rs index 2dc6fbde..6f6d8010 100644 --- a/src/internal/incompatibility.rs +++ b/src/internal/incompatibility.rs @@ -173,8 +173,11 @@ impl Incompatibility { incompatibility_store: &Arena, ) -> Self { let kind = Kind::DerivedFrom(incompat, satisfier_cause); - let mut package_terms = incompatibility_store[incompat].package_terms.clone(); - let t1 = package_terms.remove(package).unwrap(); + // Optimization to avoid cloning and dropping t1 + let (t1, mut package_terms) = incompatibility_store[incompat] + .package_terms + .split_one(package) + .unwrap(); let satisfier_cause_terms = &incompatibility_store[satisfier_cause].package_terms; package_terms.merge( satisfier_cause_terms.iter().filter(|(p, _)| p != &package), diff --git a/src/internal/partial_solution.rs b/src/internal/partial_solution.rs index cf1d24db..9a4c8841 100644 --- a/src/internal/partial_solution.rs +++ b/src/internal/partial_solution.rs @@ -501,7 +501,7 @@ impl PackageAssignments { let idx = self .dated_derivations .as_slice() - .partition_point(|dd| dd.accumulated_intersection.intersection(start_term) != empty); + .partition_point(|dd| !dd.accumulated_intersection.is_disjoint(start_term)); if let Some(dd) = self.dated_derivations.get(idx) { debug_assert_eq!(dd.accumulated_intersection.intersection(start_term), empty); return (Some(dd.cause), dd.global_index, dd.decision_level); diff --git a/src/internal/small_map.rs b/src/internal/small_map.rs index 8fc16a06..476af91d 100644 --- a/src/internal/small_map.rs +++ b/src/internal/small_map.rs @@ -99,6 +99,49 @@ impl SmallMap { } }; } + + /// Returns a reference to the value for one key and a copy of the map without the key. + /// + /// This is an optimization over the following, where we only need a reference to `t1`. It + /// avoids cloning and then drop the ranges in each `prior_cause` call. + /// ```ignore + /// let mut package_terms = package_terms.clone(); + // let t1 = package_terms.remove(package).unwrap(); + /// ``` + pub fn split_one(&self, key: &K) -> Option<(&V, Self)> + where + K: Clone, + V: Clone, + { + match self { + Self::Empty => None, + Self::One([(k, v)]) => { + if k == key { + Some((v, Self::Empty)) + } else { + None + } + } + Self::Two([(k1, v1), (k2, v2)]) => { + if k1 == key { + Some((v1, Self::One([(k2.clone(), v2.clone())]))) + } else if k2 == key { + Some((v2, Self::One([(k1.clone(), v1.clone())]))) + } else { + None + } + } + Self::Flexible(map) => { + if let Some(value) = map.get(key) { + let mut map = map.clone(); + map.remove(key).unwrap(); + Some((value, Self::Flexible(map))) + } else { + None + } + } + } + } } impl SmallMap { diff --git a/src/range.rs b/src/range.rs index a7eb0e82..4498e2f9 100644 --- a/src/range.rs +++ b/src/range.rs @@ -221,19 +221,19 @@ impl Range { }) } - /// Returns true if the this Range contains the specified value. - pub fn contains(&self, v: &V) -> bool { - for segment in self.segments.iter() { - match within_bounds(v, segment) { - Ordering::Less => return false, - Ordering::Equal => return true, - Ordering::Greater => (), - } - } - false + /// Returns true if this Range contains the specified value. + pub fn contains(&self, version: &V) -> bool { + self.segments + .binary_search_by(|segment| { + // We have to reverse because we need the segment wrt to the version, while + // within bounds tells us the version wrt to the segment. + within_bounds(version, segment).reverse() + }) + // An equal interval is one that contains the version + .is_ok() } - /// Returns true if the this Range contains the specified values. + /// Returns true if this Range contains the specified values. /// /// The `versions` iterator must be sorted. /// Functionally equivalent to `versions.map(|v| self.contains(v))`. @@ -298,14 +298,7 @@ impl Range { fn check_invariants(self) -> Self { if cfg!(debug_assertions) { for p in self.segments.as_slice().windows(2) { - match (&p[0].1, &p[1].0) { - (Included(l_end), Included(r_start)) => assert!(l_end < r_start), - (Included(l_end), Excluded(r_start)) => assert!(l_end < r_start), - (Excluded(l_end), Included(r_start)) => assert!(l_end < r_start), - (Excluded(l_end), Excluded(r_start)) => assert!(l_end <= r_start), - (_, Unbounded) => panic!(), - (Unbounded, _) => panic!(), - } + assert!(end_before_start_with_gap(&p[0].1, &p[1].0)); } for (s, e) in self.segments.iter() { assert!(valid_segment(s, e)); @@ -315,10 +308,16 @@ impl Range { } } -fn within_bounds(v: &V, segment: &Interval) -> Ordering { +/// The ordering of the version wrt to the interval. +/// ```text +/// |-------| +/// ^ ^ ^ +/// less equal greater +/// ``` +fn within_bounds(version: &V, segment: &Interval) -> Ordering { let below_lower_bound = match segment { - (Excluded(start), _) => v <= start, - (Included(start), _) => v < start, + (Excluded(start), _) => version <= start, + (Included(start), _) => version < start, (Unbounded, _) => false, }; if below_lower_bound { @@ -326,8 +325,8 @@ fn within_bounds(v: &V, segment: &Interval) -> Ordering { } let below_upper_bound = match segment { (_, Unbounded) => true, - (_, Included(end)) => v <= end, - (_, Excluded(end)) => v < end, + (_, Included(end)) => version <= end, + (_, Excluded(end)) => version < end, }; if below_upper_bound { return Ordering::Equal; @@ -335,8 +334,10 @@ fn within_bounds(v: &V, segment: &Interval) -> Ordering { Ordering::Greater } +/// A valid segment is one where at least one version fits between start and end fn valid_segment(start: &Bound, end: &Bound) -> bool { match (start, end) { + // Singleton interval are allowed (Included(s), Included(e)) => s <= e, (Included(s), Excluded(e)) => s < e, (Excluded(s), Included(e)) => s < e, @@ -345,6 +346,56 @@ fn valid_segment(start: &Bound, end: &Bound) -> bool { } } +/// The end of one interval is before the start of the next one, so they can't be concatenated +/// into a single interval. The `union` method calling with both intervals and then the intervals +/// switched. If either is true, the intervals are separate in the union and if both are false, they +/// are merged. +/// ```text +/// True for these two: +/// |----| +/// |-----| +/// ^ end ^ start +/// False for these two: +/// |----| +/// |-----| +/// Here it depends: If they both exclude the position they share, there is a version in between +/// them that blocks concatenation +/// |----| +/// |-----| +/// ``` +fn end_before_start_with_gap(end: &Bound, start: &Bound) -> bool { + match (end, start) { + (_, Unbounded) => false, + (Unbounded, _) => false, + (Included(left), Included(right)) => left < right, + (Included(left), Excluded(right)) => left < right, + (Excluded(left), Included(right)) => left < right, + (Excluded(left), Excluded(right)) => left <= right, + } +} + +fn left_start_is_smaller(left: Bound, right: Bound) -> bool { + match (left, right) { + (Unbounded, _) => true, + (_, Unbounded) => false, + (Included(l), Included(r)) => l <= r, + (Excluded(l), Excluded(r)) => l <= r, + (Included(l), Excluded(r)) => l <= r, + (Excluded(l), Included(r)) => l < r, + } +} + +fn left_end_is_smaller(left: Bound, right: Bound) -> bool { + match (left, right) { + (_, Unbounded) => true, + (Unbounded, _) => false, + (Included(l), Included(r)) => l <= r, + (Excluded(l), Excluded(r)) => l <= r, + (Excluded(l), Included(r)) => l <= r, + (Included(l), Excluded(r)) => l < r, + } +} + /// Group adjacent versions locations. /// /// ```text @@ -378,10 +429,60 @@ fn group_adjacent_locations( impl Range { /// Computes the union of this `Range` and another. pub fn union(&self, other: &Self) -> Self { - self.complement() - .intersection(&other.complement()) - .complement() - .check_invariants() + let mut output: SmallVec> = SmallVec::empty(); + let mut accumulator: Option<(&Bound<_>, &Bound<_>)> = None; + let mut left_iter = self.segments.iter().peekable(); + let mut right_iter = other.segments.iter().peekable(); + loop { + let smaller_interval = match (left_iter.peek(), right_iter.peek()) { + (Some((left_start, left_end)), Some((right_start, right_end))) => { + if left_start_is_smaller(left_start.as_ref(), right_start.as_ref()) { + left_iter.next(); + (left_start, left_end) + } else { + right_iter.next(); + (right_start, right_end) + } + } + (Some((left_start, left_end)), None) => { + left_iter.next(); + (left_start, left_end) + } + (None, Some((right_start, right_end))) => { + right_iter.next(); + (right_start, right_end) + } + (None, None) => break, + }; + + if let Some(accumulator_) = accumulator { + if end_before_start_with_gap(accumulator_.1, smaller_interval.0) { + output.push((accumulator_.0.clone(), accumulator_.1.clone())); + accumulator = Some(smaller_interval); + } else { + let accumulator_end = match (accumulator_.1, smaller_interval.1) { + (_, Unbounded) | (Unbounded, _) => &Unbounded, + (Included(l), Excluded(r) | Included(r)) if l == r => accumulator_.1, + (Included(l) | Excluded(l), Included(r) | Excluded(r)) => { + if l > r { + accumulator_.1 + } else { + smaller_interval.1 + } + } + }; + accumulator = Some((accumulator_.0, accumulator_end)); + } + } else { + accumulator = Some(smaller_interval) + } + } + + if let Some(accumulator) = accumulator { + output.push((accumulator.0.clone(), accumulator.1.clone())); + } + + Self { segments: output }.check_invariants() } /// Computes the intersection of two sets of versions. @@ -399,15 +500,7 @@ impl Range { left_iter.peek().zip(right_iter.peek()) { // The next smallest `end` value is going to come from one of the inputs. - let left_end_is_smaller = match (left_end, right_end) { - (Included(l), Included(r)) - | (Excluded(l), Excluded(r)) - | (Excluded(l), Included(r)) => l <= r, - - (Included(l), Excluded(r)) => l < r, - (_, Unbounded) => true, - (Unbounded, _) => false, - }; + let left_end_is_smaller = left_end_is_smaller(left_end.as_ref(), right_end.as_ref()); // Now that we are processing `end` we will never have to process any segment smaller than that. // We can ensure that the input that `end` came from is larger than `end` by advancing it one step. // `end` is the smaller available input, so we know the other input is already larger than `end`. @@ -451,6 +544,72 @@ impl Range { Self { segments: output }.check_invariants() } + /// Return true if there can be no `V` so that `V` is contained in both `self` and `other`. + /// + /// Note that we don't know that set of all existing `V`s here, so we only check if the segments + /// are disjoint, not if no version is contained in both. + pub fn is_disjoint(&self, other: &Self) -> bool { + // The operation is symmetric + let mut left_iter = self.segments.iter().peekable(); + let mut right_iter = other.segments.iter().peekable(); + + while let Some((left, right)) = left_iter.peek().zip(right_iter.peek()) { + if !valid_segment(&right.start_bound(), &left.end_bound()) { + left_iter.next(); + } else if !valid_segment(&left.start_bound(), &right.end_bound()) { + right_iter.next(); + } else { + return false; + } + } + + // The remaining element(s) can't intersect anymore + true + } + + /// Return true if any `V` that is contained in `self` is also contained in `other`. + /// + /// Note that we don't know that set of all existing `V`s here, so we only check if all + /// segments `self` are contained in a segment of `other`. + pub fn subset_of(&self, other: &Self) -> bool { + let mut containing_iter = other.segments.iter(); + let mut subset_iter = self.segments.iter(); + let Some(mut containing_elem) = containing_iter.next() else { + // As long as we have subset elements, we need containing elements + return subset_iter.next().is_none(); + }; + + for subset_elem in subset_iter { + // Check if the current containing element ends before the subset element. + // There needs to be another containing element for our subset element in this case. + while !valid_segment(&subset_elem.start_bound(), &containing_elem.end_bound()) { + if let Some(containing_elem_) = containing_iter.next() { + containing_elem = containing_elem_; + } else { + return false; + }; + } + + let start_contained = + left_start_is_smaller(containing_elem.start_bound(), subset_elem.start_bound()); + + if !start_contained { + // The start element is not contained + return false; + } + + let end_contained = + left_end_is_smaller(subset_elem.end_bound(), containing_elem.end_bound()); + + if !end_contained { + // The end element is not contained + return false; + } + } + + true + } + /// Returns a simpler Range that contains the same versions /// /// For every one of the Versions provided in versions the existing range and @@ -550,6 +709,14 @@ impl VersionSet for Range { fn union(&self, other: &Self) -> Self { Range::union(self, other) } + + fn is_disjoint(&self, other: &Self) -> bool { + Range::is_disjoint(self, other) + } + + fn subset_of(&self, other: &Self) -> bool { + Range::subset_of(self, other) + } } // REPORT ###################################################################### @@ -761,6 +928,28 @@ pub mod tests { assert_eq!(r1.union(&r2).contains(&version), r1.contains(&version) || r2.contains(&version)); } + #[test] + fn is_disjoint_through_intersection(r1 in strategy(), r2 in strategy()) { + let disjoint_def = r1.intersection(&r2) == Range::empty(); + assert_eq!(r1.is_disjoint(&r2), disjoint_def); + } + + #[test] + fn subset_of_through_intersection(r1 in strategy(), r2 in strategy()) { + let disjoint_def = r1.intersection(&r2) == r1; + assert_eq!(r1.subset_of(&r2), disjoint_def); + } + + #[test] + fn union_through_intersection(r1 in strategy(), r2 in strategy()) { + let union_def = r1 + .complement() + .intersection(&r2.complement()) + .complement() + .check_invariants(); + assert_eq!(r1.union(&r2), union_def); + } + // Testing contains -------------------------------- #[test] diff --git a/src/term.rs b/src/term.rs index 2974da62..55ac7518 100644 --- a/src/term.rs +++ b/src/term.rs @@ -6,7 +6,14 @@ use crate::version_set::VersionSet; use std::fmt::{self, Display}; -/// A positive or negative expression regarding a set of versions. +/// A positive or negative expression regarding a set of versions. +/// +/// If a version is selected then `Positive(r)` and `Negative(r.complement())` are equivalent, but +/// they have different semantics when no version is selected. A `Positive` term in the partial +/// solution requires a version to be selected. But a `Negative` term allows for a solution that +/// does not have that package selected. Specifically, `Positive(VS::empty())` means that there was +/// a conflict, we need to select a version for the package but can't pick any, while +/// `Negative(VS::full())` would mean it is fine as long as we don't select the package. #[derive(Debug, Clone, Eq, PartialEq)] pub enum Term { /// For example, "1.0.0 <= v < 2.0.0" is a positive expression @@ -84,32 +91,55 @@ impl Term { /// Set operations with terms. impl Term { /// Compute the intersection of two terms. - /// If at least one term is positive, the intersection is also positive. + /// + /// The intersection is positive if at least one of the two terms is positive. pub(crate) fn intersection(&self, other: &Self) -> Self { match (self, other) { (Self::Positive(r1), Self::Positive(r2)) => Self::Positive(r1.intersection(r2)), - (Self::Positive(r1), Self::Negative(r2)) => { - Self::Positive(r1.intersection(&r2.complement())) - } - (Self::Negative(r1), Self::Positive(r2)) => { - Self::Positive(r1.complement().intersection(r2)) + (Self::Positive(p), Self::Negative(n)) | (Self::Negative(n), Self::Positive(p)) => { + Self::Positive(n.complement().intersection(p)) } (Self::Negative(r1), Self::Negative(r2)) => Self::Negative(r1.union(r2)), } } + /// Check whether two terms are mutually exclusive. + /// + /// An optimization for the native implementation of checking whether the intersection of two sets is empty. + pub(crate) fn is_disjoint(&self, other: &Self) -> bool { + match (self, other) { + (Self::Positive(r1), Self::Positive(r2)) => r1.is_disjoint(r2), + (Self::Negative(r1), Self::Negative(r2)) => r1 == &VS::empty() && r2 == &VS::empty(), + // If the positive term is a subset of the negative term, it lies fully in the region that the negative + // term excludes. + (Self::Positive(p), Self::Negative(n)) | (Self::Negative(n), Self::Positive(p)) => { + p.subset_of(n) + } + } + } + /// Compute the union of two terms. /// If at least one term is negative, the union is also negative. pub(crate) fn union(&self, other: &Self) -> Self { - (self.negate().intersection(&other.negate())).negate() + match (self, other) { + (Self::Positive(r1), Self::Positive(r2)) => Self::Positive(r1.union(r2)), + (Self::Positive(p), Self::Negative(n)) | (Self::Negative(n), Self::Positive(p)) => { + Self::Negative(p.complement().intersection(n)) + } + (Self::Negative(r1), Self::Negative(r2)) => Self::Negative(r1.intersection(r2)), + } } /// Indicate if this term is a subset of another term. /// Just like for sets, we say that t1 is a subset of t2 /// if and only if t1 ∩ t2 = t1. - #[cfg(test)] pub(crate) fn subset_of(&self, other: &Self) -> bool { - self == &self.intersection(other) + match (self, other) { + (Self::Positive(r1), Self::Positive(r2)) => r1.subset_of(r2), + (Self::Positive(r1), Self::Negative(r2)) => r1.is_disjoint(r2), + (Self::Negative(_), Self::Positive(_)) => false, + (Self::Negative(r1), Self::Negative(r2)) => r2.subset_of(r1), + } } } @@ -158,10 +188,9 @@ impl Term { /// Check if a set of terms satisfies or contradicts a given term. /// Otherwise the relation is inconclusive. pub(crate) fn relation_with(&self, other_terms_intersection: &Self) -> Relation { - let full_intersection = self.intersection(other_terms_intersection); - if &full_intersection == other_terms_intersection { + if other_terms_intersection.subset_of(self) { Relation::Satisfied - } else if full_intersection == Self::empty() { + } else if self.is_disjoint(other_terms_intersection) { Relation::Contradicted } else { Relation::Inconclusive @@ -200,7 +229,6 @@ pub mod tests { crate::range::tests::strategy().prop_map(Term::Negative), ] } - proptest! { // Testing relation -------------------------------- @@ -217,5 +245,34 @@ pub mod tests { } } + /// Ensure that we don't wrongly convert between positive and negative ranges + #[test] + fn positive_negative(term1 in strategy(), term2 in strategy()) { + let intersection_positive = term1.is_positive() || term2.is_positive(); + let union_positive = term1.is_positive() && term2.is_positive(); + assert_eq!(term1.intersection(&term2).is_positive(), intersection_positive); + assert_eq!(term1.union(&term2).is_positive(), union_positive); + } + + #[test] + fn is_disjoint_through_intersection(r1 in strategy(), r2 in strategy()) { + let disjoint_def = r1.intersection(&r2) == Term::empty(); + assert_eq!(r1.is_disjoint(&r2), disjoint_def); + } + + #[test] + fn subset_of_through_intersection(r1 in strategy(), r2 in strategy()) { + let disjoint_def = r1.intersection(&r2) == r1; + assert_eq!(r1.subset_of(&r2), disjoint_def); + } + + #[test] + fn union_through_intersection(r1 in strategy(), r2 in strategy()) { + let union_def = r1 + .negate() + .intersection(&r2.negate()) + .negate(); + assert_eq!(r1.union(&r2), union_def); + } } } diff --git a/src/version_set.rs b/src/version_set.rs index 501ec700..fae22948 100644 --- a/src/version_set.rs +++ b/src/version_set.rs @@ -57,4 +57,14 @@ pub trait VersionSet: Debug + Display + Clone + Eq { .intersection(&other.complement()) .complement() } + + /// Whether the range have no overlapping segmets + fn is_disjoint(&self, other: &Self) -> bool { + self.intersection(other) == Self::empty() + } + + /// Whether all range of `self` are contained in `other` + fn subset_of(&self, other: &Self) -> bool { + self == &self.intersection(other) + } }