From a6b84ccf271aee24116cef806da722eea688176a Mon Sep 17 00:00:00 2001 From: Filip Tibell Date: Tue, 16 Jul 2024 13:26:35 +0200 Subject: [PATCH] Implement improved compat sorting when multiple artifacts are compatible (#45) --- CHANGELOG.md | 8 ++ lib/descriptor/arch.rs | 4 +- lib/descriptor/mod.rs | 4 - lib/descriptor/os.rs | 4 +- lib/sources/artifact/mod.rs | 67 ++++++------- lib/sources/artifact/sorting.rs | 166 ++++++++++++++++++++++++++++++++ lib/util/str.rs | 9 ++ 7 files changed, 219 insertions(+), 43 deletions(-) create mode 100644 lib/sources/artifact/sorting.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 837537e..b05f16c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Fixed + +- Fixed `selene` and other tools not being installable because their releases contain multiple archives / binaries ([#45]) + +[#45]: https://github.com/rojo-rbx/rokit/pull/45 + ## `0.1.7` - July 15th, 2024 ### Fixed diff --git a/lib/descriptor/arch.rs b/lib/descriptor/arch.rs index e143a96..d22d3b5 100644 --- a/lib/descriptor/arch.rs +++ b/lib/descriptor/arch.rs @@ -1,6 +1,8 @@ use std::env::consts::ARCH as CURRENT_ARCH; -use super::{char_is_word_separator, executable_parsing::parse_executable, OS}; +use crate::util::str::char_is_word_separator; + +use super::{executable_parsing::parse_executable, OS}; // Matching substrings - these can be partial matches, eg. "wordwin64" will match as x64 arch // These will take priority over full word matches, and should be as precise as possible diff --git a/lib/descriptor/mod.rs b/lib/descriptor/mod.rs index af75747..8351eac 100644 --- a/lib/descriptor/mod.rs +++ b/lib/descriptor/mod.rs @@ -181,10 +181,6 @@ impl FromStr for Descriptor { } } -fn char_is_word_separator(c: char) -> bool { - c == '-' || c == '_' || c.is_ascii_whitespace() -} - #[cfg(test)] mod tests { use super::*; diff --git a/lib/descriptor/os.rs b/lib/descriptor/os.rs index 15144bb..a6c1c8d 100644 --- a/lib/descriptor/os.rs +++ b/lib/descriptor/os.rs @@ -1,6 +1,8 @@ use std::env::consts::OS as CURRENT_OS; -use super::{char_is_word_separator, executable_parsing::parse_executable}; +use crate::util::str::char_is_word_separator; + +use super::executable_parsing::parse_executable; // Matching substrings - these can be partial matches, eg. "wordwin64" will match as windows OS // These will take priority over full word matches, and should be as precise as possible diff --git a/lib/sources/artifact/mod.rs b/lib/sources/artifact/mod.rs index 0bbd0b6..8533d72 100644 --- a/lib/sources/artifact/mod.rs +++ b/lib/sources/artifact/mod.rs @@ -16,8 +16,10 @@ use super::{ mod format; mod provider; +mod sorting; mod util; +use self::sorting::sort_preferred_artifact; use self::util::split_filename_and_extensions; pub use self::format::ArtifactFormat; @@ -112,7 +114,7 @@ impl Artifact { } /** - Sorts the given artifacts by system compatibility. + Sorts the given artifacts by their compatibility with the current system. See also: @@ -121,33 +123,7 @@ impl Artifact { - [`Descriptor::sort_by_preferred_compat`] */ pub fn sort_by_system_compatibility(artifacts: impl AsRef<[Self]>) -> Vec { - let current_desc = Descriptor::current_system(); - - let mut compatible_artifacts = artifacts - .as_ref() - .iter() - .filter_map(|artifact| { - let name = artifact.name.as_deref()?; - if let Some(asset_desc) = Descriptor::detect(name) { - if current_desc.is_compatible_with(&asset_desc) { - Some((asset_desc, artifact)) - } else { - None - } - } else { - None - } - }) - .collect::>(); - - compatible_artifacts.sort_by(|(desc_a, _), (desc_b, _)| { - current_desc.sort_by_preferred_compat(desc_a, desc_b) - }); - - compatible_artifacts - .into_iter() - .map(|(_, artifact)| artifact.clone()) - .collect() + Self::sort_by_system_compatibility_inner(artifacts, false) } /** @@ -155,19 +131,31 @@ impl Artifact { during artifact selection if [`Artifact::sort_by_system_compatibility`] finds no system-compatible artifacts to use. - Returns `None` if more than one artifact is partially compatible. + Note that this not is guaranteed to be compatible with the current + system, the contents of the artifact should be checked before use. */ pub fn find_partially_compatible_fallback(artifacts: impl AsRef<[Self]>) -> Option { + Self::sort_by_system_compatibility_inner(artifacts, true) + .into_iter() + .next() + } + + fn sort_by_system_compatibility_inner( + artifacts: impl AsRef<[Self]>, + allow_partial_compatibility: bool, + ) -> Vec { let current_desc = Descriptor::current_system(); - let os_compatible_artifacts = artifacts + let mut compatible_artifacts = artifacts .as_ref() .iter() .filter_map(|artifact| { let name = artifact.name.as_deref()?; if let Some(asset_desc) = Descriptor::detect(name) { - if current_desc.os() == asset_desc.os() { - Some(artifact) + let is_fully_compatible = current_desc.is_compatible_with(&asset_desc); + let is_os_compatible = current_desc.os() == asset_desc.os(); + if is_fully_compatible || (allow_partial_compatibility && is_os_compatible) { + Some((asset_desc, artifact)) } else { None } @@ -177,10 +165,15 @@ impl Artifact { }) .collect::>(); - if os_compatible_artifacts.len() == 1 { - Some(os_compatible_artifacts[0].clone()) - } else { - None - } + compatible_artifacts.sort_by(|(desc_a, artifact_a), (desc_b, artifact_b)| { + current_desc + .sort_by_preferred_compat(desc_a, desc_b) + .then_with(|| sort_preferred_artifact(artifact_a, artifact_b)) + }); + + compatible_artifacts + .into_iter() + .map(|(_, artifact)| artifact.clone()) + .collect() } } diff --git a/lib/sources/artifact/sorting.rs b/lib/sources/artifact/sorting.rs new file mode 100644 index 0000000..e9208c0 --- /dev/null +++ b/lib/sources/artifact/sorting.rs @@ -0,0 +1,166 @@ +use std::cmp::Ordering; + +use semver::Version; + +use crate::{ + descriptor::{Arch, OS}, + tool::ToolId, + util::str::char_is_word_separator, +}; + +use super::Artifact; + +/** + Helper function to sort which artifact is preferred, based on + heuristics such as which tool name mentions the artifact name + more closely, and is probably more desirable for a user. + + This **currently** means that, if a tool is named `tool`, + and we have these two artifacts: + + - Artifact A: `tool-v1.0.0-x86_64-linux` + - Artifact B: `tool-with-extras-in-name-v1.0.0-x86_64-linux` + + Then A would be preferred, because it mentions + the tool name more precisely, and nothing else. + + Note that this sorting method is subject to change + and should not be directly exposed in a public API. +*/ +pub(super) fn sort_preferred_artifact(artifact_a: &Artifact, artifact_b: &Artifact) -> Ordering { + let count_a = count_non_tool_mentions( + artifact_a.name.as_deref().unwrap_or_default(), + artifact_a.tool_spec.id(), + ); + let count_b = count_non_tool_mentions( + artifact_b.name.as_deref().unwrap_or_default(), + artifact_b.tool_spec.id(), + ); + count_a.cmp(&count_b) +} + +fn count_non_tool_mentions(name: impl AsRef, tool_id: &ToolId) -> usize { + let name = name.as_ref(); + if name.trim().is_empty() { + return 0; + } + + let name_words = name + .split(char_is_word_separator) + .filter(|s| word_is_not_arch_or_os_or_version_or_numeric(s)) + .collect::>(); + let tool_words = tool_id + .name() + .split(char_is_word_separator) + .collect::>(); + + #[allow(clippy::cast_possible_wrap)] + let len_diff = ((name_words.len() as isize) - (tool_words.len() as isize)).unsigned_abs(); + + let mut word_diff = 0; + for (name_word, tool_word) in name_words.into_iter().zip(tool_words) { + if !name_word.eq_ignore_ascii_case(tool_word) { + word_diff += 1; + } + } + + len_diff + word_diff +} + +fn word_is_not_arch_or_os_or_version_or_numeric(word: impl AsRef) -> bool { + let word = word.as_ref(); + Arch::detect(word).is_none() + && OS::detect(word).is_none() + && word.trim_start_matches('v').parse::().is_err() + && !word.chars().all(char::is_numeric) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn new_id(author: &str, name: &str) -> ToolId { + format!("{author}/{name}").parse().unwrap() + } + + fn test_no_mentions(name: &str, tool_name: &str) { + let tool_id = new_id("author", tool_name); + assert_eq!( + count_non_tool_mentions(name, &tool_id), + 0, + "Expected no non-tool mentions in name: {name}" + ); + } + + fn test_some_mentions(name: &str, tool_name: &str) { + let tool_id = new_id("author", tool_name); + assert_ne!( + count_non_tool_mentions(name, &tool_id), + 0, + "Expected non-tool mentions in name: {name}" + ); + } + + #[test] + fn name_mention_check_tool_valid() { + // Single word tools + test_no_mentions("tool", "tool"); + test_no_mentions("tool-linux", "tool"); + test_no_mentions("tool-v1.0.0", "tool"); + test_no_mentions("tool-1.0.0-x86_64-linux", "tool"); + test_no_mentions("tool-x86_64-linux", "tool"); + test_no_mentions("tool-x86_64-linux-v1.0.0", "tool"); + // Multiple word tools + test_no_mentions("super-tool", "super-tool"); + test_no_mentions("super-tool-linux", "super-tool"); + test_no_mentions("super-tool-v1.0.0", "super-tool"); + test_no_mentions("super-tool-1.0.0-x86_64-linux", "super-tool"); + test_no_mentions("super-mega-tool", "super-mega-tool"); + test_no_mentions("super-mega-tool-linux", "super-mega-tool"); + test_no_mentions("super-mega-tool-v1.0.0", "super-mega-tool"); + test_no_mentions("super-mega-tool-1.0.0-x86_64-linux", "super-mega-tool"); + } + + #[test] + fn name_mention_check_tool_invalid() { + // Contains similar but not exact word + test_some_mentions("tooling", "tool"); + test_some_mentions("tooling-linux", "tool"); + test_some_mentions("tooling-v1.0.0", "tool"); + test_some_mentions("tooling-1.0.0-x86_64-linux", "tool"); + test_some_mentions("tooling-x86_64-linux", "tool"); + test_some_mentions("tooling-x86_64-linux-v1.0.0", "tool"); + // Contains the exact word, but also others + test_some_mentions("super-tool", "tool"); + test_some_mentions("super-tool-linux", "tool"); + test_some_mentions("super-tool-v1.0.0", "tool"); + test_some_mentions("super-tool-1.0.0-x86_64-linux", "tool"); + test_some_mentions("super-mega-tool", "tool"); + test_some_mentions("super-mega-tool-linux", "tool"); + test_some_mentions("super-mega-tool-v1.0.0", "tool"); + test_some_mentions("super-mega-tool-1.0.0-x86_64-linux", "tool"); + } + + #[test] + fn name_mention_check_case_insensitive() { + test_no_mentions("Tool-x86_64-linux", "tool"); + test_no_mentions("tOOl-x86_64-linux", "tool"); + test_no_mentions("TOOL-x86_64-linux", "tool"); + test_some_mentions("Tooling-x86_64-linux", "tool"); + test_some_mentions("tOOling-x86_64-linux", "tool"); + test_some_mentions("TOOLING-x86_64-linux", "tool"); + } + + #[test] + fn name_mention_check_real_tools() { + // Valid + test_no_mentions("wally-v0.3.2-linux.zip", "wally"); + test_no_mentions("lune-0.8.6-macos-aarch64.zip", "lune"); + test_no_mentions("selene-0.27.1-linux.zip", "selene"); + // Invalid + test_some_mentions("selene-light-0.27.1-linux.zip", "selene"); + // Valid - but multiple words + test_no_mentions("sentry-cli-linux-i686-2.32.1", "sentry-cli"); + test_no_mentions("selene-light-0.27.1-linux.zip", "selene-light"); + } +} diff --git a/lib/util/str.rs b/lib/util/str.rs index b17753e..c28a580 100644 --- a/lib/util/str.rs +++ b/lib/util/str.rs @@ -1,5 +1,14 @@ use std::fmt; +/** + Checks if the given character is a "word" separator. + + For internal use only. +*/ +pub(crate) const fn char_is_word_separator(c: char) -> bool { + c.is_ascii_whitespace() || matches!(c, '-' | '_') +} + /** A case-insensitive string wrapper.