Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement improved compat sorting when multiple artifacts are compatible #45

Merged
merged 2 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

### Fixed

- Fixed `selene` and other tools not being installable because their releases contain multiple archives / binaries ([#45])

[#45]: https://github.com/rojo-rbx/rokit/pull/45

## `0.1.7` - July 15th, 2024

### Fixed
Expand Down
4 changes: 3 additions & 1 deletion lib/descriptor/arch.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::env::consts::ARCH as CURRENT_ARCH;

use super::{char_is_word_separator, executable_parsing::parse_executable, OS};
use crate::util::str::char_is_word_separator;

use super::{executable_parsing::parse_executable, OS};

// Matching substrings - these can be partial matches, eg. "wordwin64" will match as x64 arch
// These will take priority over full word matches, and should be as precise as possible
Expand Down
4 changes: 0 additions & 4 deletions lib/descriptor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,6 @@ impl FromStr for Descriptor {
}
}

fn char_is_word_separator(c: char) -> bool {
c == '-' || c == '_' || c.is_ascii_whitespace()
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
4 changes: 3 additions & 1 deletion lib/descriptor/os.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::env::consts::OS as CURRENT_OS;

use super::{char_is_word_separator, executable_parsing::parse_executable};
use crate::util::str::char_is_word_separator;

use super::executable_parsing::parse_executable;

// Matching substrings - these can be partial matches, eg. "wordwin64" will match as windows OS
// These will take priority over full word matches, and should be as precise as possible
Expand Down
67 changes: 30 additions & 37 deletions lib/sources/artifact/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ use super::{

mod format;
mod provider;
mod sorting;
mod util;

use self::sorting::sort_preferred_artifact;
use self::util::split_filename_and_extensions;

pub use self::format::ArtifactFormat;
Expand Down Expand Up @@ -112,7 +114,7 @@ impl Artifact {
}

/**
Sorts the given artifacts by system compatibility.
Sorts the given artifacts by their compatibility with the current system.

See also:

Expand All @@ -121,53 +123,39 @@ impl Artifact {
- [`Descriptor::sort_by_preferred_compat`]
*/
pub fn sort_by_system_compatibility(artifacts: impl AsRef<[Self]>) -> Vec<Self> {
let current_desc = Descriptor::current_system();

let mut compatible_artifacts = artifacts
.as_ref()
.iter()
.filter_map(|artifact| {
let name = artifact.name.as_deref()?;
if let Some(asset_desc) = Descriptor::detect(name) {
if current_desc.is_compatible_with(&asset_desc) {
Some((asset_desc, artifact))
} else {
None
}
} else {
None
}
})
.collect::<Vec<_>>();

compatible_artifacts.sort_by(|(desc_a, _), (desc_b, _)| {
current_desc.sort_by_preferred_compat(desc_a, desc_b)
});

compatible_artifacts
.into_iter()
.map(|(_, artifact)| artifact.clone())
.collect()
Self::sort_by_system_compatibility_inner(artifacts, false)
}

/**
Tries to find a partially compatible artifact, to be used as a fallback
during artifact selection if [`Artifact::sort_by_system_compatibility`]
finds no system-compatible artifacts to use.

Returns `None` if more than one artifact is partially compatible.
Note that this not is guaranteed to be compatible with the current
system, the contents of the artifact should be checked before use.
*/
pub fn find_partially_compatible_fallback(artifacts: impl AsRef<[Self]>) -> Option<Self> {
Self::sort_by_system_compatibility_inner(artifacts, true)
.into_iter()
.next()
}

fn sort_by_system_compatibility_inner(
artifacts: impl AsRef<[Self]>,
allow_partial_compatibility: bool,
) -> Vec<Self> {
let current_desc = Descriptor::current_system();

let os_compatible_artifacts = artifacts
let mut compatible_artifacts = artifacts
.as_ref()
.iter()
.filter_map(|artifact| {
let name = artifact.name.as_deref()?;
if let Some(asset_desc) = Descriptor::detect(name) {
if current_desc.os() == asset_desc.os() {
Some(artifact)
let is_fully_compatible = current_desc.is_compatible_with(&asset_desc);
let is_os_compatible = current_desc.os() == asset_desc.os();
if is_fully_compatible || (allow_partial_compatibility && is_os_compatible) {
Some((asset_desc, artifact))
} else {
None
}
Expand All @@ -177,10 +165,15 @@ impl Artifact {
})
.collect::<Vec<_>>();

if os_compatible_artifacts.len() == 1 {
Some(os_compatible_artifacts[0].clone())
} else {
None
}
compatible_artifacts.sort_by(|(desc_a, artifact_a), (desc_b, artifact_b)| {
current_desc
.sort_by_preferred_compat(desc_a, desc_b)
.then_with(|| sort_preferred_artifact(artifact_a, artifact_b))
});

compatible_artifacts
.into_iter()
.map(|(_, artifact)| artifact.clone())
.collect()
}
}
166 changes: 166 additions & 0 deletions lib/sources/artifact/sorting.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
use std::cmp::Ordering;

use semver::Version;

use crate::{
descriptor::{Arch, OS},
tool::ToolId,
util::str::char_is_word_separator,
};

use super::Artifact;

/**
Helper function to sort which artifact is preferred, based on
heuristics such as which tool name mentions the artifact name
more closely, and is probably more desirable for a user.

This **currently** means that, if a tool is named `tool`,
and we have these two artifacts:

- Artifact A: `tool-v1.0.0-x86_64-linux`
- Artifact B: `tool-with-extras-in-name-v1.0.0-x86_64-linux`

Then A would be preferred, because it mentions
the tool name more precisely, and nothing else.

Note that this sorting method is subject to change
and should not be directly exposed in a public API.
*/
pub(super) fn sort_preferred_artifact(artifact_a: &Artifact, artifact_b: &Artifact) -> Ordering {
let count_a = count_non_tool_mentions(
artifact_a.name.as_deref().unwrap_or_default(),
artifact_a.tool_spec.id(),
);
let count_b = count_non_tool_mentions(
artifact_b.name.as_deref().unwrap_or_default(),
artifact_b.tool_spec.id(),
);
count_a.cmp(&count_b)
}

fn count_non_tool_mentions(name: impl AsRef<str>, tool_id: &ToolId) -> usize {
let name = name.as_ref();
if name.trim().is_empty() {
return 0;
}

let name_words = name
.split(char_is_word_separator)
.filter(|s| word_is_not_arch_or_os_or_version_or_numeric(s))
.collect::<Vec<_>>();
let tool_words = tool_id
.name()
.split(char_is_word_separator)
.collect::<Vec<_>>();

#[allow(clippy::cast_possible_wrap)]
let len_diff = ((name_words.len() as isize) - (tool_words.len() as isize)).unsigned_abs();

let mut word_diff = 0;
for (name_word, tool_word) in name_words.into_iter().zip(tool_words) {
if !name_word.eq_ignore_ascii_case(tool_word) {
word_diff += 1;
}
}

len_diff + word_diff
}

fn word_is_not_arch_or_os_or_version_or_numeric(word: impl AsRef<str>) -> bool {
let word = word.as_ref();
Arch::detect(word).is_none()
&& OS::detect(word).is_none()
&& word.trim_start_matches('v').parse::<Version>().is_err()
&& !word.chars().all(char::is_numeric)
}

#[cfg(test)]
mod tests {
use super::*;

fn new_id(author: &str, name: &str) -> ToolId {
format!("{author}/{name}").parse().unwrap()
}

fn test_no_mentions(name: &str, tool_name: &str) {
let tool_id = new_id("author", tool_name);
assert_eq!(
count_non_tool_mentions(name, &tool_id),
0,
"Expected no non-tool mentions in name: {name}"
);
}

fn test_some_mentions(name: &str, tool_name: &str) {
let tool_id = new_id("author", tool_name);
assert_ne!(
count_non_tool_mentions(name, &tool_id),
0,
"Expected non-tool mentions in name: {name}"
);
}

#[test]
fn name_mention_check_tool_valid() {
// Single word tools
test_no_mentions("tool", "tool");
test_no_mentions("tool-linux", "tool");
test_no_mentions("tool-v1.0.0", "tool");
test_no_mentions("tool-1.0.0-x86_64-linux", "tool");
test_no_mentions("tool-x86_64-linux", "tool");
test_no_mentions("tool-x86_64-linux-v1.0.0", "tool");
// Multiple word tools
test_no_mentions("super-tool", "super-tool");
test_no_mentions("super-tool-linux", "super-tool");
test_no_mentions("super-tool-v1.0.0", "super-tool");
test_no_mentions("super-tool-1.0.0-x86_64-linux", "super-tool");
test_no_mentions("super-mega-tool", "super-mega-tool");
test_no_mentions("super-mega-tool-linux", "super-mega-tool");
test_no_mentions("super-mega-tool-v1.0.0", "super-mega-tool");
test_no_mentions("super-mega-tool-1.0.0-x86_64-linux", "super-mega-tool");
}

#[test]
fn name_mention_check_tool_invalid() {
// Contains similar but not exact word
test_some_mentions("tooling", "tool");
test_some_mentions("tooling-linux", "tool");
test_some_mentions("tooling-v1.0.0", "tool");
test_some_mentions("tooling-1.0.0-x86_64-linux", "tool");
test_some_mentions("tooling-x86_64-linux", "tool");
test_some_mentions("tooling-x86_64-linux-v1.0.0", "tool");
// Contains the exact word, but also others
test_some_mentions("super-tool", "tool");
test_some_mentions("super-tool-linux", "tool");
test_some_mentions("super-tool-v1.0.0", "tool");
test_some_mentions("super-tool-1.0.0-x86_64-linux", "tool");
test_some_mentions("super-mega-tool", "tool");
test_some_mentions("super-mega-tool-linux", "tool");
test_some_mentions("super-mega-tool-v1.0.0", "tool");
test_some_mentions("super-mega-tool-1.0.0-x86_64-linux", "tool");
}

#[test]
fn name_mention_check_case_insensitive() {
test_no_mentions("Tool-x86_64-linux", "tool");
test_no_mentions("tOOl-x86_64-linux", "tool");
test_no_mentions("TOOL-x86_64-linux", "tool");
test_some_mentions("Tooling-x86_64-linux", "tool");
test_some_mentions("tOOling-x86_64-linux", "tool");
test_some_mentions("TOOLING-x86_64-linux", "tool");
}

#[test]
fn name_mention_check_real_tools() {
// Valid
test_no_mentions("wally-v0.3.2-linux.zip", "wally");
test_no_mentions("lune-0.8.6-macos-aarch64.zip", "lune");
test_no_mentions("selene-0.27.1-linux.zip", "selene");
// Invalid
test_some_mentions("selene-light-0.27.1-linux.zip", "selene");
// Valid - but multiple words
test_no_mentions("sentry-cli-linux-i686-2.32.1", "sentry-cli");
test_no_mentions("selene-light-0.27.1-linux.zip", "selene-light");
}
}
9 changes: 9 additions & 0 deletions lib/util/str.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
use std::fmt;

/**
Checks if the given character is a "word" separator.

For internal use only.
*/
pub(crate) const fn char_is_word_separator(c: char) -> bool {
c.is_ascii_whitespace() || matches!(c, '-' | '_')
}

/**
A case-insensitive string wrapper.

Expand Down