diff --git a/Cargo.toml b/Cargo.toml index 14f5c37..780e3d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,4 +15,6 @@ serde_json = "1.0.117" serde = { version = "1.0", features = ["derive"] } tempfile = "3.10.1" ureq = "2.9.7" - +font-types = { version = "0.7", features= ["serde"] } +thiserror = "1.0.37" +serde_yaml = "0.9.14" diff --git a/README.md b/README.md index a398c3d..44e697a 100644 --- a/README.md +++ b/README.md @@ -10,4 +10,16 @@ For each repository we find, we then look for a `config.yaml` file in that repository's `/source` directory, which is present by convention on sources intended to be built by Google Fonts. +# use + +To use this tool from the command line, in order to generate a JSON dictionary +containing information about source repositories: + +```sh +cargo run -- -o repo_list.json +``` + +To use this tool from another Rust crate, see [the docs]. + [metadata file]: https://github.com/googlefonts/gftools/blob/main/Lib/gftools/fonts_public.proto +[the docs]: https://docs.rs/google-fonts-sources/ diff --git a/src/args.rs b/src/args.rs index dbf4955..ba9cba0 100644 --- a/src/args.rs +++ b/src/args.rs @@ -4,6 +4,7 @@ use std::path::PathBuf; #[derive(Clone, Debug, Default, clap::Parser)] #[command(version, about)] +#[doc(hidden)] // only intended to be used from our binary pub struct Args { /// Path to local checkout of google/fonts repository #[arg(short, long)] diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..8d28cee --- /dev/null +++ b/src/config.rs @@ -0,0 +1,34 @@ +//! parsing google fonts config files + +use std::path::Path; + +use font_types::Tag; + +use crate::error::BadConfig; + +/// Google fonts config file ('config.yaml') +/// +/// This is a standard file that describes the sources and steps for building a +/// font. See [googlefonts-project-template][template]. 
+/// +/// [template]: https://github.com/googlefonts/googlefonts-project-template/blob/main/sources/config.yaml +#[derive(Clone, Debug, serde::Deserialize)] +#[serde(rename_all = "camelCase")] +// there are a bunch of other fields here we may need to add in the future +#[non_exhaustive] +pub struct Config { + pub sources: Vec, + pub family_name: Option, + #[serde(default)] + pub build_variable: bool, + #[serde(default)] + pub axis_order: Vec, +} + +impl Config { + /// Parse and return a config.yaml file for the provided font source + pub fn load(config_path: &Path) -> Result { + let contents = std::fs::read_to_string(config_path)?; + serde_yaml::from_str(&contents).map_err(BadConfig::Yaml) + } +} diff --git a/src/error.rs b/src/error.rs index 24e11a0..0071a68 100644 --- a/src/error.rs +++ b/src/error.rs @@ -22,6 +22,62 @@ impl UnwrapOrDie for Result { } } +/// Errors that occur while trying to load a config file +#[derive(Debug, thiserror::Error)] +pub enum BadConfig { + /// The file could not be read + #[error(transparent)] + Read(#[from] std::io::Error), + /// The yaml could not be parsed + #[error(transparent)] + Yaml(serde_yaml::Error), +} + +/// Things that go wrong when trying to clone and read a font repo +#[derive(Debug, thiserror::Error)] +pub enum LoadRepoError { + #[error("could not create local directory: '{0}'")] + Io( + #[from] + #[source] + std::io::Error, + ), + #[error("git failed: '{0}'")] + GitFail( + #[source] + #[from] + GitFail, + ), + /// The expected commit could not be found + #[error("could not find commit '{sha}'")] + NoCommit { sha: String }, + + /// No config file was found + #[error("no config file was found")] + NoConfig, + #[error("couldn't load config file: '{0}'")] + BadConfig( + #[source] + #[from] + BadConfig, + ), +} + +/// Things that go wrong when trying to run a git command +#[derive(Debug, thiserror::Error)] +pub enum GitFail { + /// The git command itself does not execute + #[error("process failed: '{0}'")] + ProcessFailed( 
+ #[from] + #[source] + std::io::Error, + ), + /// The git command returns a non-zero status + #[error("command failed: '{0}'")] + GitError(String), +} + pub(crate) enum MetadataError { Read(std::io::Error), Parse(BadMetadata), diff --git a/src/lib.rs b/src/lib.rs index 53739a3..0ce3963 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,27 @@ //! Finding sources for Google Fonts fonts +//! +//! # basic usage: +//! +//! ```no_run +//! // get a list of repositories: +//! +//! let font_repo_cache = std::path::Path::new("~/where_i_want_to_checkout_fonts"); +//! let font_repos = google_fonts_sources::discover_sources(None, Some(font_repo_cache), false); +//! +//! // for each repo we find, do something with each source: +//! +//! for repo in &font_repos { +//! let sources = match repo.get_sources(font_repo_cache) { +//! Ok(sources) => sources, +//! Err(e) => { +//! eprintln!("skipping repo '{}': '{e}'", repo.repo_name); +//! continue; +//! } +//! }; +//! +//! println!("repo '{}' contains sources {sources:?}", repo.repo_name); +//! } +//! ``` use std::{ collections::{BTreeMap, BTreeSet, HashSet}, @@ -14,34 +37,25 @@ use std::{ use kdam::{tqdm, BarExt}; mod args; +mod config; mod error; mod metadata; +mod repo_info; pub use args::Args; -use error::{MetadataError, UnwrapOrDie}; +pub use config::Config; +pub use error::{BadConfig, LoadRepoError}; +use error::{GitFail, MetadataError, UnwrapOrDie}; use metadata::Metadata; +pub use repo_info::RepoInfo; static GF_REPO_URL: &str = "https://github.com/google/fonts"; static METADATA_FILE: &str = "METADATA.pb"; type GitRev = String; -/// Information about a font repository -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)] -pub struct RepoInfo { - /// The name of the repository. - /// - /// This is everything after the trailing '/' in e.g. 
`https://github.com/PaoloBiagini/Joan` - pub repo_name: String, - /// The repository's url - pub repo_url: String, - /// The commit rev of the repository's main branch - pub rev: String, - /// The names of config files that exist in this repository's source directory - pub config_files: Vec, -} - /// entry point for the cli tool +#[doc(hidden)] // only intended to be used from our binary pub fn run(args: &Args) { let repos = discover_sources( args.repo_path.as_deref(), @@ -68,6 +82,16 @@ pub fn run(args: &Args) { /// This looks at every font in the google/fonts github repo, looks to see if /// we have a known upstream repository for that font, and then looks to see if /// that repo contains a config.yaml file. +/// +/// The 'fonts_repo_path' is the path to a local checkout of the [google/fonts] +/// repository. If this is `None`, we will clone that repository to a tempdir. +/// +/// The 'sources_dir' is the path to a directory where repositories will be +/// checked out, if necessary. Because we check out lots of repos (and it is +/// likely that the caller will want to check these out again later) it makes +/// sense to cache these in most cases. 
+/// +/// [google/fonts]: https://github.com/google/fonts pub fn discover_sources( fonts_repo_path: Option<&Path>, sources_dir: Option<&Path>, @@ -226,7 +250,8 @@ fn find_config_files( } Err(e) => { let msg = match e { - ConfigFetchIssue::BadRepoUrl(s) | ConfigFetchIssue::GitFail(s) => s, + ConfigFetchIssue::BadRepoUrl(s) => s, + ConfigFetchIssue::GitFail(e) => e.to_string(), ConfigFetchIssue::Http(e) => e.to_string(), _ => unreachable!(), // handled above }; @@ -290,7 +315,7 @@ enum ConfigFetchIssue { RateLimit(usize), BadRepoUrl(String), // contains stderr - GitFail(String), + GitFail(GitFail), Http(Box), } @@ -308,7 +333,7 @@ fn config_files_and_rev_for_repo( // - and then finally clone the repo and look let local_git_dir = local_repo_dir.join(".git"); if local_git_dir.exists() { - let rev = get_git_rev(&local_repo_dir); + let rev = get_git_rev(&local_repo_dir).map_err(ConfigFetchIssue::GitFail)?; let configs = get_config_paths(&local_repo_dir).ok_or(ConfigFetchIssue::NoConfigFound)?; return Ok((configs, rev)); } @@ -319,7 +344,7 @@ fn config_files_and_rev_for_repo( naive } else { let configs = config_files_from_local_checkout(repo_url, &local_repo_dir)?; - let rev = get_git_rev(&local_repo_dir); + let rev = get_git_rev(&local_repo_dir).map_err(ConfigFetchIssue::GitFail)?; Ok((configs, rev)) } } @@ -415,6 +440,9 @@ fn get_candidates_from_remote(verbose: bool) -> BTreeSet { fn get_candidates_from_local_checkout(path: &Path, verbose: bool) -> BTreeSet { let ofl_dir = path.join("ofl"); + if verbose { + eprintln!("searching for candidates in {}", ofl_dir.display()); + } let mut result = BTreeSet::new(); for font_dir in iter_ofl_subdirectories(&ofl_dir) { let metadata = match load_metadata(&font_dir) { @@ -431,19 +459,6 @@ fn get_candidates_from_local_checkout(path: &Path, verbose: bool) -> BTreeSet String { - let output = std::process::Command::new("git") - .arg("rev-parse") - .arg("HEAD") - .current_dir(repo_path) - .output() - .expect("git rev-parse HEAD should 
not fail if repo exists"); - std::str::from_utf8(&output.stdout) - .expect("rev is always ascii/hex string") - .trim() - .to_owned() -} - fn get_git_rev_remote(repo_url: &str) -> Result { let output = std::process::Command::new("git") .arg("ls-remote") @@ -460,6 +475,58 @@ fn get_git_rev_remote(repo_url: &str) -> Result { Ok(sha) } +/// Get the short sha of the current commit in the provided repository. +/// +/// The `git` command is run with `repo_path` as its working directory. +/// +/// Returns an error if the `git` command fails (for instance if the path is not +/// a git repository) +fn get_git_rev(repo_path: &Path) -> Result { + let mut cmd = std::process::Command::new("git"); + cmd.args(["rev-parse", "--short", "HEAD"]) + .current_dir(repo_path); + let output = cmd.output()?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(GitFail::GitError(stderr.into_owned())); + } + + Ok(std::str::from_utf8(&output.stdout) + .expect("rev is always ascii/hex string") + .trim() + .to_owned()) +} + +// try to checkout this rev. 
+// +// returns `true` if successful, `false` otherwise (indicating a git error) +fn checkout_rev(repo_dir: &Path, rev: &str) -> Result { + let sha = get_git_rev(repo_dir)?; + // the longer str is on the left, so we check if shorter str is a prefix + let (left, right) = if sha.len() > rev.len() { + (sha.as_str(), rev) + } else { + (rev, sha.as_str()) + }; + if left.starts_with(right) { + return Ok(true); + } + // checkouts might be shallow, so unshallow before looking for a rev: + let _ = std::process::Command::new("git") + .current_dir(repo_dir) + .args(["fetch", "--unshallow"]) + .status(); + + std::process::Command::new("git") + .current_dir(repo_dir) + .arg("checkout") + .arg(rev) + .status() + .map(|stat| stat.success()) + .map_err(Into::into) +} + fn load_metadata(path: &Path) -> Result { let meta_path = path.join(METADATA_FILE); Metadata::load(&meta_path) @@ -471,8 +538,7 @@ fn iter_ofl_subdirectories(path: &Path) -> impl Iterator { contents.filter_map(|entry| entry.ok().map(|d| d.path()).filter(|p| p.is_dir())) } -// on fail returns contents of stderr -fn clone_repo(url: &str, to_dir: &Path) -> Result<(), String> { +fn clone_repo(url: &str, to_dir: &Path) -> Result<(), GitFail> { assert!(to_dir.exists()); let output = std::process::Command::new("git") // if a repo requires credentials fail instead of waiting @@ -481,12 +547,11 @@ fn clone_repo(url: &str, to_dir: &Path) -> Result<(), String> { .args(["--depth", "1"]) .arg(url) .arg(to_dir) - .output() - .expect("failed to execute git command"); + .output()?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - return Err(stderr.into_owned()); + return Err(GitFail::GitError(stderr.into_owned())); } Ok(()) } diff --git a/src/repo_info.rs b/src/repo_info.rs new file mode 100644 index 0000000..96a7263 --- /dev/null +++ b/src/repo_info.rs @@ -0,0 +1,64 @@ +//! 
font repository information + +use std::path::{Path, PathBuf}; + +use crate::{error::LoadRepoError, Config}; + +/// Information about a git repository containing font sources +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)] +pub struct RepoInfo { + /// The name of the repository. + /// + /// This is everything after the trailing '/' in e.g. `https://github.com/PaoloBiagini/Joan` + pub repo_name: String, + /// The repository's url + pub repo_url: String, + /// The commit rev of the repository's main branch, at discovery time. + pub rev: String, + /// The names of config files that exist in this repository's source directory + pub config_files: Vec, +} + +impl RepoInfo { + /// Return a `Vec` of source files in this repository. + /// + /// If necessary, this will create a new checkout of this repo at + /// '{font_repos_dir}/{repo_name}'. + pub fn get_sources(&self, font_repos_dir: &Path) -> Result, LoadRepoError> { + let font_dir = font_repos_dir.join(&self.repo_name); + + if !font_dir.exists() { + std::fs::create_dir_all(&font_dir)?; + super::clone_repo(&self.repo_url, &font_dir)?; + } + + if !super::checkout_rev(&font_dir, &self.rev)? { + return Err(LoadRepoError::NoCommit { + sha: self.rev.clone(), + }); + } + + let source_dir = font_dir.join("sources"); + let configs = self + .config_files + .iter() + .map(|filename| { + let config_path = source_dir.join(filename); + Config::load(&config_path) + }) + .collect::, _>>()?; + if configs.is_empty() { + return Err(LoadRepoError::NoConfig); + } + + let mut sources = configs + .iter() + .flat_map(|c| c.sources.iter()) + .map(|source| source_dir.join(source)) + .collect::>(); + sources.sort_unstable(); + sources.dedup(); + + Ok(sources) + } +}