From 6f6794cffbf82f1a9be52b2a95b548fb056735e6 Mon Sep 17 00:00:00 2001 From: taoky Date: Sun, 25 Aug 2024 19:40:01 +0800 Subject: [PATCH] sync: Recursively find first file for timezone guessing Some repos like https://images.lxd.canonical.com/images/ do not have files at root and do not have a fixed URL for this purpose. --- README.md | 2 +- src/cli/sync.rs | 52 +++++++++++++++++++++++++++++++++---------------- src/main.rs | 3 ++- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 21a41c2..9fd6cc1 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Options: --max-delete Set max delete count [default: 100] --timezone-file - Default: auto. You can set a valid URL for guessing, or an invalid one for disabling + You can set a valid URL for guessing, or an invalid one for disabling. By default it would recursively find the first file to HEAD for guessing --timezone Manually set timezone (+- hrs). This overrides timezone_file --retry diff --git a/src/cli/sync.rs b/src/cli/sync.rs index 25fb3b0..b14826e 100644 --- a/src/cli/sync.rs +++ b/src/cli/sync.rs @@ -21,7 +21,7 @@ use url::Url; use crate::{ compare::{should_download_by_header, should_download_by_list}, extensions::{extension_handler, ExtensionPackage}, - listing::{self, ListItem}, + listing::{self, FileType, ListItem}, parser::ListResult, regex_process::{self, ExclusionManager}, term::AlternativeTerm, @@ -86,26 +86,44 @@ fn determinate_timezone( }, None => { // eek, try getting first file in root index - let list = again( - || parser.get_list(async_context, &args.upstream), - args.retry, - ) - .unwrap(); - match list { - ListResult::List(list) => { - match list.iter().find(|x| x.type_ == listing::FileType::File) { - None => { - warn!("No files in root index, disabling timezone guessing"); - None + fn find_first_file( + args: &SyncArgs, + parser: &dyn crate::parser::Parser, + async_context: &AsyncContext, + url: &Url, + ) -> Option { + info!("Try finding first File in 
{}", url); + let list = + again(|| parser.get_list(async_context, url), args.retry).unwrap(); + match list { + ListResult::List(list) => { + for item in list { + match item.type_ { + FileType::File => { + info!("Find a file! URL: {}", item.url); + return Some(item.url); + } + FileType::Directory => { + if let Some(file) = find_first_file( + args, + parser, + async_context, + &item.url, + ) { + return Some(file); + } + } + } } - Some(x) => Some(x.url.clone()), + None + } + ListResult::Redirect(_) => { + info!("Get a manual redirect instead of a file"); + None } - } - ListResult::Redirect(_) => { - warn!("Root index is a redirect, disabling timezone guessing"); - None } } + find_first_file(args, parser, async_context, &args.upstream) } }; match timezone_file { diff --git a/src/main.rs b/src/main.rs index baf960f..7a85069 100644 --- a/src/main.rs +++ b/src/main.rs @@ -87,7 +87,8 @@ pub struct SyncArgs { #[clap(value_parser)] local: PathBuf, - /// Default: auto. You can set a valid URL for guessing, or an invalid one for disabling. + /// You can set a valid URL for guessing, or an invalid one for disabling. + /// By default it would recursively find the first file to HEAD for guessing #[clap(long)] timezone_file: Option,