diff --git a/fixtures/nodejs/v4.9.1/index.html b/fixtures/nodejs/v4.9.1/index.html
new file mode 100644
index 0000000..a35b670
--- /dev/null
+++ b/fixtures/nodejs/v4.9.1/index.html
@@ -0,0 +1,59 @@
+
+
+ Index of /dist/v4.9.1/
+
+
+
+Index of /dist/v4.9.1/
../
+docs/ - -
+win-x64/ - -
+win-x86/ - -
+SHASUMS256.txt.asc 04-Nov-2024 17:40 4.1 KB
+SHASUMS256.txt.sig 04-Nov-2024 17:40 310 B
+SHASUMS256.txt 04-Nov-2024 17:40 3.6 KB
+node-v4.9.1-darwin-x64.tar.gz 30-Oct-2024 18:21 10 MB
+node-v4.9.1-darwin-x64.tar.xz 04-Nov-2024 17:40 7.1 MB
+node-v4.9.1-headers.tar.gz 04-Nov-2024 17:40 471 KB
+node-v4.9.1-headers.tar.xz 04-Nov-2024 17:40 342 KB
+node-v4.9.1-linux-arm64.tar.gz 30-Oct-2024 18:21 12 MB
+node-v4.9.1-linux-arm64.tar.xz 04-Nov-2024 17:40 7.7 MB
+node-v4.9.1-linux-armv6l.tar.gz 30-Oct-2024 18:21 11 MB
+node-v4.9.1-linux-armv6l.tar.xz 04-Nov-2024 17:40 7.3 MB
+node-v4.9.1-linux-armv7l.tar.gz 30-Oct-2024 18:21 11 MB
+node-v4.9.1-linux-armv7l.tar.xz 04-Nov-2024 17:40 7.3 MB
+node-v4.9.1-linux-ppc64.tar.gz 30-Oct-2024 18:21 12 MB
+node-v4.9.1-linux-ppc64.tar.xz 04-Nov-2024 17:40 7.5 MB
+node-v4.9.1-linux-ppc64le.tar.gz 30-Oct-2024 18:21 12 MB
+node-v4.9.1-linux-ppc64le.tar.xz 04-Nov-2024 17:40 7.6 MB
+node-v4.9.1-linux-x64.tar.gz 30-Oct-2024 18:21 12 MB
+node-v4.9.1-linux-x64.tar.xz 04-Nov-2024 17:40 8.2 MB
+node-v4.9.1-linux-x86.tar.gz 30-Oct-2024 18:21 12 MB
+node-v4.9.1-linux-x86.tar.xz 04-Nov-2024 17:40 7.8 MB
+node-v4.9.1-sunos-x64.tar.gz 30-Oct-2024 18:21 13 MB
+node-v4.9.1-sunos-x64.tar.xz 30-Oct-2024 18:21 8.4 MB
+node-v4.9.1-sunos-x86.tar.gz 30-Oct-2024 18:21 12 MB
+node-v4.9.1-sunos-x86.tar.xz 04-Nov-2024 17:40 7.7 MB
+node-v4.9.1-win-x64.7z 04-Nov-2024 17:40 6.1 MB
+node-v4.9.1-win-x64.zip 30-Oct-2024 18:21 11 MB
+node-v4.9.1-win-x86.7z 04-Nov-2024 17:40 5.4 MB
+node-v4.9.1-win-x86.zip 30-Oct-2024 18:21 9.6 MB
+node-v4.9.1-x64.msi 30-Oct-2024 18:21 11 MB
+node-v4.9.1-x86.msi 30-Oct-2024 18:21 10.0 MB
+node-v4.9.1.pkg 30-Oct-2024 18:21 13 MB
+node-v4.9.1.tar.gz 30-Oct-2024 18:21 23 MB
+node-v4.9.1.tar.xz 30-Oct-2024 18:21 13 MB
+
+
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 868b51b..56d2570 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -211,7 +211,7 @@ fn assert_if_url_has_no_trailing_slash(url: &Url) {
);
}
-fn get_real_name_from_href(href: &str) -> String {
+fn get_last_part_from_href(href: &str) -> &str {
// Remove trailing slashes for correct name extraction.
let trimmed = href.trim_end_matches('/');
@@ -219,6 +219,12 @@ fn get_real_name_from_href(href: &str) -> String {
let last_slash_pos = trimmed.rfind('/').map(|pos| pos + 1).unwrap_or(0);
let after_last_slash = &trimmed[last_slash_pos..];
+ return after_last_slash;
+}
+
+fn get_real_name_from_href(href: &str) -> String {
+ let after_last_slash = get_last_part_from_href(href);
+
// TODO: this might have issues (inconsistent with other impls)
// Find the position of the first '?' and take the substring before it.
diff --git a/src/parser/nginx.rs b/src/parser/nginx.rs
index 79e3d62..c48ea8c 100644
--- a/src/parser/nginx.rs
+++ b/src/parser/nginx.rs
@@ -1,5 +1,5 @@
/// A parser both suitable for default nginx autoindex and apache f1 format.
-use crate::listing::{FileSize, FileType, ListItem};
+use crate::listing::{FileSize, FileType, ListItem, SizeUnit};
use chrono::NaiveDateTime;
use scraper::{Html, Selector};
use tracing::debug;
@@ -56,7 +56,8 @@ impl Parser for NginxListingParser {
get_real_name_from_href(href)
} else {
// A compromise for apache server (they will NOT url-encode the filename)
- href.to_string()
+ // Just find the last '/' (if exists), and take substring after that
+ get_last_part_from_href(href).to_string()
};
let href = url.join(href)?;
@@ -83,26 +84,49 @@ impl Parser for NginxListingParser {
.to_string();
let metadata_raw = metadata_raw.trim();
debug!("{:?}", metadata_raw);
+ // If this is a directory whose metadata is just "- -" (no date, no size), skip date parsing:
+ // a directory mtime is of no use here, and the nodejs dist listing emits exactly this form.
+ let mut skip_date = false;
+ if type_ == FileType::Directory
+ && metadata_raw
+ .chars()
+ .filter(|c| !c.is_whitespace())
+ .collect::<String>()
+ == "--"
+ {
+ skip_date = true;
+ }
// guess date format...
- if date_fmt.is_none() {
- let (f, r) = guess_date_fmt(metadata_raw);
- date_fmt = Some(f);
- date_regex = Some(Regex::new(&format!(r"({})\s+([\d\.\-]+ ?[kKMGB]*)$", r))?);
- debug!("date_fmt: {:?} date_regex: {:?}", date_fmt, date_regex)
+ let date;
+ let size;
+ if !skip_date {
+ if date_fmt.is_none() {
+ let (f, r) = guess_date_fmt(metadata_raw);
+ date_fmt = Some(f);
+ date_regex = Some(Regex::new(&format!(r"({})\s+([\d\.\-]+ ?[kKMGB]*)$", r))?);
+ debug!("date_fmt: {:?} date_regex: {:?}", date_fmt, date_regex)
+ }
+ let metadata =
+ date_regex
+ .clone()
+ .unwrap()
+ .captures(metadata_raw)
+ .ok_or(anyhow!(
+ "Get '{}' for {} ({}) metadata, is this a nginx page?",
+ metadata_raw,
+ name,
+ href
+ ))?;
+ date = NaiveDateTime::parse_from_str(
+ metadata.get(1).unwrap().as_str(),
+ &date_fmt.clone().unwrap(),
+ )?;
+ size = metadata.get(2).unwrap().as_str();
+ } else {
+ date = NaiveDateTime::UNIX_EPOCH;
+ size = "-";
}
- let metadata = date_regex
- .clone()
- .unwrap()
- .captures(metadata_raw)
- .ok_or(anyhow!(
- "Get '{}' for {} ({}) metadata, is this a nginx page?",
- metadata_raw,
- name,
- href
- ))?;
- let date = metadata.get(1).unwrap().as_str();
- let date = NaiveDateTime::parse_from_str(date, &date_fmt.clone().unwrap())?;
- let size = metadata.get(2).unwrap().as_str();
+
debug!("{} {} {:?} {} {:?}", href, name, type_, date, size);
items.push(ListItem::new(
href,
@@ -115,9 +139,14 @@ impl Parser for NginxListingParser {
|| size.contains('K')
|| size.contains('M')
|| size.contains('G')
+ || size.contains('B')
{
let (n_size, unit) = FileSize::get_humanized(size);
- Some(FileSize::HumanizedBinary(n_size, unit))
+ if unit != SizeUnit::B {
+ Some(FileSize::HumanizedBinary(n_size, unit))
+ } else {
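+ Some(FileSize::Precise(n_size as u64)) // workaround: a plain byte count ("310 B") is stored as Precise, not HumanizedBinary; the cast truncates any fraction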
+ }
} else {
let n_size = size.parse::<u64>().unwrap();
Some(FileSize::Precise(n_size))
@@ -322,4 +351,35 @@ mod tests {
_ => unreachable!(),
}
}
+
+ #[test]
+ fn test_nodejs() { // Parses the nodejs v4.9.1 listing and checks one "- -" directory row and one regular file row.
+ let context = init_async_context();
+ let items = NginxListingParser::default()
+ .get_list(
+ &context,
+ &url::Url::parse("http://localhost:1921/nodejs/v4.9.1/").unwrap(), // presumably served from fixtures/nodejs/v4.9.1/index.html — confirm against the test server setup
+ )
+ .unwrap();
+ match items {
+ ListResult::List(items) => {
+ assert_eq!(items.len(), 37); // 3 directories + 34 files in the fixture; the "../" parent link is not counted
+ assert_eq!(items[0].name, "docs"); // first row of the listing: "docs/ - -"
+ assert_eq!(items[0].type_, FileType::Directory);
+ assert_eq!(items[0].size, None);
+ assert_eq!(
+ items[0].mtime,
+ NaiveDateTime::UNIX_EPOCH, // No mtime: a "- -" directory row falls back to the Unix epoch
+ );
+ assert_eq!(items[3].name, "SHASUMS256.txt.asc"); // first file row, right after the three directories
+ assert_eq!(items[3].type_, FileType::File);
+ assert_eq!(items[3].size, Some(FileSize::HumanizedBinary(4.1, SizeUnit::K))); // listed as "4.1 KB"
+ assert_eq!(
+ items[3].mtime,
+ NaiveDateTime::parse_from_str("2024-11-04 17:40", "%Y-%m-%d %H:%M").unwrap() // listed as "04-Nov-2024 17:40"
+ );
+ }
+ _ => unreachable!(), // any non-List result is unexpected for this fixture
+ }
+ }
}