Skip to content

Commit

Permalink
parser: Update nginx parser for nodejs/dist cases
Browse files Browse the repository at this point in the history
  • Loading branch information
taoky committed Nov 12, 2024
1 parent 665d984 commit 2965743
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 22 deletions.
59 changes: 59 additions & 0 deletions fixtures/nodejs/v4.9.1/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<!DOCTYPE html><html>
<head>
<title>Index of /dist/v4.9.1/</title>
<style>
@media (prefers-color-scheme: dark) {
body {
color: white;
background-color: #1c1b22;
}
a {
color: #3391ff;
}
a:visited {
color: #C63B65;
}
}
</style>
</head>
<body>
<h1>Index of /dist/v4.9.1/</h1><hr><pre><a href="../">../</a>
<a href="docs/">docs/</a> - -
<a href="win-x64/">win-x64/</a> - -
<a href="win-x86/">win-x86/</a> - -
<a href="/dist/v4.9.1/SHASUMS256.txt.asc">SHASUMS256.txt.asc</a> 04-Nov-2024 17:40 4.1 KB
<a href="/dist/v4.9.1/SHASUMS256.txt.sig">SHASUMS256.txt.sig</a> 04-Nov-2024 17:40 310 B
<a href="/dist/v4.9.1/SHASUMS256.txt">SHASUMS256.txt</a> 04-Nov-2024 17:40 3.6 KB
<a href="/dist/v4.9.1/node-v4.9.1-darwin-x64.tar.gz">node-v4.9.1-darwin-x64.tar.gz</a> 30-Oct-2024 18:21 10 MB
<a href="/dist/v4.9.1/node-v4.9.1-darwin-x64.tar.xz">node-v4.9.1-darwin-x64.tar.xz</a> 04-Nov-2024 17:40 7.1 MB
<a href="/dist/v4.9.1/node-v4.9.1-headers.tar.gz">node-v4.9.1-headers.tar.gz</a> 04-Nov-2024 17:40 471 KB
<a href="/dist/v4.9.1/node-v4.9.1-headers.tar.xz">node-v4.9.1-headers.tar.xz</a> 04-Nov-2024 17:40 342 KB
<a href="/dist/v4.9.1/node-v4.9.1-linux-arm64.tar.gz">node-v4.9.1-linux-arm64.tar.gz</a> 30-Oct-2024 18:21 12 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-arm64.tar.xz">node-v4.9.1-linux-arm64.tar.xz</a> 04-Nov-2024 17:40 7.7 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-armv6l.tar.gz">node-v4.9.1-linux-armv6l.tar.gz</a> 30-Oct-2024 18:21 11 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-armv6l.tar.xz">node-v4.9.1-linux-armv6l.tar.xz</a> 04-Nov-2024 17:40 7.3 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-armv7l.tar.gz">node-v4.9.1-linux-armv7l.tar.gz</a> 30-Oct-2024 18:21 11 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-armv7l.tar.xz">node-v4.9.1-linux-armv7l.tar.xz</a> 04-Nov-2024 17:40 7.3 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-ppc64.tar.gz">node-v4.9.1-linux-ppc64.tar.gz</a> 30-Oct-2024 18:21 12 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-ppc64.tar.xz">node-v4.9.1-linux-ppc64.tar.xz</a> 04-Nov-2024 17:40 7.5 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-ppc64le.tar.gz">node-v4.9.1-linux-ppc64le.tar.gz</a> 30-Oct-2024 18:21 12 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-ppc64le.tar.xz">node-v4.9.1-linux-ppc64le.tar.xz</a> 04-Nov-2024 17:40 7.6 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-x64.tar.gz">node-v4.9.1-linux-x64.tar.gz</a> 30-Oct-2024 18:21 12 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-x64.tar.xz">node-v4.9.1-linux-x64.tar.xz</a> 04-Nov-2024 17:40 8.2 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-x86.tar.gz">node-v4.9.1-linux-x86.tar.gz</a> 30-Oct-2024 18:21 12 MB
<a href="/dist/v4.9.1/node-v4.9.1-linux-x86.tar.xz">node-v4.9.1-linux-x86.tar.xz</a> 04-Nov-2024 17:40 7.8 MB
<a href="/dist/v4.9.1/node-v4.9.1-sunos-x64.tar.gz">node-v4.9.1-sunos-x64.tar.gz</a> 30-Oct-2024 18:21 13 MB
<a href="/dist/v4.9.1/node-v4.9.1-sunos-x64.tar.xz">node-v4.9.1-sunos-x64.tar.xz</a> 30-Oct-2024 18:21 8.4 MB
<a href="/dist/v4.9.1/node-v4.9.1-sunos-x86.tar.gz">node-v4.9.1-sunos-x86.tar.gz</a> 30-Oct-2024 18:21 12 MB
<a href="/dist/v4.9.1/node-v4.9.1-sunos-x86.tar.xz">node-v4.9.1-sunos-x86.tar.xz</a> 04-Nov-2024 17:40 7.7 MB
<a href="/dist/v4.9.1/node-v4.9.1-win-x64.7z">node-v4.9.1-win-x64.7z</a> 04-Nov-2024 17:40 6.1 MB
<a href="/dist/v4.9.1/node-v4.9.1-win-x64.zip">node-v4.9.1-win-x64.zip</a> 30-Oct-2024 18:21 11 MB
<a href="/dist/v4.9.1/node-v4.9.1-win-x86.7z">node-v4.9.1-win-x86.7z</a> 04-Nov-2024 17:40 5.4 MB
<a href="/dist/v4.9.1/node-v4.9.1-win-x86.zip">node-v4.9.1-win-x86.zip</a> 30-Oct-2024 18:21 9.6 MB
<a href="/dist/v4.9.1/node-v4.9.1-x64.msi">node-v4.9.1-x64.msi</a> 30-Oct-2024 18:21 11 MB
<a href="/dist/v4.9.1/node-v4.9.1-x86.msi">node-v4.9.1-x86.msi</a> 30-Oct-2024 18:21 10.0 MB
<a href="/dist/v4.9.1/node-v4.9.1.pkg">node-v4.9.1.pkg</a> 30-Oct-2024 18:21 13 MB
<a href="/dist/v4.9.1/node-v4.9.1.tar.gz">node-v4.9.1.tar.gz</a> 30-Oct-2024 18:21 23 MB
<a href="/dist/v4.9.1/node-v4.9.1.tar.xz">node-v4.9.1.tar.xz</a> 30-Oct-2024 18:21 13 MB
</pre><hr /></body>
</html>
8 changes: 7 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,14 +211,20 @@ fn assert_if_url_has_no_trailing_slash(url: &Url) {
);
}

fn get_real_name_from_href(href: &str) -> String {
/// Returns the last path segment of `href`, borrowed from the input.
///
/// Trailing `/` characters are ignored, so a directory link such as `"docs/"`
/// yields `"docs"`. If no `/` is left after trimming, the whole trimmed string
/// is returned. Nothing else is stripped: a query string, if present, stays in
/// the result.
fn get_last_part_from_href(href: &str) -> &str {
    // Drop trailing slashes first so directory links still produce a name.
    let without_trailing = href.trim_end_matches('/');

    // Keep whatever follows the final '/', or everything when there is none.
    match without_trailing.rsplit_once('/') {
        Some((_, tail)) => tail,
        None => without_trailing,
    }
}

fn get_real_name_from_href(href: &str) -> String {
let after_last_slash = get_last_part_from_href(href);

// TODO: this might have issues (inconsistent with other impls)

// Find the position of the first '?' and take the substring before it.
Expand Down
102 changes: 81 additions & 21 deletions src/parser/nginx.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/// A parser suitable for both the default nginx autoindex format and the apache f1 format.
use crate::listing::{FileSize, FileType, ListItem};
use crate::listing::{FileSize, FileType, ListItem, SizeUnit};
use chrono::NaiveDateTime;
use scraper::{Html, Selector};
use tracing::debug;
Expand Down Expand Up @@ -56,7 +56,8 @@ impl Parser for NginxListingParser {
get_real_name_from_href(href)
} else {
// A compromise for apache server (they will NOT url-encode the filename)
href.to_string()
// Just find the last '/' (if exists), and take substring after that
get_last_part_from_href(href).to_string()
};
let href = url.join(href)?;

Expand All @@ -83,26 +84,49 @@ impl Parser for NginxListingParser {
.to_string();
let metadata_raw = metadata_raw.trim();
debug!("{:?}", metadata_raw);
// If this is a directory and its metadata is just "- -", skip date parsing entirely:
// directory mtime is useless anyway, and the nodejs dist listing gives us exactly this.
let mut skip_date = false;
if type_ == FileType::Directory
&& metadata_raw
.chars()
.filter(|c| !c.is_whitespace())
.collect::<String>()
== "--"
{
skip_date = true;
}
// guess date format...
if date_fmt.is_none() {
let (f, r) = guess_date_fmt(metadata_raw);
date_fmt = Some(f);
date_regex = Some(Regex::new(&format!(r"({})\s+([\d\.\-]+ ?[kKMGB]*)$", r))?);
debug!("date_fmt: {:?} date_regex: {:?}", date_fmt, date_regex)
let date;
let size;
if !skip_date {
if date_fmt.is_none() {
let (f, r) = guess_date_fmt(metadata_raw);
date_fmt = Some(f);
date_regex = Some(Regex::new(&format!(r"({})\s+([\d\.\-]+ ?[kKMGB]*)$", r))?);
debug!("date_fmt: {:?} date_regex: {:?}", date_fmt, date_regex)
}
let metadata =
date_regex
.clone()
.unwrap()
.captures(metadata_raw)
.ok_or(anyhow!(
"Get '{}' for {} ({}) metadata, is this a nginx page?",
metadata_raw,
name,
href
))?;
date = NaiveDateTime::parse_from_str(
metadata.get(1).unwrap().as_str(),
&date_fmt.clone().unwrap(),
)?;
size = metadata.get(2).unwrap().as_str();
} else {
date = NaiveDateTime::UNIX_EPOCH;
size = "-";
}
let metadata = date_regex
.clone()
.unwrap()
.captures(metadata_raw)
.ok_or(anyhow!(
"Get '{}' for {} ({}) metadata, is this a nginx page?",
metadata_raw,
name,
href
))?;
let date = metadata.get(1).unwrap().as_str();
let date = NaiveDateTime::parse_from_str(date, &date_fmt.clone().unwrap())?;
let size = metadata.get(2).unwrap().as_str();

debug!("{} {} {:?} {} {:?}", href, name, type_, date, size);
items.push(ListItem::new(
href,
Expand All @@ -115,9 +139,14 @@ impl Parser for NginxListingParser {
|| size.contains('K')
|| size.contains('M')
|| size.contains('G')
|| size.contains('B')
{
let (n_size, unit) = FileSize::get_humanized(size);
Some(FileSize::HumanizedBinary(n_size, unit))
if unit != SizeUnit::B {
Some(FileSize::HumanizedBinary(n_size, unit))
} else {
Some(FileSize::Precise(n_size as u64)) // workaround
}
} else {
let n_size = size.parse::<u64>().unwrap();
Some(FileSize::Precise(n_size))
Expand Down Expand Up @@ -322,4 +351,35 @@ mod tests {
_ => unreachable!(),
}
}

/// Parses the nodejs `v4.9.1` listing and checks a directory row whose
/// metadata is "- -" as well as a regular file row with a humanized size.
#[test]
fn test_nodejs() {
    let context = init_async_context();
    // NOTE(review): presumably served from fixtures/nodejs/v4.9.1/index.html — confirm.
    let listing = NginxListingParser::default()
        .get_list(
            &context,
            &url::Url::parse("http://localhost:1921/nodejs/v4.9.1/").unwrap(),
        )
        .unwrap();

    // Anything other than a plain list is unexpected for this page.
    let entries = match listing {
        ListResult::List(entries) => entries,
        _ => unreachable!(),
    };
    assert_eq!(entries.len(), 37);

    // First entry: a directory with no size and the epoch as placeholder mtime.
    let first_dir = &entries[0];
    assert_eq!(first_dir.name, "docs");
    assert_eq!(first_dir.type_, FileType::Directory);
    assert_eq!(first_dir.size, None);
    assert_eq!(
        first_dir.mtime,
        NaiveDateTime::UNIX_EPOCH, // No mtime
    );

    // Fourth entry: a file whose href is absolute and whose size is "4.1 KB".
    let first_file = &entries[3];
    assert_eq!(first_file.name, "SHASUMS256.txt.asc");
    assert_eq!(first_file.type_, FileType::File);
    assert_eq!(
        first_file.size,
        Some(FileSize::HumanizedBinary(4.1, SizeUnit::K))
    );
    assert_eq!(
        first_file.mtime,
        NaiveDateTime::parse_from_str("2024-11-04 17:40", "%Y-%m-%d %H:%M").unwrap()
    );
}
}

0 comments on commit 2965743

Please sign in to comment.