diff --git a/src/component.rs b/src/component.rs index ba62c35..f43e38c 100644 --- a/src/component.rs +++ b/src/component.rs @@ -11,6 +11,7 @@ use crate::parser::FULL_WILDCARD_REGEXP_VALUE; use crate::regexp::RegExp; use crate::tokenizer::is_valid_name_codepoint; use crate::Error; +use std::fmt::Write; // Ref: https://wicg.github.io/urlpattern/#component #[derive(Debug)] @@ -40,9 +41,10 @@ impl Component { let part_list = part_list.iter().collect::>(); let (regexp_string, name_list) = generate_regular_expression_and_name_list(&part_list, &options); - let regexp = R::parse(®exp_string).map_err(Error::RegExp); + let flags = if options.ignore_case { "ui" } else { "u" }; + let regexp = R::parse(®exp_string, flags).map_err(Error::RegExp); let pattern_string = generate_pattern_string(&part_list, &options); - let matcher = generate_matcher::(&part_list, &options); + let matcher = generate_matcher::(&part_list, &options, flags); Ok(Component { pattern_string, regexp, @@ -106,11 +108,13 @@ fn generate_regular_expression_and_name_list( if part.modifier == PartModifier::None { result.push_str(&options.escape_regexp_string(&part.value)); } else { - result.push_str(&format!( + write!( + result, "(?:{}){}", options.escape_regexp_string(&part.value), part.modifier - )); + ) + .unwrap(); } continue; } @@ -127,24 +131,27 @@ fn generate_regular_expression_and_name_list( if part.prefix.is_empty() && part.suffix.is_empty() { if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { - result.push_str(&format!("({}){}", regexp_value, part.modifier)); + write!(result, "({}){}", regexp_value, part.modifier).unwrap(); } else { - result.push_str(&format!("((?:{}){})", regexp_value, part.modifier)); + write!(result, "((?:{}){})", regexp_value, part.modifier).unwrap(); } continue; } if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { - result.push_str(&format!( + write!( + result, "(?:{}({}){}){}", options.escape_regexp_string(&part.prefix), regexp_value, options.escape_regexp_string(&part.suffix), part.modifier - )); + ) + .unwrap(); continue; } assert!(!part.prefix.is_empty() || !part.suffix.is_empty()); - result.push_str(&format!( + write!( + result, "(?:{}((?:{})(?:{}{}(?:{}))*){}){}", options.escape_regexp_string(&part.prefix), regexp_value, @@ -153,11 +160,12 @@ fn generate_regular_expression_and_name_list( regexp_value, options.escape_regexp_string(&part.suffix), if part.modifier == PartModifier::ZeroOrMore { - "?" // TODO: https://github.com/WICG/urlpattern/issues/91 + "?" } else { "" } - )); + ) + .unwrap(); } result.push('$'); (result, name_list) @@ -178,11 +186,13 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String { result.push_str(&escape_pattern_string(&part.value)); continue; } - result.push_str(&format!( + write!( + result, "{{{}}}{}", escape_pattern_string(&part.value), part.modifier - )); + ) + .unwrap(); continue; } let custom_name = !part.name.chars().next().unwrap().is_ascii_digit(); @@ -229,9 +239,11 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String { } match part.kind { PartType::FixedText => unreachable!(), - PartType::Regexp => result.push_str(&format!("({})", part.value)), - PartType::SegmentWildcard if !custom_name => result - .push_str(&format!("({})", options.generate_segment_wildcard_regexp())), + PartType::Regexp => write!(result, "({})", part.value).unwrap(), + PartType::SegmentWildcard if !custom_name => { + write!(result, "({})", options.generate_segment_wildcard_regexp()) + .unwrap() + } PartType::SegmentWildcard => {} PartType::FullWildcard => { if !custom_name @@ -267,6 +279,7 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String { fn generate_matcher( mut part_list: &[&Part], options: &Options, + flags: &str, ) -> Matcher { fn is_literal(part: &Part) -> bool { part.kind == PartType::FixedText && part.modifier == PartModifier::None @@ -292,7 +305,14 @@ fn generate_matcher( // If there are no more parts, we must have a prefix and/or a suffix. We can // combine these into a single fixed text literal matcher. if part_list.is_empty() { - return Matcher::literal(format!("{prefix}{suffix}")); + return Matcher { + prefix: "".to_string(), + suffix: "".to_string(), + inner: InnerMatcher::Literal { + literal: format!("{prefix}{suffix}"), + }, + ignore_case: options.ignore_case, + }; } let inner = match part_list { @@ -330,7 +350,7 @@ fn generate_matcher( part_list => { let (regexp_string, _) = generate_regular_expression_and_name_list(part_list, options); - let regexp = R::parse(®exp_string).map_err(Error::RegExp); + let regexp = R::parse(®exp_string, flags).map_err(Error::RegExp); InnerMatcher::RegExp { regexp } } }; @@ -339,5 +359,6 @@ fn generate_matcher( prefix, suffix, inner, + ignore_case: options.ignore_case, } } diff --git a/src/lib.rs b/src/lib.rs index 7e54d47..7a73226 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,14 +16,24 @@ mod regexp; mod tokenizer; pub use error::Error; +use serde::Deserialize; +use serde::Serialize; use url::Url; +use crate::canonicalize_and_process::is_special_scheme; +use crate::canonicalize_and_process::process_base_url; use crate::canonicalize_and_process::special_scheme_default_port; use crate::canonicalize_and_process::ProcessType; -use crate::canonicalize_and_process::{is_special_scheme, process_base_url}; use crate::component::Component; use crate::regexp::RegExp; +/// Options to create a URL pattern. +#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct UrlPatternOptions { + pub ignore_case: bool, +} + /// The structured input used to create a URL pattern. #[derive(Debug, Default, Clone, Eq, PartialEq)] pub struct UrlPatternInit { @@ -264,7 +274,7 @@ fn is_absolute_pathname( /// pathname: Some("/users/:id".to_owned()), /// ..Default::default() /// }; -/// let pattern = ::parse(init).unwrap(); +/// let pattern = ::parse(init, Default::default()).unwrap(); /// /// // Match the pattern against a URL. /// let url = "https://example.com/users/123".parse().unwrap(); @@ -293,13 +303,17 @@ pub enum UrlPatternMatchInput { impl UrlPattern { // Ref: https://wicg.github.io/urlpattern/#dom-urlpattern-urlpattern /// Parse a [UrlPatternInit] into a [UrlPattern]. - pub fn parse(init: UrlPatternInit) -> Result { - Self::parse_internal(init, true) + pub fn parse( + init: UrlPatternInit, + options: UrlPatternOptions, + ) -> Result { + Self::parse_internal(init, true, options) } pub(crate) fn parse_internal( init: UrlPatternInit, report_regex_errors: bool, + options: UrlPatternOptions, ) -> Result { let mut processed_init = init.process( ProcessType::Pattern, @@ -352,18 +366,26 @@ impl UrlPattern { .optionally_transpose_regex_error(report_regex_errors)? }; + let compile_options = parser::Options { + ignore_case: options.ignore_case, + ..Default::default() + }; + let pathname = if protocol.protocol_component_matches_special_scheme() { Component::compile( processed_init.pathname.as_deref(), canonicalize_and_process::canonicalize_pathname, - parser::Options::pathname(), + parser::Options { + ignore_case: options.ignore_case, + ..parser::Options::pathname() + }, )? .optionally_transpose_regex_error(report_regex_errors)? } else { Component::compile( processed_init.pathname.as_deref(), canonicalize_and_process::canonicalize_an_opaque_pathname, - parser::Options::default(), + compile_options.clone(), )? .optionally_transpose_regex_error(report_regex_errors)? }; @@ -393,13 +415,13 @@ impl UrlPattern { search: Component::compile( processed_init.search.as_deref(), canonicalize_and_process::canonicalize_search, - parser::Options::default(), + compile_options.clone(), )? .optionally_transpose_regex_error(report_regex_errors)?, hash: Component::compile( processed_init.hash.as_deref(), canonicalize_and_process::canonicalize_hash, - parser::Options::default(), + compile_options, )? .optionally_transpose_regex_error(report_regex_errors)?, }) @@ -580,20 +602,23 @@ pub struct UrlPatternComponentResult { #[cfg(test)] mod tests { + use regex::Regex; use std::collections::HashMap; use serde::Deserialize; + use serde::Serialize; use url::Url; use crate::quirks; use crate::quirks::StringOrInit; use crate::UrlPatternComponentResult; + use crate::UrlPatternOptions; use crate::UrlPatternResult; use super::UrlPattern; use super::UrlPatternInit; - #[derive(Deserialize)] + #[derive(Debug, Deserialize)] #[serde(untagged)] #[allow(clippy::large_enum_variant)] enum ExpectedMatch { @@ -607,10 +632,18 @@ mod tests { groups: HashMap>, } - #[derive(Deserialize)] + #[allow(clippy::large_enum_variant)] + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + #[serde(untagged)] + pub enum StringOrInitOrOptions { + Options(UrlPatternOptions), + StringOrInit(quirks::StringOrInit), + } + + #[derive(Debug, Deserialize)] struct TestCase { skip: Option, - pattern: Vec, + pattern: Vec, #[serde(default)] inputs: Vec, expected_obj: Option, @@ -657,11 +690,34 @@ mod tests { } fn test_case(case: TestCase) { - let input = case.pattern.first().cloned(); - let mut base_url = case.pattern.get(1).and_then(|input| match input { - StringOrInit::String(str) => Some(str.clone()), - StringOrInit::Init(_) => None, - }); + let mut input = quirks::StringOrInit::Init(Default::default()); + let mut base_url = None; + let mut options = None; + + for (i, pattern_input) in case.pattern.into_iter().enumerate() { + match pattern_input { + StringOrInitOrOptions::StringOrInit(str_or_init) => { + if i == 0 { + input = str_or_init; + } else if i == 1 { + base_url = match str_or_init { + StringOrInit::String(str) => Some(str.clone()), + StringOrInit::Init(_) => None, + }; + } else if matches!(&case.expected_obj, Some(StringOrInit::String(s)) if s == "error") + { + println!("Expected not to pass due to bad parameters"); + println!("✅ Passed"); + return; + } else { + panic!("Failed to parse testcase"); + } + } + StringOrInitOrOptions::Options(opts) => { + options = Some(opts); + } + } + } println!("\n====="); println!( @@ -669,20 +725,23 @@ mod tests { serde_json::to_string(&input).unwrap(), serde_json::to_string(&base_url).unwrap() ); + if let Some(options) = &options { + println!("Options: {}", serde_json::to_string(&options).unwrap(),); + } if let Some(reason) = case.skip { println!("🟠 Skipping: {reason}"); return; } - let input = input.unwrap_or_else(|| StringOrInit::Init(Default::default())); - let init_res = quirks::process_construct_pattern_input( input.clone(), base_url.as_deref(), ); - let res = init_res.and_then(::parse); + let res = init_res.and_then(|init_res| { + UrlPattern::::parse(init_res, options.unwrap_or_default()) + }); let expected_obj = match case.expected_obj { Some(StringOrInit::String(s)) if s == "error" => { assert!(res.is_err()); @@ -870,8 +929,8 @@ mod tests { let actual_match = exec_res.unwrap(); assert_eq!( - test, expected_match.is_some(), + test, "pattern.test result is not correct" ); @@ -947,10 +1006,13 @@ mod tests { #[test] fn issue26() { - ::parse(UrlPatternInit { - pathname: Some("/:foo.".to_owned()), - ..Default::default() - }) + UrlPattern::::parse( + UrlPatternInit { + pathname: Some("/:foo.".to_owned()), + ..Default::default() + }, + Default::default(), + ) .unwrap(); } @@ -965,17 +1027,23 @@ mod tests { #[test] fn has_regexp_group() { - let pattern = ::parse(UrlPatternInit { - pathname: Some("/:foo.".to_owned()), - ..Default::default() - }) + let pattern = ::parse( + UrlPatternInit { + pathname: Some("/:foo.".to_owned()), + ..Default::default() + }, + Default::default(), + ) .unwrap(); assert!(!pattern.has_regexp_groups()); - let pattern = ::parse(UrlPatternInit { - pathname: Some("/(.*?)".to_owned()), - ..Default::default() - }) + let pattern = ::parse( + UrlPatternInit { + pathname: Some("/(.*?)".to_owned()), + ..Default::default() + }, + Default::default(), + ) .unwrap(); assert!(pattern.has_regexp_groups()); } diff --git a/src/matcher.rs b/src/matcher.rs index 6d0fe04..997f708 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -8,6 +8,7 @@ pub(crate) struct Matcher { pub prefix: String, pub suffix: String, pub inner: InnerMatcher, + pub ignore_case: bool, } #[derive(Debug)] @@ -41,14 +42,6 @@ pub(crate) enum InnerMatcher { } impl Matcher { - pub(crate) fn literal(literal: String) -> Self { - Matcher { - prefix: "".to_string(), - suffix: "".to_string(), - inner: InnerMatcher::Literal { literal }, - } - } - pub fn matches<'a>( &self, mut input: &'a str, @@ -72,7 +65,13 @@ impl Matcher { } match &self.inner { - InnerMatcher::Literal { literal } => (input == literal).then(Vec::new), + InnerMatcher::Literal { literal } => { + if self.ignore_case { + (input.to_lowercase() == literal.to_lowercase()).then(Vec::new) + } else { + (input == literal).then(Vec::new) + } + } InnerMatcher::SingleCapture { filter, allow_empty, @@ -81,7 +80,14 @@ impl Matcher { return None; } if let Some(filter) = filter { - if input.contains(*filter) { + if self.ignore_case { + if input + .to_lowercase() + .contains(filter.to_lowercase().collect::>().as_slice()) + { + return None; + } + } else if input.contains(*filter) { return None; } } diff --git a/src/parser.rs b/src/parser.rs index 69169eb..b9e7bc3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -26,7 +26,8 @@ pub enum RegexSyntax { pub struct Options { pub delimiter_code_point: Option, pub prefix_code_point: String, // TODO: It must contain one ASCII code point or the empty string. maybe Option? - regex_syntax: RegexSyntax, + pub regex_syntax: RegexSyntax, + pub ignore_case: bool, } impl std::default::Default for Options { @@ -37,6 +38,7 @@ impl std::default::Default for Options { delimiter_code_point: None, prefix_code_point: String::new(), regex_syntax: RegexSyntax::Rust, + ignore_case: false, } } } @@ -49,6 +51,7 @@ impl Options { delimiter_code_point: Some('.'), prefix_code_point: String::new(), regex_syntax: RegexSyntax::Rust, + ignore_case: false, } } @@ -59,6 +62,7 @@ impl Options { delimiter_code_point: Some('/'), prefix_code_point: String::from("/"), regex_syntax: RegexSyntax::Rust, + ignore_case: false, } } diff --git a/src/quirks.rs b/src/quirks.rs index 21e9211..a08a02f 100644 --- a/src/quirks.rs +++ b/src/quirks.rs @@ -9,6 +9,7 @@ use crate::component::Component; use crate::parser::RegexSyntax; use crate::regexp::RegExp; pub use crate::Error; +use crate::UrlPatternOptions; #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct UrlPatternInit { @@ -167,26 +168,30 @@ impl From> for InnerMatcher { } } -struct EcmaRegexp(String); +struct EcmaRegexp(String, String); impl RegExp for EcmaRegexp { fn syntax() -> RegexSyntax { RegexSyntax::EcmaScript } - fn parse(pattern: &str) -> Result { - Ok(EcmaRegexp(pattern.to_string())) + fn parse(pattern: &str, flags: &str) -> Result { + Ok(EcmaRegexp(pattern.to_string(), flags.to_string())) } fn matches<'a>(&self, text: &'a str) -> Option>> { - let regexp = regex::Regex::parse(&self.0).ok()?; + let regexp = regex::Regex::parse(&self.0, &self.1).ok()?; regexp.matches(text) } } /// Parse a pattern into its components. -pub fn parse_pattern(init: crate::UrlPatternInit) -> Result { - let pattern = crate::UrlPattern::::parse_internal(init, false)?; +pub fn parse_pattern( + init: crate::UrlPatternInit, + options: UrlPatternOptions, +) -> Result { + let pattern = + crate::UrlPattern::::parse_internal(init, false, options)?; let urlpattern = UrlPattern { has_regexp_groups: pattern.has_regexp_groups(), protocol: pattern.protocol.into(), @@ -210,10 +215,10 @@ pub fn process_match_input( let mut inputs = (input.clone(), None); let init = match input { StringOrInit::String(url) => { - let base_url = if let Some(base_url) = base_url_str { - match Url::parse(base_url) { + let base_url = if let Some(base_url_str) = base_url_str { + match Url::parse(base_url_str) { Ok(base_url) => { - inputs.1 = Some(base_url.to_string()); + inputs.1 = Some(base_url_str.to_string()); Some(base_url) } Err(_) => return Ok(None), diff --git a/src/regexp.rs b/src/regexp.rs index 38aefc7..78bba14 100644 --- a/src/regexp.rs +++ b/src/regexp.rs @@ -5,7 +5,7 @@ pub trait RegExp: Sized { /// Generates a regexp pattern for the given string. If the pattern is /// invalid, the parse function should return an error. - fn parse(pattern: &str) -> Result; + fn parse(pattern: &str, flags: &str) -> Result; /// Matches the given text against the regular expression and returns the list /// of captures. The matches are returned in the order they appear in the @@ -22,8 +22,8 @@ impl RegExp for regex::Regex { RegexSyntax::Rust } - fn parse(pattern: &str) -> Result { - regex::Regex::new(pattern).map_err(|_| ()) + fn parse(pattern: &str, flags: &str) -> Result { + regex::Regex::new(&format!("(?{flags}){pattern}")).map_err(|_| ()) } fn matches<'a>(&self, text: &'a str) -> Option>> { diff --git a/src/testdata/urlpatterntestdata.json b/src/testdata/urlpatterntestdata.json index 1a403d7..536dba5 100644 --- a/src/testdata/urlpatterntestdata.json +++ b/src/testdata/urlpatterntestdata.json @@ -1467,7 +1467,6 @@ } }, { - "skip": "https://github.com/denoland/rust-urlpattern/issues/12", "pattern": [{ "pathname": "/foo/bar" }], "inputs": [ "./foo/bar", "https://example.com" ], "expected_match": { @@ -2725,7 +2724,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [{ "pathname": "/foo/bar" }, { "ignoreCase": true }], "inputs": [{ "pathname": "/FOO/BAR" }], "expected_match": { @@ -2733,7 +2731,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [{ "ignoreCase": true }], "inputs": [{ "pathname": "/FOO/BAR" }], "expected_match": { @@ -2741,7 +2738,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [ "https://example.com:8080/foo?bar#baz", { "ignoreCase": true }], "inputs": [{ "pathname": "/FOO", "search": "BAR", "hash": "BAZ", @@ -2764,7 +2760,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [ "/foo?bar#baz", "https://example.com:8080", { "ignoreCase": true }], "inputs": [{ "pathname": "/FOO", "search": "BAR", "hash": "BAZ", @@ -2787,7 +2782,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [ "/foo?bar#baz", { "ignoreCase": true }, "https://example.com:8080" ], "inputs": [{ "pathname": "/FOO", "search": "BAR", "hash": "BAZ", diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 17014c6..dd25cfb 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -85,7 +85,7 @@ impl Tokenizer { self.token_list.push(Token { kind, index: self.index, - value, // TODO: check if this is right + value, }); self.index = next_pos; }