Skip to content

Commit

Permalink
Merge branch 'main' into delimiter-char
Browse files Browse the repository at this point in the history
  • Loading branch information
crowlKats authored Jul 26, 2024
2 parents 138d568 + 218f408 commit 6cc1098
Show file tree
Hide file tree
Showing 12 changed files with 590 additions and 237 deletions.
5 changes: 2 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ repository = "https://github.com/denoland/rust-urlpattern"
license = "MIT"

[dependencies]
derive_more = "0.99.16"
url = "2.2.2"
regex = "1.4.3"
url = "2.4.1"
regex = "1.10.5"
serde = { version = "1.0.127", features = ["derive"] }
unic-ucd-ident = { version = "0.9.0", features = ["id"] }

Expand Down
22 changes: 22 additions & 0 deletions src/canonicalize_and_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,25 @@ pub fn special_scheme_default_port(scheme: &str) -> Option<&'static str> {
_ => None,
}
}

// Ref: https://urlpattern.spec.whatwg.org/#process-a-base-url-string
/// Prepares a base URL string for use according to `kind`.
///
/// When `kind` is [`ProcessType::Pattern`] the input is run through
/// [`escape_pattern_string`] so that pattern syntax characters are taken
/// literally; for every other kind the input is returned as an owned copy,
/// unchanged.
pub fn process_base_url(input: &str, kind: &ProcessType) -> String {
  let is_pattern = *kind == ProcessType::Pattern;
  if is_pattern {
    return escape_pattern_string(input);
  }
  input.to_string()
}

// Ref: https://wicg.github.io/urlpattern/#escape-a-pattern-string
/// Returns a copy of `input` in which each of the characters
/// `+ * ? : { } ( ) \` is preceded by a backslash.
///
/// All other characters are copied through unchanged.
///
/// # Panics
///
/// Panics if `input` contains any non-ASCII character.
pub fn escape_pattern_string(input: &str) -> String {
  assert!(input.is_ascii());
  input.chars().fold(String::new(), |mut escaped, ch| {
    // Characters with syntactic meaning get a leading backslash.
    if let '+' | '*' | '?' | ':' | '{' | '}' | '(' | ')' | '\\' = ch {
      escaped.push('\\');
    }
    escaped.push(ch);
    escaped
  })
}
22 changes: 7 additions & 15 deletions src/component.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.

use crate::canonicalize_and_process::escape_pattern_string;
use crate::matcher::InnerMatcher;
use crate::matcher::Matcher;
use crate::parser::Options;
Expand All @@ -18,6 +19,7 @@ pub(crate) struct Component<R: RegExp> {
pub regexp: Result<R, Error>,
pub group_name_list: Vec<String>,
pub matcher: Matcher<R>,
pub has_regexp_group: bool,
}

impl<R: RegExp> Component<R> {
Expand Down Expand Up @@ -46,6 +48,9 @@ impl<R: RegExp> Component<R> {
regexp,
group_name_list: name_list,
matcher,
has_regexp_group: part_list
.iter()
.any(|part| part.kind == PartType::Regexp),
})
}

Expand All @@ -67,13 +72,13 @@ impl<R: RegExp> Component<R> {
pub(crate) fn create_match_result(
&self,
input: String,
exec_result: Vec<&str>,
exec_result: Vec<Option<&str>>,
) -> crate::UrlPatternComponentResult {
let groups = self
.group_name_list
.clone()
.into_iter()
.zip(exec_result.into_iter().map(str::to_owned))
.zip(exec_result.into_iter().map(|s| s.map(str::to_owned)))
.collect();
crate::UrlPatternComponentResult { input, groups }
}
Expand Down Expand Up @@ -258,19 +263,6 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String {
result
}

// Ref: https://wicg.github.io/urlpattern/#escape-a-pattern-string
/// Backslash-escapes the characters `+ * ? : { } ( ) \` in `input` and
/// returns the result; every other character is copied as-is.
///
/// # Panics
///
/// Panics if `input` contains any non-ASCII character.
fn escape_pattern_string(input: &str) -> String {
  // Characters that must be prefixed with a backslash.
  const SYNTAX_CHARS: [char; 9] =
    ['+', '*', '?', ':', '{', '}', '(', ')', '\\'];

  assert!(input.is_ascii());
  let mut escaped = String::new();
  for ch in input.chars() {
    if SYNTAX_CHARS.contains(&ch) {
      escaped.push('\\');
    }
    escaped.push(ch);
  }
  escaped
}

/// This function generates a matcher for a given parts list.
fn generate_matcher<R: RegExp>(
mut part_list: &[&Part],
Expand Down
87 changes: 71 additions & 16 deletions src/constructor_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ impl<'a> ConstructorStringParser<'a> {
}

// Ref: https://wicg.github.io/urlpattern/#change-state
fn change_state(&mut self, state: ConstructorStringParserState, skip: usize) {
fn change_state(
&mut self,
new_state: ConstructorStringParserState,
skip: usize,
) {
match self.state {
ConstructorStringParserState::Protocol => {
self.result.protocol = Some(self.make_component_string())
Expand All @@ -153,10 +157,69 @@ impl<'a> ConstructorStringParser<'a> {
ConstructorStringParserState::Hash => {
self.result.hash = Some(self.make_component_string())
}
_ => {}
ConstructorStringParserState::Init
| ConstructorStringParserState::Authority
| ConstructorStringParserState::Done => {}
}

self.state = state;
if self.state != ConstructorStringParserState::Init
&& new_state != ConstructorStringParserState::Done
{
if matches!(
self.state,
ConstructorStringParserState::Protocol
| ConstructorStringParserState::Authority
| ConstructorStringParserState::Username
| ConstructorStringParserState::Password
) && matches!(
new_state,
ConstructorStringParserState::Port
| ConstructorStringParserState::Pathname
| ConstructorStringParserState::Search
| ConstructorStringParserState::Hash
) && self.result.hostname.is_none()
{
self.result.hostname = Some(String::new());
}

if matches!(
self.state,
ConstructorStringParserState::Protocol
| ConstructorStringParserState::Authority
| ConstructorStringParserState::Username
| ConstructorStringParserState::Password
| ConstructorStringParserState::Hostname
| ConstructorStringParserState::Port
) && matches!(
new_state,
ConstructorStringParserState::Search
| ConstructorStringParserState::Hash
) && self.result.pathname.is_none()
{
if self.protocol_matches_special_scheme {
self.result.pathname = Some(String::from("/"));
} else {
self.result.pathname = Some(String::new());
}
}

if matches!(
self.state,
ConstructorStringParserState::Protocol
| ConstructorStringParserState::Authority
| ConstructorStringParserState::Username
| ConstructorStringParserState::Password
| ConstructorStringParserState::Hostname
| ConstructorStringParserState::Port
| ConstructorStringParserState::Pathname
) && new_state == ConstructorStringParserState::Hash
&& self.result.search.is_none()
{
self.result.search = Some(String::new());
}
}

self.state = new_state;
self.token_index += skip;
self.component_start = self.token_index;
self.token_increment = 0;
Expand Down Expand Up @@ -273,11 +336,8 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
parser.change_state(ConstructorStringParserState::Hash, 1);
} else if parser.is_search_prefix() {
parser.change_state(ConstructorStringParserState::Search, 1);
parser.result.hash = Some(String::new());
} else {
parser.change_state(ConstructorStringParserState::Pathname, 0);
parser.result.search = Some(String::new());
parser.result.hash = Some(String::new());
}
parser.token_index += parser.token_increment;
continue;
Expand Down Expand Up @@ -306,22 +366,12 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
match parser.state {
ConstructorStringParserState::Init => {
if parser.is_protocol_suffix() {
parser.result.username = Some(String::new());
parser.result.password = Some(String::new());
parser.result.hostname = Some(String::new());
parser.result.port = Some(String::new());
parser.result.pathname = Some(String::new());
parser.result.search = Some(String::new());
parser.result.hash = Some(String::new());
parser.rewind_and_set_state(ConstructorStringParserState::Protocol);
}
}
ConstructorStringParserState::Protocol => {
if parser.is_protocol_suffix() {
parser.compute_protocol_matches_special_scheme::<R>()?;
if parser.protocol_matches_special_scheme {
parser.result.pathname = Some(String::from("/"));
}
let mut next_state = ConstructorStringParserState::Pathname;
let mut skip = 1;
if parser.next_is_authority_slashes() {
Expand Down Expand Up @@ -398,5 +448,10 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
}
parser.token_index += parser.token_increment;
}

if parser.result.hostname.is_some() && parser.result.port.is_none() {
parser.result.port = Some(String::new());
}

Ok(parser.result)
}
81 changes: 54 additions & 27 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -1,53 +1,80 @@
use derive_more::Display;
use std::fmt;

use crate::tokenizer::TokenType;

/// An error occurring during URL pattern construction or matching.
#[derive(Display)]
#[derive(Debug)]
pub enum Error {
#[display(fmt = "a relative input without a base URL is not valid")]
BaseUrlRequired,

#[display(
fmt = "specifying both an init object, and a separate base URL is not valid"
)]
BaseUrlWithInit,

#[display(fmt = "tokenizer error: {_0} (at char {_1})")]
Tokenizer(TokenizerError, usize),

#[display(fmt = "parser error: {_0}")]
Parser(ParserError),

Url(url::ParseError),

#[display(fmt = "regexp error")]
RegExp(()),
}

impl std::error::Error for Error {}

impl std::fmt::Debug for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self, f)
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Error::BaseUrlRequired => {
f.write_str("a relative input without a base URL is not valid")
}
Error::BaseUrlWithInit => f.write_str(
"specifying both an init object, and a separate base URL is not valid",
),
Error::Tokenizer(err, pos) => {
write!(f, "tokenizer error: {err} (at char {pos})")
}
Error::Parser(err) => write!(f, "parser error: {err}"),
Error::Url(err) => err.fmt(f),
Error::RegExp(_) => f.write_str("regexp error"),
}
}
}

#[derive(Debug, Display)]
impl std::error::Error for Error {}

#[derive(Debug)]
pub enum TokenizerError {
#[display(fmt = "incomplete escape code")]
IncompleteEscapeCode,
#[display(fmt = "invalid name; must be at least length 1")]
InvalidName,
#[display(fmt = "invalid regex: {_0}")]
InvalidRegex(&'static str),
}

#[derive(Debug, Display)]
impl fmt::Display for TokenizerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::IncompleteEscapeCode => f.write_str("incomplete escape code"),
Self::InvalidName => {
f.write_str("invalid name; must be at least length 1")
}
Self::InvalidRegex(err) => write!(f, "invalid regex: {err}"),
}
}
}

impl std::error::Error for TokenizerError {}

#[derive(Debug)]
pub enum ParserError {
#[display(fmt = "expected token {_0}, found '{_2}' of type {_1}")]
ExpectedToken(TokenType, TokenType, String),

#[display(fmt = "pattern contains duplicate name {_0}")]
DuplicateName(String),
}

impl fmt::Display for ParserError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::ExpectedToken(expected_ty, found_ty, found_val) => {
write!(
f,
"expected token {expected_ty:?}, found '{found_val}' of type {found_ty:?}"
)
}
Self::DuplicateName(name) => {
write!(f, "pattern contains duplicate name {name}")
}
}
}
}

impl std::error::Error for ParserError {}
Loading

0 comments on commit 6cc1098

Please sign in to comment.