Skip to content

Commit

Permalink
encode space to '%20' as per url standard
Browse files Browse the repository at this point in the history
Previously the space character was always encoded as '+'. This is
wrong, as the URL Standard [0] specifies that the default is '%20'.
A second function, `byte_serialize_space_as_plus`, has been introduced
as well; it replicates the old behavior and converts spaces to '+'.
Note that this changes the default behavior of `byte_serialize` and may
break callers that rely on spaces being encoded as '+'.

[0]: https://url.spec.whatwg.org/#string-percent-encode-after-encoding

Fixes: servo#927
Fixes: servo#888

Signed-off-by: Gabriel Goller <[email protected]>
  • Loading branch information
kaffarell committed Apr 30, 2024
1 parent de947ab commit c23893a
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 5 deletions.
60 changes: 57 additions & 3 deletions form_urlencoded/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,33 @@ impl<'a> Iterator for ParseIntoOwned<'a> {

/// The [`application/x-www-form-urlencoded` byte serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
/// Converts spaces (b' ') to the percent-encoded equivalent ("%20").
/// Use [`byte_serialize_space_as_plus`] to encode spaces as plus signs (b'+') instead.
///
/// Return an iterator of `&str` slices.
pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
ByteSerialize { bytes: input }
ByteSerialize {
bytes: input,
space_as_plus: false,
}
}

/// Serializes `input` with the [`application/x-www-form-urlencoded` byte serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer),
/// writing each space (b' ') as a plus sign (b'+').
///
/// Returns an iterator of `&str` slices.
pub fn byte_serialize_space_as_plus(input: &[u8]) -> ByteSerialize<'_> {
    // Opt in to the "space becomes '+'" behavior of the serializer.
    let space_as_plus = true;
    ByteSerialize {
        bytes: input,
        space_as_plus,
    }
}

/// Return value of `byte_serialize()` and `byte_serialize_space_as_plus()`.
#[derive(Debug)]
pub struct ByteSerialize<'a> {
// Input bytes that have not been serialized yet; the iterator advances this slice.
bytes: &'a [u8],
// When `true`, a space (b' ') is emitted as "+" instead of being percent-encoded.
space_as_plus: bool,
}

fn byte_serialized_unchanged(byte: u8) -> bool {
Expand All @@ -139,7 +156,7 @@ impl<'a> Iterator for ByteSerialize<'a> {
if let Some((&first, tail)) = self.bytes.split_first() {
if !byte_serialized_unchanged(first) {
self.bytes = tail;
return Some(if first == b' ' {
return Some(if first == b' ' && self.space_as_plus {
"+"
} else {
percent_encode_byte(first)
Expand Down Expand Up @@ -337,7 +354,7 @@ impl<'a, T: Target> Serializer<'a, T> {
/// .append_pair("foo", "bar & baz")
/// .append_pair("saison", "Été+hiver")
/// .finish();
/// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
/// assert_eq!(encoded, "foo=bar%20%26%20baz&saison=%C3%89t%C3%A9%2Bhiver");
/// ```
///
/// Panics if called more than once.
Expand Down Expand Up @@ -428,3 +445,40 @@ pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
}

/// An optional callback that turns a `&str` into bytes (borrowed or owned).
///
/// NOTE(review): presumably lets callers substitute a non-UTF-8 encoding before
/// percent-encoding, with `None` meaning "no override" — confirm at the use sites.
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;

#[cfg(test)]
mod tests {
    use alloc::string::String;

    use crate::{byte_serialize, byte_serialize_space_as_plus};

    // `byte_serialize` percent-encodes spaces as "%20".
    #[test]
    fn byte_serializer() {
        let cases = [
            ("c ool/org", "c%20ool%2Forg"),
            ("a🔒nother&bu=ck?et", "a%F0%9F%94%92nother%26bu%3Dck%3Fet"),
        ];

        for &(input, expected) in cases.iter() {
            let encoded: String = byte_serialize(input.as_bytes()).collect();
            assert_eq!(encoded, expected);
        }
    }

    // `byte_serialize_space_as_plus` keeps the legacy behavior: spaces become "+".
    #[test]
    fn byte_serializer_space_as_plus() {
        let cases = [
            ("c ool/org", "c+ool%2Forg"),
            (
                "a🔒nother&bu=ck?et ",
                "a%F0%9F%94%92nother%26bu%3Dck%3Fet+",
            ),
        ];

        for &(input, expected) in cases.iter() {
            let encoded: String = byte_serialize_space_as_plus(input.as_bytes()).collect();
            assert_eq!(encoded, expected);
        }
    }
}
4 changes: 2 additions & 2 deletions url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1623,9 +1623,9 @@ impl Url {
/// .clear()
/// .append_pair("foo", "bar & baz")
/// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
/// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
/// assert_eq!(url.query(), Some("foo=bar%20%26%20baz&saisons=%C3%89t%C3%A9%2Bhiver"));
/// assert_eq!(url.as_str(),
/// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
/// "https://example.net/?foo=bar%20%26%20baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
/// # Ok(())
/// # }
/// # run().unwrap();
Expand Down

0 comments on commit c23893a

Please sign in to comment.