From c23893a0d6bb5bcf4c093cb7c035dbc5ac101946 Mon Sep 17 00:00:00 2001 From: Gabriel Goller Date: Tue, 30 Apr 2024 16:20:33 +0200 Subject: [PATCH 1/2] encode space to '%20' as per url standard Previously the space character was exclusively encoded to '+'. This is wrong, as the URL Standard [0] specifies that the default is '%20'. Another function has been introduced as well, which replicates the old behavior and converts spaces to '+'. Notice that this breaks the default behavior and could lead to bugs. [0]: https://url.spec.whatwg.org/#string-percent-encode-after-encoding Fixes: #927 Fixes: #888 Signed-off-by: Gabriel Goller --- form_urlencoded/src/lib.rs | 60 ++++++++++++++++++++++++++++++++++++-- url/src/lib.rs | 4 +-- 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/form_urlencoded/src/lib.rs b/form_urlencoded/src/lib.rs index 1d68579b7..b6236284f 100644 --- a/form_urlencoded/src/lib.rs +++ b/form_urlencoded/src/lib.rs @@ -116,16 +116,33 @@ impl<'a> Iterator for ParseIntoOwned<'a> { /// The [`application/x-www-form-urlencoded` byte serializer]( /// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer). +/// Converts spaces (b' ') to the percent-encoded equivalent ("%20"). /// /// Return an iterator of `&str` slices. pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> { - ByteSerialize { bytes: input } + ByteSerialize { + bytes: input, + space_as_plus: false, + } +} + +/// The [`application/x-www-form-urlencoded` byte serializer]( +/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer). +/// Converts spaces (b' ') to plus signs (b'+'). +/// +/// Return an iterator of `&str` slices. +pub fn byte_serialize_space_as_plus(input: &[u8]) -> ByteSerialize<'_> { + ByteSerialize { + bytes: input, + space_as_plus: true, + } } /// Return value of `byte_serialize()`. 
#[derive(Debug)] pub struct ByteSerialize<'a> { bytes: &'a [u8], + space_as_plus: bool, } fn byte_serialized_unchanged(byte: u8) -> bool { @@ -139,7 +156,7 @@ impl<'a> Iterator for ByteSerialize<'a> { if let Some((&first, tail)) = self.bytes.split_first() { if !byte_serialized_unchanged(first) { self.bytes = tail; - return Some(if first == b' ' { + return Some(if first == b' ' && self.space_as_plus { "+" } else { percent_encode_byte(first) @@ -337,7 +354,7 @@ impl<'a, T: Target> Serializer<'a, T> { /// .append_pair("foo", "bar & baz") /// .append_pair("saison", "Été+hiver") /// .finish(); - /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver"); + /// assert_eq!(encoded, "foo=bar%20%26%20baz&saison=%C3%89t%C3%A9%2Bhiver"); /// ``` /// /// Panics if called more than once. @@ -428,3 +445,40 @@ pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> { } pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>; + +#[cfg(test)] +mod tests { + use alloc::string::String; + + use crate::{byte_serialize, byte_serialize_space_as_plus}; + + #[test] + fn byte_serializer() { + let in_1 = "c ool/org"; + let out_1 = "c%20ool%2Forg"; + + let in_2 = "a🔒nother&bu=ck?et"; + let out_2 = "a%F0%9F%94%92nother%26bu%3Dck%3Fet"; + + assert_eq!(byte_serialize(in_1.as_bytes()).collect::<String>(), out_1); + assert_eq!(byte_serialize(in_2.as_bytes()).collect::<String>(), out_2); + } + + #[test] + fn byte_serializer_space_as_plus() { + let in_1 = "c ool/org"; + let out_1 = "c+ool%2Forg"; + + let in_2 = "a🔒nother&bu=ck?et "; + let out_2 = "a%F0%9F%94%92nother%26bu%3Dck%3Fet+"; + + assert_eq!( + byte_serialize_space_as_plus(in_1.as_bytes()).collect::<String>(), + out_1 + ); + assert_eq!( + byte_serialize_space_as_plus(in_2.as_bytes()).collect::<String>(), + out_2 + ); + } +} diff --git a/url/src/lib.rs b/url/src/lib.rs index 1959a1213..8a8139127 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -1623,9 +1623,9 @@ impl Url { /// .clear() /// .append_pair("foo", "bar & baz") /// 
.append_pair("saisons", "\u{00C9}t\u{00E9}+hiver"); - /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver")); + /// assert_eq!(url.query(), Some("foo=bar%20%26%20baz&saisons=%C3%89t%C3%A9%2Bhiver")); /// assert_eq!(url.as_str(), - /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav"); + /// "https://example.net/?foo=bar%20%26%20baz&saisons=%C3%89t%C3%A9%2Bhiver#nav"); /// # Ok(()) /// # } /// # run().unwrap(); From 4e3f1b576cfca304945fbcb523ed7cf3f1876929 Mon Sep 17 00:00:00 2001 From: Gabriel Goller Date: Thu, 2 May 2024 11:36:29 +0200 Subject: [PATCH 2/2] fix docs link Fixed link to url spec. Signed-off-by: Gabriel Goller --- form_urlencoded/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/form_urlencoded/src/lib.rs b/form_urlencoded/src/lib.rs index b6236284f..711ab6210 100644 --- a/form_urlencoded/src/lib.rs +++ b/form_urlencoded/src/lib.rs @@ -115,7 +115,7 @@ impl<'a> Iterator for ParseIntoOwned<'a> { } /// The [`application/x-www-form-urlencoded` byte serializer]( -/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer). +/// https://url.spec.whatwg.org/#string-percent-encode-after-encoding). /// Converts spaces (b' ') to the percent-encoded equivalent ("%20"). /// /// Return an iterator of `&str` slices. @@ -127,7 +127,7 @@ pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> { } /// The [`application/x-www-form-urlencoded` byte serializer]( -/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer). +/// https://url.spec.whatwg.org/#string-percent-encode-after-encoding). /// Converts spaces (b' ') to plus signs (b'+'). /// /// Return an iterator of `&str` slices.