From dc14a22457b747d7166e4e09ce360cd1853d5e3c Mon Sep 17 00:00:00 2001 From: Aakash Thatte Date: Thu, 19 Dec 2024 20:50:17 +0530 Subject: [PATCH 1/7] update parsing for uri --- src/json_schema/parsing.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/json_schema/parsing.rs b/src/json_schema/parsing.rs index 9ac373b..c7eef77 100644 --- a/src/json_schema/parsing.rs +++ b/src/json_schema/parsing.rs @@ -377,11 +377,18 @@ impl<'a> Parser<'a> { Ok(format!(r#"("{}")"#, pattern)) } } else if let Some(format) = obj.get("format").and_then(Value::as_str) { - match types::FormatType::from_str(format) { - Some(format_type) => Ok(format_type.to_regex().to_string()), - None => Err(JsonSchemaParserError::StringTypeUnsupportedFormat( - Box::from(format), - )), + if format == "uri" { + // Regex for URI format as per JSON Schema + // let uri_regex = r"^(https?|ftp|urn):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?$"; + let uri_regex = r"^(https?|ftp):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?$|^urn:[a-zA-Z\d][a-zA-Z\d\-]{0,31}:[^\s]+$"; + Ok(uri_regex.to_string()) + } else { + match types::FormatType::from_str(format) { + Some(format_type) => Ok(format_type.to_regex().to_string()), + None => Err(JsonSchemaParserError::StringTypeUnsupportedFormat( + Box::from(format), + )), + } } } else { Ok(types::JsonType::String.to_regex().to_string()) From cbcb5e918a928b5d43714aede4cc86d63247cba1 Mon Sep 17 00:00:00 2001 From: Aakash Thatte Date: Thu, 19 Dec 2024 20:50:56 +0530 Subject: [PATCH 2/7] add test for parsing uri --- src/json_schema/mod.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/json_schema/mod.rs b/src/json_schema/mod.rs index 950306c..fddec41 100644 --- a/src/json_schema/mod.rs +++ b/src/json_schema/mod.rs @@ -1098,4 +1098,34 @@ mod tests { let regex = 
to_regex(&json_value, None); assert!(regex.is_ok(), "{:?}", regex); } + + #[test] + fn test_uri_format() { + let schema = r#"{"title": "Foo", "type": "string", "format": "uri"}"#; + let json: Value = serde_json::from_str(schema).expect("Can't parse json"); + let regex = to_regex(&json, None).expect("To regex failed"); + let re = Regex::new(&regex).expect("Regex failed"); + + let valid_uris = vec![ + "http://example.com", + "https://example.com/path?query=param#fragment", + "ftp://ftp.example.com/resource", + "urn:isbn:0451450523", + ]; + + let invalid_uris = vec![ + "http:/example.com", // missing slash + "htp://example.com", // invalid scheme + "http://", // missing host + "example.com", // missing scheme + ]; + + for uri in valid_uris { + assert!(re.is_match(uri), "Valid URI failed: {}", uri); + } + + for uri in invalid_uris { + assert!(!re.is_match(uri), "Invalid URI matched: {}", uri); + } + } } From 4b194035244dcbb3e73781022542381f91d3b855 Mon Sep 17 00:00:00 2001 From: Aakash Thatte Date: Thu, 19 Dec 2024 21:40:14 +0530 Subject: [PATCH 3/7] integrate uri test cases as part of main test_schema_matches_regex --- src/json_schema/mod.rs | 49 ++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/src/json_schema/mod.rs b/src/json_schema/mod.rs index fddec41..af2e5b7 100644 --- a/src/json_schema/mod.rs +++ b/src/json_schema/mod.rs @@ -876,6 +876,25 @@ mod tests { ], vec!["this isnt valid json"], ), + // ========================================================== + // URI Format + // ========================================================== + ( + r#"{"title": "Foo", "type": "string", "format": "uri"}"#, + r"^(https?|ftp):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?$|^urn:[a-zA-Z\d][a-zA-Z\d\-]{0,31}:[^\s]+$", + vec![ + "http://example.com", + "https://example.com/path?query=param#fragment", + "ftp://ftp.example.com/resource", + "urn:isbn:0451450523", + ],
+ vec![ + "http:/example.com", // missing slash + "htp://example.com", // invalid scheme + "http://", // missing host + "example.com", // missing scheme + ], + ), ] { let json: Value = serde_json::from_str(schema).expect("Can't parse json"); let result = to_regex(&json, None).expect("To regex failed"); @@ -1098,34 +1117,4 @@ mod tests { let regex = to_regex(&json_value, None); assert!(regex.is_ok(), "{:?}", regex); } - - #[test] - fn test_uri_format() { - let schema = r#"{"title": "Foo", "type": "string", "format": "uri"}"#; - let json: Value = serde_json::from_str(schema).expect("Can't parse json"); - let regex = to_regex(&json, None).expect("To regex failed"); - let re = Regex::new(&regex).expect("Regex failed"); - - let valid_uris = vec![ - "http://example.com", - "https://example.com/path?query=param#fragment", - "ftp://ftp.example.com/resource", - "urn:isbn:0451450523", - ]; - - let invalid_uris = vec![ - "http:/example.com", // missing slash - "htp://example.com", // invalid scheme - "http://", // missing host - "example.com", // missing scheme - ]; - - for uri in valid_uris { - assert!(re.is_match(uri), "Valid URI failed: {}", uri); - } - - for uri in invalid_uris { - assert!(!re.is_match(uri), "Invalid URI matched: {}", uri); - } - } } From 9926d0256b3daa9915943425c2482d86050e58de Mon Sep 17 00:00:00 2001 From: Aakash Thatte Date: Thu, 19 Dec 2024 21:40:39 +0530 Subject: [PATCH 4/7] remove comment --- src/json_schema/parsing.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/json_schema/parsing.rs b/src/json_schema/parsing.rs index c7eef77..235632d 100644 --- a/src/json_schema/parsing.rs +++ b/src/json_schema/parsing.rs @@ -378,8 +378,6 @@ impl<'a> Parser<'a> { } } else if let Some(format) = obj.get("format").and_then(Value::as_str) { if format == "uri" { - // Regex for URI format as per JSON Schema - // let uri_regex = r"^(https?|ftp|urn):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?$";
let uri_regex = r"^(https?|ftp):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?$|^urn:[a-zA-Z\d][a-zA-Z\d\-]{0,31}:[^\s]+$"; Ok(uri_regex.to_string()) } else { From 113d06e6bd2aaf5b4213ea815094547d677a4794 Mon Sep 17 00:00:00 2001 From: Aakash Thatte Date: Thu, 19 Dec 2024 21:48:38 +0530 Subject: [PATCH 5/7] Revert "update parsing for uri" This reverts commit dc14a22457b747d7166e4e09ce360cd1853d5e3c. --- src/json_schema/parsing.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/json_schema/parsing.rs b/src/json_schema/parsing.rs index 235632d..9ac373b 100644 --- a/src/json_schema/parsing.rs +++ b/src/json_schema/parsing.rs @@ -377,16 +377,11 @@ impl<'a> Parser<'a> { Ok(format!(r#"("{}")"#, pattern)) } } else if let Some(format) = obj.get("format").and_then(Value::as_str) { - if format == "uri" { - let uri_regex = r"^(https?|ftp):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?$|^urn:[a-zA-Z\d][a-zA-Z\d\-]{0,31}:[^\s]+$"; - Ok(uri_regex.to_string()) - } else { - match types::FormatType::from_str(format) { - Some(format_type) => Ok(format_type.to_regex().to_string()), - None => Err(JsonSchemaParserError::StringTypeUnsupportedFormat( - Box::from(format), - )), - } + match types::FormatType::from_str(format) { + Some(format_type) => Ok(format_type.to_regex().to_string()), + None => Err(JsonSchemaParserError::StringTypeUnsupportedFormat( + Box::from(format), + )), } } else { Ok(types::JsonType::String.to_regex().to_string()) From 57e58e32ca7b3f94da0950f2ecae8c35f0b69fd8 Mon Sep 17 00:00:00 2001 From: Aakash Thatte Date: Thu, 19 Dec 2024 21:49:51 +0530 Subject: [PATCH 6/7] add URI to types --- src/json_schema/types.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/json_schema/types.rs b/src/json_schema/types.rs index aff5e53..cc2240b 100644 --- a/src/json_schema/types.rs +++ b/src/json_schema/types.rs 
@@ -34,6 +34,7 @@ pub static DATE_TIME: &str = r#""(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3 pub static DATE: &str = r#""(?:\d{4})-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])""#; pub static TIME: &str = r#""(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\.[0-9]+)?(Z)?""#; pub static UUID: &str = r#""[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}""#; +pub static URI: &str = r#"^(https?|ftp):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?$|^urn:[a-zA-Z\d][a-zA-Z\d\-]{0,31}:[^\s]+$"#; #[derive(Debug, PartialEq)] pub enum FormatType { @@ -41,6 +42,7 @@ pub enum FormatType { Date, Time, Uuid, + Uri, } impl FormatType { @@ -50,6 +52,7 @@ impl FormatType { FormatType::Date => DATE, FormatType::Time => TIME, FormatType::Uuid => UUID, + FormatType::Uri => URI, } } @@ -60,6 +63,7 @@ impl FormatType { "date" => Some(FormatType::Date), "time" => Some(FormatType::Time), "uuid" => Some(FormatType::Uuid), + "uri" => Some(FormatType::Uri), _ => None, } } From bed3bfdcd0b7b406cb072f2bb07c67f4e466110e Mon Sep 17 00:00:00 2001 From: Aakash Thatte Date: Thu, 19 Dec 2024 21:59:18 +0530 Subject: [PATCH 7/7] use URI in tests --- src/json_schema/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/json_schema/mod.rs b/src/json_schema/mod.rs index af2e5b7..4ec5c7c 100644 --- a/src/json_schema/mod.rs +++ b/src/json_schema/mod.rs @@ -881,7 +881,7 @@ mod tests { // ========================================================== ( r#"{"title": "Foo", "type": "string", "format": "uri"}"#, - r"^(https?|ftp):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?$|^urn:[a-zA-Z\d][a-zA-Z\d\-]{0,31}:[^\s]+$", + URI, vec![ "http://example.com", "https://example.com/path?query=param#fragment",