Skip to content

Commit

Permalink
Optimize output of non-required properties
Browse files Browse the repository at this point in the history
For this schema:

```
{
  "properties": {
    "a": {
      "const": "a"
    },
    "b": {
      "const": "b"
    },
    "c": {
      "const": "c"
    }
  }
}
```

We currently produce the following regex (spacing added around
alternatives for clarity):

```
\{("a":"a"(,"b":"b")?(,"c":"c")?
 |("a":"a",)?"b":"b"(,"c":"c")?
 |("a":"a",)?("b":"b",)?"c":"c")?\}
```

This works perfectly well, but contains redundancy: a JSON object with
all three fields is matched by every one of the three alternatives.

At the moment, the alternatives differ only in which field is mandatory.

I propose that we instead make each alternative model the choice of the
last field present in the object. This will produce a regex like this:

```
\{("a":"a"
 |("a":"a",)?"b":"b"
 |("a":"a",)?("b":"b",)?"c":"c")?\}
```

This will give us a shorter but 100% equivalent regex.
  • Loading branch information
414owen authored and torymur committed Dec 20, 2024
1 parent aae336c commit 5613a46
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 6 deletions.
6 changes: 3 additions & 3 deletions src/json_schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,7 @@ mod tests {
"title": "Character",
"type": "object"
}"#,
format!(r#"\{{([ ]?"name"[ ]?:[ ]?({STRING}|null)([ ]?,[ ]?"age"[ ]?:[ ]?({INTEGER}|null))?([ ]?,[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?[ ]?"age"[ ]?:[ ]?({INTEGER}|null)([ ]?,[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?([ ]?"age"[ ]?:[ ]?({INTEGER}|null)[ ]?,)?[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?[ ]?\}}"#).as_str(),
format!(r#"\{{([ ]?"name"[ ]?:[ ]?({STRING}|null)|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?[ ]?"age"[ ]?:[ ]?({INTEGER}|null)|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?([ ]?"age"[ ]?:[ ]?({INTEGER}|null)[ ]?,)?[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?[ ]?\}}"#).as_str(),
vec![
r#"{ "name" : "Player" }"#,
r#"{ "name" : "Player", "age" : 10, "strength" : 10 }"#,
Expand Down Expand Up @@ -898,7 +898,7 @@ mod tests {
] {
let json: Value = serde_json::from_str(schema).expect("Can't parse json");
let result = to_regex(&json, None).expect("To regex failed");
assert_eq!(result, regex);
assert_eq!(result, regex, "JSON Schema {} didn't match", schema);

let re = Regex::new(&result).expect("Regex failed");
for m in a_match {
Expand Down Expand Up @@ -1075,7 +1075,7 @@ mod tests {
assert!(result.is_ok(), "{:?}", result);
let regex = result.unwrap();
assert_eq!(
r#"\{([ ]?"node"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)([ ]?,[ ]?"next"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*))?[ ]?\})?|([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)[ ]?,)?[ ]?"next"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*))?[ ]?\})?[ ]?\})?[ ]?\}"#,
r#"\{([ ]?"node"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)|([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)[ ]?,)?[ ]?"next"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*))?[ ]?\})?[ ]?\})?[ ]?\}"#,
regex,
);
}
Expand Down
3 changes: 0 additions & 3 deletions src/json_schema/parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,6 @@ impl<'a> Parser<'a> {
pattern += &format!("({}{},)?", subregex, self.whitespace_pattern);
}
pattern += &property_subregexes[i];
for subregex in &property_subregexes[i + 1..] {
pattern += &format!("({},{})?", self.whitespace_pattern, subregex);
}
possible_patterns.push(pattern);
}

Expand Down

0 comments on commit 5613a46

Please sign in to comment.