From 1b1bda3fb15001ad5fbf05a7174bfc9ae72b486b Mon Sep 17 00:00:00 2001 From: NSoiffer Date: Fri, 3 Jan 2025 21:19:07 -0800 Subject: [PATCH] Restrict interval intent inference -- avoid nested "interval"s. Fixes #329 As part of this, I've added a new file to test intent inferences. --- Rules/Intent/general.yaml | 8 +++-- tests/Languages/intent.rs | 75 +++++++++++++++++++++++++++++++++++++++ tests/common/mod.rs | 60 +++++++++++++++++++++++++++++++ tests/languages.rs | 1 + 4 files changed, 142 insertions(+), 2 deletions(-) create mode 100644 tests/Languages/intent.rs diff --git a/Rules/Intent/general.yaml b/Rules/Intent/general.yaml index 8e260b6c..57d4d793 100644 --- a/Rules/Intent/general.yaml +++ b/Rules/Intent/general.yaml @@ -76,7 +76,9 @@ # (a,b) has many interpretations; (a, b] (etc) have fewer interpretations. # as an interval, it represents a set and hence a clue that it is an interval is that a set operator comes # before or after it. '=' is also common. - # They also commonly stand by themselves, but so does the interpretation as a point or gcd, so we don't inclub (xxx, yyy) in the inference + # They also commonly stand by themselves, but so does the interpretation as a point or gcd, so we don't include (xxx, yyy) in the inference + # Note [x,y] is also a commutator. + # Intervals are never contained in other intervals, so rule them out if there is an ancestor that could be an interval name: interval tag: mrow match: @@ -84,7 +86,9 @@ # FIX: consider adding ]...[ versions - "(*[1][text()='(' or text()='['] and *[3][text()=')' or text()=']']) and" # match bracketing - "(*[2][count(*)=3 and *[2][text()=',']]) and" # inside should have ',' - - "not(ancestor::*[IsBracketed(., '{', '}')]) and " # intervals are not part of set notation (e.g, { (x,y)∈L | ...}) + # intervals are not part of set notation (e.g, { (x,y)∈L | ...}) nor are they nested + - "not(ancestor::*[IsBracketed(., '{', '}') or IsBracketed(., '[', ']') or IsBracketed(., '(', ')')]) and " + - "not(descendant::*[IsBracketed(., '{', '}') or IsBracketed(., '[', ']') or IsBracketed(., '(', ')')]) and " # FIX: if both the first and third children of *[2] are mn, then make sure first <= third - "(" - " not(IsBracketed(., '(', ')')) or " # (.,.) is very ambiguous -- need more clues diff --git a/tests/Languages/intent.rs b/tests/Languages/intent.rs new file mode 100644 index 00000000..23e64a42 --- /dev/null +++ b/tests/Languages/intent.rs @@ -0,0 +1,75 @@ +/// Tests for: +/// * functions including trig functions, logs, and functions to powers +/// * implied times/functional call and explicit times/function call +/// * parens +/// These are all intertwined, so they are in one file +use crate::common::*; + +#[test] +fn binomial() { + let mathml = " + ( + 7 3 + ) + "; + let intent = " + + 7 + 3 + + "; + test_intent(mathml, intent, vec![]); +} + +#[test] +fn closed_interval() { + let expr = r#" + [ + a + , + b + ] + "#; + let target = " + + a + b + + "; + test_intent(expr, target, vec![]); +} + +#[test] +fn nested_interval_bug_329() { + let expr = r#" + [ + A + , + [ + B + , + C + ] + ] + "#; + let target = " + + [ + + A + , + + [ + + B + , + C + + ] + + + ] + + "; + test_intent(expr, target, vec![]); +} \ No newline at end of file diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 93bc0e47..c4cbc58d 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -63,6 +63,8 @@ pub fn test(language: &str, style: &str, mathml: &str, speech: &str) { prefs.set_user_prefs("PauseFactor", "100").unwrap(); // makes testing simpler prefs.set_user_prefs("Verbosity", "Medium").unwrap(); prefs.set_user_prefs("Impairment", "Blindness").unwrap(); + prefs.set_user_prefs("DecimalSeparators", ".").unwrap(); + prefs.set_user_prefs("BlockSeparators", ", ").unwrap(); }); set_preference("Language".to_string(), language.to_string()).unwrap(); @@ -161,3 +163,61 @@ pub fn test_braille_prefs(code: &str, test_prefs: Vec<(&str, &str)>, mathml: &st }; } +#[allow(dead_code)] // used in testing +pub fn test_intent(mathml: &str, target: &str, test_prefs: Vec<(&str, &str)>) { + use sxd_document::{parser, dom::Element}; + set_rules_dir(abs_rules_dir_path()).unwrap(); + libmathcat::speech::SPEECH_RULES.with(|rules| { + let rules = rules.borrow_mut(); + let mut prefs = rules.pref_manager.borrow_mut(); + prefs.set_user_prefs("DecimalSeparators", ".").unwrap(); + prefs.set_user_prefs("BlockSeparators", ", ").unwrap(); + }); + + // crate::speech::SpeechRules::initialize_all_rules().unwrap(); + set_preference("IntentErrorRecovery".to_string(), "Error".to_string()).unwrap(); + for (pref_name, pref_value) in test_prefs.clone() { + set_preference(pref_name.to_string(), pref_value.to_string()).unwrap(); + }; + + let package = &parser::parse(target).expect("Failed to parse target input"); + let target = get_element(package); + trim_element(&target); + + let canonical_mathml = match set_mathml(mathml.to_string()) { + Ok(e) => e, + Err(e) => panic!("In set_mathml: {}", libmathcat::errors_to_string(&e)), + }; + let package = &parser::parse(&canonical_mathml).expect("Failed to parse target input"); + let canonical_mathml = get_element(package); + trim_element(&canonical_mathml); + let computed_intent = match libmathcat::speech::intent_from_mathml(canonical_mathml, package.as_document()) { + Ok(e) => e, + Err(e) => panic!("in intent_from_mathml: {}", libmathcat::errors_to_string(&e)), + }; + + // remove some attrs that make it harder to handwrite what the intent is: + // 'id' and 'data-id-added'; leaving 'data-from-mathml' as that is used by the code + clean_attrs(computed_intent); + + match is_same_element(&computed_intent, &target) { + Ok(_) => return , + Err(e) => panic!("{}", e), + } + + fn clean_attrs<'a>(mathml: Element<'a>) -> Element<'a> { + mathml.remove_attribute("id"); + mathml.remove_attribute("data-id-added"); + + let children = mathml.children(); + if children.is_empty() || (children.len() == 1 && children[0].element().is_none()) { + return mathml; + } + + for child in children { + clean_attrs(child.element().unwrap()); + } + return mathml; + } +} + diff --git a/tests/languages.rs b/tests/languages.rs index 7328bfba..b2b69e55 100644 --- a/tests/languages.rs +++ b/tests/languages.rs @@ -3,6 +3,7 @@ mod common; mod Languages { + mod intent; mod zh; mod en; mod fi;