Skip to content

Commit

Permalink
Restrict interval intent inference -- avoid nested "interval"s. Fixes #…
Browse files Browse the repository at this point in the history
…329

As part of this, I've added a new file to test intent inferences.
  • Loading branch information
NSoiffer committed Jan 4, 2025
1 parent 3e5dd89 commit 1b1bda3
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 2 deletions.
8 changes: 6 additions & 2 deletions Rules/Intent/general.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,19 @@
# (a,b) has many interpretations; (a, b] (etc) have fewer interpretations.
# as an interval, it represents a set and hence a clue that it is an interval is that a set operator comes
# before or after it. '=' is also common.
# They also commonly stand by themselves, but so does the interpretation as a point or gcd, so we don't inclub (xxx, yyy) in the inference
# They also commonly stand by themselves, but so does the interpretation as a point or gcd, so we don't include (xxx, yyy) in the inference
# Note [x,y] is also a commutator.
# Intervals are never contained in other intervals, so rule them out if there is an ancestor that could be an interval
name: interval
tag: mrow
match:
- "count(*)=3 and "
# FIX: consider adding ]...[ versions
- "(*[1][text()='(' or text()='['] and *[3][text()=')' or text()=']']) and" # match bracketing
- "(*[2][count(*)=3 and *[2][text()=',']]) and" # inside should have ','
- "not(ancestor::*[IsBracketed(., '{', '}')]) and " # intervals are not part of set notation (e.g, { (x,y)∈L | ...})
# intervals are not part of set notation (e.g, { (x,y)∈L | ...}) nor are they nested
- "not(ancestor::*[IsBracketed(., '{', '}') or IsBracketed(., '[', ']') or IsBracketed(., '(', ')')]) and "
- "not(descendant::*[IsBracketed(., '{', '}') or IsBracketed(., '[', ']') or IsBracketed(., '(', ')')]) and "
# FIX: if both the first and third children of *[2] are mn, then make sure first <= third
- "("
- " not(IsBracketed(., '(', ')')) or " # (.,.) is very ambiguous -- need more clues
Expand Down
75 changes: 75 additions & 0 deletions tests/Languages/intent.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/// Tests for intent inference:
/// * explicit `intent` attributes (e.g., binomial)
/// * inferred intents such as intervals
/// * cases where no intent should be inferred (e.g., nested brackets)
use crate::common::*;

/// An explicit `intent='binomial($n, $m)'` on the bracketed `mrow` should produce a
/// `<binomial>` element whose children are the two `arg`-tagged `mn` elements.
#[test]
fn binomial() {
    let input_mathml = "<math><mrow intent='binomial($n, $m)'>
<mo>(</mo>
<mfrac linethickness='0'> <mn arg='n'>7</mn> <mn arg='m'>3</mn> </mfrac>
<mo>)</mo>
</mrow></math>";
    let expected_intent = "<math data-from-mathml='math'>
<binomial>
<mn data-from-mathml='mn' arg='n'>7</mn>
<mn data-from-mathml='mn' arg='m'>3</mn>
</binomial>
</math>";
    // No extra preferences are needed for this test.
    test_intent(input_mathml, expected_intent, Vec::new());
}

/// `[a, b]` standing by itself should be inferred as a `<closed-interval>` whose
/// children are the two endpoints (the brackets and comma are dropped).
#[test]
fn closed_interval() {
    let input_mathml = r#"<math>
<mo stretchy="false">[</mo>
<mi>a</mi>
<mo>,</mo>
<mi>b</mi>
<mo stretchy="false">]</mo>
</math>"#;
    let expected_intent = "<math data-from-mathml='math'>
<closed-interval data-from-mathml='mrow' data-changed='added'>
<mi data-from-mathml='mi'>a</mi>
<mi data-from-mathml='mi'>b</mi>
</closed-interval>
</math>";
    // No extra preferences are needed for this test.
    test_intent(input_mathml, expected_intent, Vec::new());
}

/// Regression test for issue 329: in `[A, [B, C]]` neither the outer nor the inner
/// bracketed pair should be inferred as an interval, so the expected intent keeps the
/// plain `mrow`/`mo` structure.
#[test]
fn nested_interval_bug_329() {
    let input_mathml = r#"<math>
<mo stretchy="false">[</mo>
<mi>A</mi>
<mo>,</mo>
<mo stretchy="false">[</mo>
<mi>B</mi>
<mo>,</mo>
<mi>C</mi>
<mo stretchy="false">]</mo>
<mo stretchy="false">]</mo>
</math>"#;
    let expected_intent = "<math data-from-mathml='math'>
<mrow data-from-mathml='mrow' data-changed='added'>
<mo data-from-mathml='mo' stretchy='false'>[</mo>
<mrow data-from-mathml='mrow' data-changed='added'>
<mi data-from-mathml='mi'>A</mi>
<mo data-from-mathml='mo'>,</mo>
<mrow data-from-mathml='mrow' data-changed='added'>
<mo data-from-mathml='mo' stretchy='false'>[</mo>
<mrow data-from-mathml='mrow' data-changed='added'>
<mi data-from-mathml='mi'>B</mi>
<mo data-from-mathml='mo'>,</mo>
<mi data-from-mathml='mi'>C</mi>
</mrow>
<mo data-from-mathml='mo' stretchy='false'>]</mo>
</mrow>
</mrow>
<mo data-from-mathml='mo' stretchy='false'>]</mo>
</mrow>
</math>";
    // No extra preferences are needed for this test.
    test_intent(input_mathml, expected_intent, Vec::new());
}
60 changes: 60 additions & 0 deletions tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ pub fn test(language: &str, style: &str, mathml: &str, speech: &str) {
prefs.set_user_prefs("PauseFactor", "100").unwrap(); // makes testing simpler
prefs.set_user_prefs("Verbosity", "Medium").unwrap();
prefs.set_user_prefs("Impairment", "Blindness").unwrap();
prefs.set_user_prefs("DecimalSeparators", ".").unwrap();
prefs.set_user_prefs("BlockSeparators", ", ").unwrap();
});

set_preference("Language".to_string(), language.to_string()).unwrap();
Expand Down Expand Up @@ -161,3 +163,61 @@ pub fn test_braille_prefs(code: &str, test_prefs: Vec<(&str, &str)>, mathml: &st
};
}

/// Checks that the intent tree computed from `mathml` matches `target`.
///
/// Both arguments are XML strings. `target` is parsed and trimmed. `mathml` is passed
/// through `set_mathml`, re-parsed, trimmed, and handed to
/// `libmathcat::speech::intent_from_mathml`. The `id` and `data-id-added` attributes are
/// stripped from the computed intent before it is compared to `target` with
/// `is_same_element`.
///
/// `test_prefs` is a list of `(name, value)` preferences applied via `set_preference`
/// after "IntentErrorRecovery" has been set to "Error".
///
/// # Panics
/// Panics if the rules directory or any preference cannot be set, if either XML string
/// fails to parse, if `set_mathml` or `intent_from_mathml` returns an error, or if the
/// computed intent differs from `target`.
#[allow(dead_code)] // used in testing
pub fn test_intent(mathml: &str, target: &str, test_prefs: Vec<(&str, &str)>) {
    use sxd_document::{parser, dom::Element};

    /// Removes `id` and `data-id-added` from `mathml` and from every descendant element.
    /// Non-element children (e.g., text) are skipped.
    fn clean_attrs(mathml: Element<'_>) {
        mathml.remove_attribute("id");
        mathml.remove_attribute("data-id-added");

        for child in mathml.children() {
            if let Some(child) = child.element() {
                clean_attrs(child);
            }
        }
    }

    set_rules_dir(abs_rules_dir_path()).unwrap();
    libmathcat::speech::SPEECH_RULES.with(|rules| {
        let rules = rules.borrow_mut();
        let mut prefs = rules.pref_manager.borrow_mut();
        prefs.set_user_prefs("DecimalSeparators", ".").unwrap();
        prefs.set_user_prefs("BlockSeparators", ", ").unwrap();
    });

    // crate::speech::SpeechRules::initialize_all_rules().unwrap();
    set_preference("IntentErrorRecovery".to_string(), "Error".to_string()).unwrap();
    for (pref_name, pref_value) in test_prefs {
        set_preference(pref_name.to_string(), pref_value.to_string()).unwrap();
    }

    let package = &parser::parse(target).expect("Failed to parse target input");
    let target = get_element(package);
    trim_element(&target);

    let canonical_mathml = match set_mathml(mathml.to_string()) {
        Ok(e) => e,
        Err(e) => panic!("In set_mathml: {}", libmathcat::errors_to_string(&e)),
    };
    // This parse is of the canonicalized input, not the target, so say so on failure.
    let package = &parser::parse(&canonical_mathml).expect("Failed to parse canonical MathML");
    let canonical_mathml = get_element(package);
    trim_element(&canonical_mathml);
    let computed_intent = match libmathcat::speech::intent_from_mathml(canonical_mathml, package.as_document()) {
        Ok(e) => e,
        Err(e) => panic!("in intent_from_mathml: {}", libmathcat::errors_to_string(&e)),
    };

    // remove some attrs that make it harder to handwrite what the intent is:
    // 'id' and 'data-id-added'; leaving 'data-from-mathml' as that is used by the code
    clean_attrs(computed_intent);

    if let Err(e) = is_same_element(&computed_intent, &target) {
        panic!("{}", e);
    }
}

1 change: 1 addition & 0 deletions tests/languages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
mod common;

mod Languages {
mod intent;
mod zh;
mod en;
mod fi;
Expand Down

0 comments on commit 1b1bda3

Please sign in to comment.