diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d03dcc3..8c7150a2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
# Change History
+## 6.4.3 - January 29, 2019
+* _Bugfix_: To prevent common false positives for single-letter Roman ordinals (especially in French and Dutch), Roman numeral matching is now only enabled when `Settings::set_smart_ordinal_suffix_match_roman_numerals` is set to `true`. In addition, only `I`, `V`, and `X` are accepted as single-letter Roman numbers.
+
## 6.4.2 - January 27, 2019
* _Bugfix_: The Unicode hyphen character (`‐`) is recognized as a valid word combiner.
diff --git a/src/class-settings.php b/src/class-settings.php
index 6c0a295b..b9d31ea6 100644
--- a/src/class-settings.php
+++ b/src/class-settings.php
@@ -264,6 +264,7 @@ public function set_defaults() {
$this->set_diacritic_custom_replacements();
$this->set_smart_marks();
$this->set_smart_ordinal_suffix();
+ $this->set_smart_ordinal_suffix_match_roman_numerals();
$this->set_smart_math();
$this->set_smart_fractions();
$this->set_smart_exponents();
@@ -731,6 +732,17 @@ public function set_smart_ordinal_suffix( $on = true ) {
$this->data['smartOrdinalSuffix'] = $on;
}
+ /**
+ * Enables/disables matching of ordinals written with Roman numerals (e.g. XIXe).
+ *
+ * @since 6.4.3
+ *
+ * @param bool $on Optional. Default false.
+ */
+ public function set_smart_ordinal_suffix_match_roman_numerals( $on = false ) {
+ $this->data['smartOrdinalSuffixRomanNumerals'] = $on;
+ }
+
/**
* Enables/disables forcing single character words to next line with the insertion of a non-breaking space.
*
diff --git a/src/fixes/node-fixes/class-smart-ordinal-suffix-fix.php b/src/fixes/node-fixes/class-smart-ordinal-suffix-fix.php
index 2dede364..4f9588ad 100644
--- a/src/fixes/node-fixes/class-smart-ordinal-suffix-fix.php
+++ b/src/fixes/node-fixes/class-smart-ordinal-suffix-fix.php
@@ -43,15 +43,45 @@
*/
class Smart_Ordinal_Suffix_Fix extends Abstract_Node_Fix {
- const RE_ARABIC_ORDINALS = '/' . self::WORD_BOUNDARY_START . '(\d+)(' . self::ENGLISH_SUFFIXES . '|' . self::FRENCH_SUFFIXES . '|' . self::LATIN_SUFFIXES . ')' . self::WORD_BOUNDARY_END . '/Su';
- const ENGLISH_SUFFIXES = 'st|nd|rd|th';
- const FRENCH_SUFFIXES = 'er|re|e|ère|d|nd|nde|e|de|me|ème|è';
- const LATIN_SUFFIXES = 'o';
- const RE_ROMAN_ORDINALS = '/' . self::WORD_BOUNDARY_START . '(' . self::ROMAN_NUMERALS . ')(' . self::FRENCH_SUFFIXES . '|' . self::LATIN_SUFFIXES . ')' . self::WORD_BOUNDARY_END . '/Sxu';
- const ROMAN_NUMERALS = '(?=[MDCLXVI])M*(?:C[MD]|D?C*)(?:X[CL]|L?X*)(?:I[XV]|V?I*)';
+ // Possible suffixes.
+ const ENGLISH_SUFFIXES = 'st|nd|rd|th';
+ const FRENCH_SUFFIXES = 'er|re|e|ère|d|nd|nde|de|me|ème|è';
+ const LATIN_SUFFIXES = 'o';
+
+ // Ordinals with arabic numerals.
+ const RE_ARABIC_ORDINALS = '/' .
+ self::WORD_BOUNDARY_START . '
+ (\d+)
+ (' .
+ self::ENGLISH_SUFFIXES . '|' .
+ self::FRENCH_SUFFIXES . '|' .
+ self::LATIN_SUFFIXES . '
+ )' .
+ self::WORD_BOUNDARY_END . '
+ /Sxu';
+
+ // Ordinals with Roman numerals.
+ const RE_ROMAN_ORDINALS = '/' .
+ self::WORD_BOUNDARY_START . '
+ (
+ # Prevent single letter numbers other than I, V, and X.
+ (?=(?:I|V|X|' . self::ROMAN_NUMERALS . '{2,}))
+
+ # Otherwise, allow all valid Roman numbers.
+ (?=' . self::ROMAN_NUMERALS . ')M*(?:C[MD]|D?C*)(?:X[CL]|L?X*)(?:I[XV]|V?I*)
+ )
+ (' .
+ self::FRENCH_SUFFIXES . '|' .
+ self::LATIN_SUFFIXES . '
+ )' .
+ self::WORD_BOUNDARY_END . '
+ /Sxu';
+
+ // Additional character classes.
+ const ROMAN_NUMERALS = '[MDCLXVI]';
// Zero-width spaces and soft hyphens should not be treated as word boundaries.
- const WORD_BOUNDARY_START = '\b(?![' . U::SOFT_HYPHEN . U::ZERO_WIDTH_SPACE . '])';
+ const WORD_BOUNDARY_START = '\b(?data = \preg_replace( [ self::RE_ARABIC_ORDINALS, self::RE_ROMAN_ORDINALS ], $this->replacement, $textnode->data );
+ // Always match Arabic numbers.
+ $patterns = [ self::RE_ARABIC_ORDINALS ];
+
+ // Only match Roman numerals if explicitly enabled.
+ if ( ! empty( $settings['smartOrdinalSuffixRomanNumerals'] ) ) {
+ $patterns[] = self::RE_ROMAN_ORDINALS;
+ }
+
+ $textnode->data = \preg_replace( $patterns, $this->replacement, $textnode->data );
}
}
diff --git a/tests/class-settings-test.php b/tests/class-settings-test.php
index 4afff2b0..34e1b693 100644
--- a/tests/class-settings-test.php
+++ b/tests/class-settings-test.php
@@ -885,6 +885,19 @@ public function test_set_smart_ordinal_suffix() {
$this->assertFalse( $this->settings['smartOrdinalSuffix'] );
}
+ /**
+ * Tests set_smart_ordinal_suffix_match_roman_numerals.
+ *
+ * @covers ::set_smart_ordinal_suffix_match_roman_numerals
+ */
+ public function test_set_smart_ordinal_suffix_match_roman_numerals() {
+ $this->settings->set_smart_ordinal_suffix_match_roman_numerals( true );
+ $this->assertTrue( $this->settings['smartOrdinalSuffixRomanNumerals'] );
+
+ $this->settings->set_smart_ordinal_suffix_match_roman_numerals( false );
+ $this->assertFalse( $this->settings['smartOrdinalSuffixRomanNumerals'] );
+ }
+
/**
* Tests set_single_character_word_spacing.
*
diff --git a/tests/fixes/node-fixes/class-smart-ordinal-suffix-fix-test.php b/tests/fixes/node-fixes/class-smart-ordinal-suffix-fix-test.php
index 810a064d..37ad4b96 100644
--- a/tests/fixes/node-fixes/class-smart-ordinal-suffix-fix-test.php
+++ b/tests/fixes/node-fixes/class-smart-ordinal-suffix-fix-test.php
@@ -76,24 +76,39 @@ public function test_array_constructor() {
*
* @return array
*/
- public function provide_smart_ordinal_suffix() {
+ public function provide_smart_ordinal_suffix_data() {
return [
[ 'in the 1st instance', 'in the 1st instance', '' ],
[ 'in the 2nd degree', 'in the 2nd degree', '' ],
[ 'a 3rd party', 'a 3rd party', '' ],
[ '12th Night', '12th Night', '' ],
- [ 'la IIIIre heure', 'la IIIIre heure', '' ],
- [ 'François Ier', 'François Ier', '' ],
- [ 'MDCCLXXVIo', 'MDCCLXXVIo', '' ],
- [ 'Certain HTML entities', 'Certain HTML entities', '' ], // Negative test.
- [ 'Certain HTML entities', 'Certain HTML entities', '' ], // Negative test.
- [ 'Certain HTML entities', 'Certain HTML entities', '' ], // Negative test.
[ 'in the 1st instance, we', 'in the 1st instance, we', 'ordinal' ],
[ 'murder in the 2nd degree', 'murder in the 2nd degree', 'ordinal' ],
[ 'a 3rd party', 'a 3rd party', 'ordinal' ],
[ 'the 12th Night', 'the 12th Night', 'ordinal' ],
[ 'la 1ère guerre', 'la 1ère guerre', 'ordinal' ],
[ 'la 1re guerre mondiale', 'la 1re guerre mondiale', 'ordinal' ],
+ ];
+ }
+
+ /**
+ * Provide data for testing ordinal suffixes with Roman numerals (including negative cases).
+ *
+ * @return array
+ */
+ public function provide_smart_ordinal_suffix_roman_numeral_data() {
+ return [
+ [ 'la IIIIre heure', 'la IIIIre heure', '' ],
+ [ 'la IVre heure', 'la IVre heure', '' ],
+ [ 'François Ier', 'François Ier', '' ],
+ [ 'MDCCLXXVIo', 'MDCCLXXVIo', '' ],
+ [ 'Certain HTML entities', 'Certain HTML entities', '' ], // Negative test.
+ [ 'Certain HTML entities', 'Certain HTML entities', '' ], // Negative test.
+ [ 'Certain HTML entities', 'Certain HTML entities', '' ], // Negative test.
+ [ 'Le Président', 'Le Président', '' ], // Negative test.
+ [ 'Ce livre est très bon.', 'Ce livre est très bon.', '' ], // Negative test.
+ [ 'De geologische structuur', 'De geologische structuur', '' ], // Negative test.
+ [ 'Me? I like ice cream.', 'Me? I like ice cream.', '' ], // Negative test.
[ 'le XIXe siècle', 'le XIXe siècle', 'ordinal' ],
];
}
@@ -105,7 +120,7 @@ public function provide_smart_ordinal_suffix() {
*
* @uses PHP_Typography\RE::escape_tags
*
- * @dataProvider provide_smart_ordinal_suffix
+ * @dataProvider provide_smart_ordinal_suffix_data
*
* @param string $input HTML input.
* @param string $result Expected result.
@@ -128,7 +143,55 @@ public function test_apply( $input, $result, $css_class ) {
*
* @uses PHP_Typography\RE::escape_tags
*
- * @dataProvider provide_smart_ordinal_suffix
+ * @dataProvider provide_smart_ordinal_suffix_roman_numeral_data
+ *
+ * @param string $input HTML input.
+ * @param string $result Expected result.
+ * @param string $css_class Optional.
+ */
+ public function test_apply_roman_numerals_on( $input, $result, $css_class ) {
+ $this->s->set_smart_ordinal_suffix( true );
+ $this->s->set_smart_ordinal_suffix_match_roman_numerals( true );
+
+ if ( ! empty( $css_class ) ) {
+ $this->fix = new Node_Fixes\Smart_Ordinal_Suffix_Fix( $css_class );
+ }
+
+ $this->assertFixResultSame( $input, $result );
+ }
+
+ /**
+ * Test apply without enabling Roman numeral matching: the input must stay unchanged.
+ *
+ * @covers ::apply
+ *
+ * @uses PHP_Typography\RE::escape_tags
+ *
+ * @dataProvider provide_smart_ordinal_suffix_roman_numeral_data
+ *
+ * @param string $input HTML input.
+ * @param string $result Expected result.
+ * @param string $css_class Optional.
+ */
+ public function test_apply_roman_numerals_off( $input, $result, $css_class ) {
+ $this->s->set_smart_ordinal_suffix( true );
+
+ if ( ! empty( $css_class ) ) {
+ $this->fix = new Node_Fixes\Smart_Ordinal_Suffix_Fix( $css_class );
+ }
+
+ $this->assertFixResultSame( $input, $input );
+ }
+
+ /**
+ * Test apply.
+ *
+ * @covers ::apply
+ *
+ * @uses PHP_Typography\RE::escape_tags
+ *
+ * @dataProvider provide_smart_ordinal_suffix_data
+ * @dataProvider provide_smart_ordinal_suffix_roman_numeral_data
*
* @param string $input HTML input.
* @param string $result Expected result.
@@ -136,6 +199,7 @@ public function test_apply( $input, $result, $css_class ) {
*/
public function test_apply_off( $input, $result, $css_class ) {
$this->s->set_smart_ordinal_suffix( false );
+ $this->s->set_smart_ordinal_suffix_match_roman_numerals( true );
if ( ! empty( $css_class ) ) {
$this->fix = new Node_Fixes\Smart_Ordinal_Suffix_Fix( $css_class );