diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d03dcc3..8c7150a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Change History +## 6.4.3 - January 29, 2019 +* _Bugfix_: To prevent common false positives for single-letter Roman ordinals (especially in French and Dutch), Roman numeral matching is now only enabled when `Settings::set_smart_ordinal_suffix_match_roman_numerals` is set to `true`. In addition, only `I`, `V`, and `X` are accepted as single-letter Roman numbers. + ## 6.4.2 - January 27, 2019 * _Bugfix_: The Unicode hyphen character (`‐`) is recognized as a valid word combiner. diff --git a/src/class-settings.php b/src/class-settings.php index 6c0a295b..b9d31ea6 100644 --- a/src/class-settings.php +++ b/src/class-settings.php @@ -264,6 +264,7 @@ public function set_defaults() { $this->set_diacritic_custom_replacements(); $this->set_smart_marks(); $this->set_smart_ordinal_suffix(); + $this->set_smart_ordinal_suffix_match_roman_numerals(); $this->set_smart_math(); $this->set_smart_fractions(); $this->set_smart_exponents(); @@ -731,6 +732,17 @@ public function set_smart_ordinal_suffix( $on = true ) { $this->data['smartOrdinalSuffix'] = $on; } + /** + * Enables/disables replacement of XXe with XXe. + * + * @since 6.5.0 + * + * @param bool $on Optional. Default false. + */ + public function set_smart_ordinal_suffix_match_roman_numerals( $on = false ) { + $this->data['smartOrdinalSuffixRomanNumerals'] = $on; + } + /** * Enables/disables forcing single character words to next line with the insertion of  . * diff --git a/src/fixes/node-fixes/class-smart-ordinal-suffix-fix.php b/src/fixes/node-fixes/class-smart-ordinal-suffix-fix.php index 2dede364..4f9588ad 100644 --- a/src/fixes/node-fixes/class-smart-ordinal-suffix-fix.php +++ b/src/fixes/node-fixes/class-smart-ordinal-suffix-fix.php @@ -43,15 +43,45 @@ */ class Smart_Ordinal_Suffix_Fix extends Abstract_Node_Fix { - const RE_ARABIC_ORDINALS = '/' . self::WORD_BOUNDARY_START . '(\d+)(' . 
self::ENGLISH_SUFFIXES . '|' . self::FRENCH_SUFFIXES . '|' . self::LATIN_SUFFIXES . ')' . self::WORD_BOUNDARY_END . '/Su'; - const ENGLISH_SUFFIXES = 'st|nd|rd|th'; - const FRENCH_SUFFIXES = 'er|re|e|ère|d|nd|nde|e|de|me|ème|è'; - const LATIN_SUFFIXES = 'o'; - const RE_ROMAN_ORDINALS = '/' . self::WORD_BOUNDARY_START . '(' . self::ROMAN_NUMERALS . ')(' . self::FRENCH_SUFFIXES . '|' . self::LATIN_SUFFIXES . ')' . self::WORD_BOUNDARY_END . '/Sxu'; - const ROMAN_NUMERALS = '(?=[MDCLXVI])M*(?:C[MD]|D?C*)(?:X[CL]|L?X*)(?:I[XV]|V?I*)'; + // Possible suffixes. + const ENGLISH_SUFFIXES = 'st|nd|rd|th'; + const FRENCH_SUFFIXES = 'er|re|e|ère|d|nd|nde|de|me|ème|è'; + const LATIN_SUFFIXES = 'o'; + + // Ordinals with Arabic numerals. + const RE_ARABIC_ORDINALS = '/' . + self::WORD_BOUNDARY_START . ' + (\d+) + (' . + self::ENGLISH_SUFFIXES . '|' . + self::FRENCH_SUFFIXES . '|' . + self::LATIN_SUFFIXES . ' + )' . + self::WORD_BOUNDARY_END . ' + /Sxu'; + + // Ordinals with Roman numerals. + const RE_ROMAN_ORDINALS = '/' . + self::WORD_BOUNDARY_START . ' + ( + # Prevent single letter numbers other than I, V, and X. + (?=(?:I|V|X|' . self::ROMAN_NUMERALS . '{2,})) + + # Otherwise, allow all valid Roman numbers. + (?=' . self::ROMAN_NUMERALS . ')M*(?:C[MD]|D?C*)(?:X[CL]|L?X*)(?:I[XV]|V?I*) + ) + (' . + self::FRENCH_SUFFIXES . '|' . + self::LATIN_SUFFIXES . ' + )' . + self::WORD_BOUNDARY_END . ' + /Sxu'; + + // Additional character classes. + const ROMAN_NUMERALS = '[MDCLXVI]'; // Zero-width spaces and soft hyphens should not be treated as word boundaries. - const WORD_BOUNDARY_START = '\b(?![' . U::SOFT_HYPHEN . U::ZERO_WIDTH_SPACE . '])'; + const WORD_BOUNDARY_START = '\b(?data = \preg_replace( [ self::RE_ARABIC_ORDINALS, self::RE_ROMAN_ORDINALS ], $this->replacement, $textnode->data ); + // Always match Arabic numbers. + $patterns = [ self::RE_ARABIC_ORDINALS ]; + + // Only match Roman numbers if explicitly enabled. + if ( ! 
empty( $settings['smartOrdinalSuffixRomanNumerals'] ) ) { + $patterns[] = self::RE_ROMAN_ORDINALS; + } + + $textnode->data = \preg_replace( $patterns, $this->replacement, $textnode->data ); } } diff --git a/tests/class-settings-test.php b/tests/class-settings-test.php index 4afff2b0..34e1b693 100644 --- a/tests/class-settings-test.php +++ b/tests/class-settings-test.php @@ -885,6 +885,19 @@ public function test_set_smart_ordinal_suffix() { $this->assertFalse( $this->settings['smartOrdinalSuffix'] ); } + /** + * Tests set_smart_ordinal_suffix_match_roman_numerals. + * + * @covers ::set_smart_ordinal_suffix_match_roman_numerals + */ + public function test_set_smart_ordinal_suffix_match_roman_numerals() { + $this->settings->set_smart_ordinal_suffix_match_roman_numerals( true ); + $this->assertTrue( $this->settings['smartOrdinalSuffixRomanNumerals'] ); + + $this->settings->set_smart_ordinal_suffix_match_roman_numerals( false ); + $this->assertFalse( $this->settings['smartOrdinalSuffixRomanNumerals'] ); + } + /** * Tests set_single_character_word_spacing. * diff --git a/tests/fixes/node-fixes/class-smart-ordinal-suffix-fix-test.php b/tests/fixes/node-fixes/class-smart-ordinal-suffix-fix-test.php index 810a064d..37ad4b96 100644 --- a/tests/fixes/node-fixes/class-smart-ordinal-suffix-fix-test.php +++ b/tests/fixes/node-fixes/class-smart-ordinal-suffix-fix-test.php @@ -76,24 +76,39 @@ public function test_array_constructor() { * * @return array */ - public function provide_smart_ordinal_suffix() { + public function provide_smart_ordinal_suffix_data() { return [ [ 'in the 1st instance', 'in the 1st instance', '' ], [ 'in the 2nd degree', 'in the 2nd degree', '' ], [ 'a 3rd party', 'a 3rd party', '' ], [ '12th Night', '12th Night', '' ], - [ 'la IIIIre heure', 'la IIIIre heure', '' ], - [ 'François Ier', 'François Ier', '' ], - [ 'MDCCLXXVIo', 'MDCCLXXVIo', '' ], - [ 'Certain HTML entities', 'Certain HTML entities', '' ], // Negative test. 
- [ 'Cer­tain HTML entities', 'Cer­tain HTML entities', '' ], // Negative test. - [ 'Cer​tain HTML entities', 'Cer​tain HTML entities', '' ], // Negative test. [ 'in the 1st instance, we', 'in the 1st instance, we', 'ordinal' ], [ 'murder in the 2nd degree', 'murder in the 2nd degree', 'ordinal' ], [ 'a 3rd party', 'a 3rd party', 'ordinal' ], [ 'the 12th Night', 'the 12th Night', 'ordinal' ], [ 'la 1ère guerre', 'la 1ère guerre', 'ordinal' ], [ 'la 1re guerre mondiale', 'la 1re guerre mondiale', 'ordinal' ], + ]; + } + + /** + * Provide data for testing ordinal suffixes. + * + * @return array + */ + public function provide_smart_ordinal_suffix_roman_numeral_data() { + return [ + [ 'la IIIIre heure', 'la IIIIre heure', '' ], + [ 'la IVre heure', 'la IVre heure', '' ], + [ 'François Ier', 'François Ier', '' ], + [ 'MDCCLXXVIo', 'MDCCLXXVIo', '' ], + [ 'Certain HTML entities', 'Certain HTML entities', '' ], // Negative test. + [ 'Cer­tain HTML entities', 'Cer­tain HTML entities', '' ], // Negative test. + [ 'Cer​tain HTML entities', 'Cer​tain HTML entities', '' ], // Negative test. + [ 'Le Président', 'Le Président', '' ], // Negative test. + [ 'Ce livre est très bon.', 'Ce livre est très bon.', '' ], // Negative test. + [ 'De geologische structuur', 'De geologische structuur', '' ], // Negative test. + [ 'Me? I like ice cream.', 'Me? I like ice cream.', '' ], // Negative test. [ 'le XIXe siècle', 'le XIXe siècle', 'ordinal' ], ]; } @@ -105,7 +120,7 @@ public function provide_smart_ordinal_suffix() { * * @uses PHP_Typography\RE::escape_tags * - * @dataProvider provide_smart_ordinal_suffix + * @dataProvider provide_smart_ordinal_suffix_data * * @param string $input HTML input. * @param string $result Expected result. 
@@ -128,7 +143,55 @@ public function test_apply( $input, $result, $css_class ) { * * @uses PHP_Typography\RE::escape_tags * - * @dataProvider provide_smart_ordinal_suffix + * @dataProvider provide_smart_ordinal_suffix_roman_numeral_data + * + * @param string $input HTML input. + * @param string $result Expected result. + * @param string $css_class Optional. + */ + public function test_apply_roman_numerals_on( $input, $result, $css_class ) { + $this->s->set_smart_ordinal_suffix( true ); + $this->s->set_smart_ordinal_suffix_match_roman_numerals( true ); + + if ( ! empty( $css_class ) ) { + $this->fix = new Node_Fixes\Smart_Ordinal_Suffix_Fix( $css_class ); + } + + $this->assertFixResultSame( $input, $result ); + } + + /** + * Test apply. + * + * @covers ::apply + * + * @uses PHP_Typography\RE::escape_tags + * + * @dataProvider provide_smart_ordinal_suffix_roman_numeral_data + * + * @param string $input HTML input. + * @param string $result Expected result. + * @param string $css_class Optional. + */ + public function test_apply_roman_numerals_off( $input, $result, $css_class ) { + $this->s->set_smart_ordinal_suffix( true ); + + if ( ! empty( $css_class ) ) { + $this->fix = new Node_Fixes\Smart_Ordinal_Suffix_Fix( $css_class ); + } + + $this->assertFixResultSame( $input, $input ); + } + + /** + * Test apply. + * + * @covers ::apply + * + * @uses PHP_Typography\RE::escape_tags + * + * @dataProvider provide_smart_ordinal_suffix_data + * @dataProvider provide_smart_ordinal_suffix_roman_numeral_data * * @param string $input HTML input. * @param string $result Expected result. @@ -136,6 +199,7 @@ public function test_apply( $input, $result, $css_class ) { */ public function test_apply_off( $input, $result, $css_class ) { $this->s->set_smart_ordinal_suffix( false ); + $this->s->set_smart_ordinal_suffix_match_roman_numerals( true ); if ( ! empty( $css_class ) ) { $this->fix = new Node_Fixes\Smart_Ordinal_Suffix_Fix( $css_class );