Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Fix Intl for PHP >= 7.3 and ICU >= 56 #171

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ cache:
directories:
- vendor
- $HOME/.composer/cache/files
- $HOME/.build

matrix:
include:
Expand All @@ -18,7 +19,7 @@ matrix:
- php: 7.1
- php: 7.2
- php: 7.3
env: SYMFONY_PHPUNIT_VERSION=7.2
env: SYMFONY_PHPUNIT_VERSION=7.2 ICU_VERSION=63.1
- php: nightly
allow_failures:
- php: nightly
Expand All @@ -33,6 +34,29 @@ before_install:
- if [[ $TRAVIS_PHP_VERSION = 5.* ]]; then echo yes | pecl install -f apcu-4.0.11; fi
- if [[ $TRAVIS_PHP_VERSION = 7.* ]]; then echo yes | pecl install -f apcu_bc-1.0.4; fi
- if [[ $TRAVIS_PHP_VERSION = 7.* ]]; then echo yes | pecl install -f apcu-5.1.11; fi
- |
if [[ $ICU_VERSION ]]; then
ICU_DIR=$HOME/.build/icu-$ICU_VERSION
ICU_PHP_VERSION=$(php -r "echo PHP_VERSION;")
ICU_PHP_DIR=$HOME/.build/php-$ICU_PHP_VERSION-icu-$ICU_VERSION
export ICU_PHP=$ICU_PHP_DIR/bin/php
if [ ! -f $ICU_PHP ]; then
wget -O icu-src.tgz http://download.icu-project.org/files/icu4c/$ICU_VERSION/icu4c-$(echo $ICU_VERSION | tr '.' '_')-src.tgz
mkdir icu-src && tar xzf icu-src.tgz -C icu-src --strip-components=1
pushd icu-src/source
./configure --prefix=$ICU_DIR
make && make install
popd
wget -O php-src.tgz http://us1.php.net/get/php-$ICU_PHP_VERSION.tar.gz/from/this/mirror
mkdir php-src && tar xzf php-src.tgz -C php-src --strip-components=1
pushd php-src
./configure --prefix=$ICU_PHP_DIR --enable-intl --with-icu-dir=$ICU_DIR
make && make install
popd
fi
$ICU_PHP -r "echo INTL_ICU_VERSION.PHP_EOL;"
$ICU_PHP -r "var_dump((new ReflectionClass('Normalizer'))->getConstants());"
fi
- php -i

install:
Expand All @@ -41,3 +65,4 @@ install:

script:
- ./vendor/bin/simple-phpunit
- if [[ $ICU_PHP ]]; then $ICU_PHP ./vendor/bin/simple-phpunit --filter 'Symfony\\Polyfill\\Tests\\Intl'; fi
311 changes: 311 additions & 0 deletions src/Intl/Normalizer/BaseNormalizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,311 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Polyfill\Intl\Normalizer;

/**
* Normalizer is a PHP fallback implementation of the Normalizer class provided by the intl extension.
*
* It has been validated with Unicode 6.3 Normalization Conformance Test.
* See http://www.unicode.org/reports/tr15/ for detailed info about Unicode normalizations.
*
* @author Nicolas Grekas <[email protected]>
*
* @internal
*/
class BaseNormalizer
{
    // Normalization form identifiers. The FORM_* and NF* names are aliases
    // of the same values.
    const NONE = 1;
    const FORM_D = 2;
    const FORM_KD = 3;
    const FORM_C = 4;
    const FORM_KC = 5;
    const NFD = 2;
    const NFKD = 3;
    const NFC = 4;
    const NFKC = 5;

    // Lookup tables, lazily loaded through getData() on first use.
    private static $C;  // 'canonicalComposition' data
    private static $D;  // 'canonicalDecomposition' data
    private static $KD; // 'compatibilityDecomposition' data
    private static $cC; // 'combiningClass' data

    // Byte length of a multi-byte UTF-8 character, keyed by its lead byte masked with 0xF0.
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);

    // Every 7-bit byte (0x00-0x7F), used as the mask for strspn().
    private static $ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";

    /**
     * Cheap check telling whether a string is certainly normalized.
     *
     * Only two shortcuts are implemented: a string made exclusively of 7-bit
     * bytes, and (for NFC only) a valid UTF-8 string with no character above
     * U+02FF. Everything else is reported as "not normalized".
     *
     * NOTE(review): the default value is bound to self::NFC while the body
     * compares against static::NFC; a subclass redefining the constants
     * presumably has to override the default too - confirm.
     *
     * @param string $s    The string to check, cast to string
     * @param int    $form One of the normalization form constants
     *
     * @return bool True when $s is known to be normalized, false otherwise
     */
    public static function isNormalized($s, $form = self::NFC)
    {
        if (!static::isFormNormalized($form)) {
            return false;
        }

        $s = (string) $s;
        if (!isset($s[strspn($s, self::$ASCII)])) {
            // The leading run of 7-bit bytes spans the whole string
            return true;
        }
        if (static::NFC === $form && preg_match('//u', $s) && !preg_match('/[^\x00-\x{2FF}]/u', $s)) {
            return true;
        }

        return false; // Pretend false as quick checks implemented in PHP won't be so quick
    }

    /**
     * Normalizes a UTF-8 string to the requested form.
     *
     * NOTE(review): the default value is bound to self::NFC while the switch
     * compares against static:: constants; a subclass redefining the constants
     * presumably has to override the default too - confirm.
     *
     * @param string $s    The string to normalize, cast to string
     * @param int    $form One of the normalization form constants
     *
     * @return string|false The normalized string, or false when $s is not
     *                      valid UTF-8 or $form is not a known form
     */
    public static function normalize($s, $form = self::NFC)
    {
        $s = (string) $s;
        if (!preg_match('//u', $s)) {
            return false;
        }

        // $C: recompose after decomposing; $K: use compatibility mappings
        switch ($form) {
            case static::NONE: return $s;
            case static::NFC: $C = true; $K = false; break;
            case static::NFD: $C = false; $K = false; break;
            case static::NFKC: $C = true; $K = true; break;
            case static::NFKD: $C = false; $K = true; break;
            default: return false;
        }

        if ('' === $s) {
            return '';
        }

        if ($K && null === self::$KD) {
            self::$KD = self::getData('compatibilityDecomposition');
        }

        if (null === self::$D) {
            self::$D = self::getData('canonicalDecomposition');
            self::$cC = self::getData('combiningClass');
        }

        // When mbstring overloads the string functions, switch to 8bit for
        // the byte-wise processing below and restore the encoding afterwards.
        if (null !== $mbEncoding = (2 /* MB_OVERLOAD_STRING */ & (int) ini_get('mbstring.func_overload')) ? mb_internal_encoding() : null) {
            mb_internal_encoding('8bit');
        }

        $r = self::decompose($s, $K);

        if ($C) {
            if (null === self::$C) {
                self::$C = self::getData('canonicalComposition');
            }

            $r = self::recompose($r);
        }
        if (null !== $mbEncoding) {
            mb_internal_encoding($mbEncoding);
        }

        return $r;
    }

    /**
     * Tells whether $form is a form isNormalized() can check, i.e. lies in
     * the range (static::NONE, static::NFKC].
     *
     * @param int $form The form identifier to test
     *
     * @return bool
     */
    protected static function isFormNormalized($form)
    {
        return $form > static::NONE && $form <= static::NFKC;
    }

    /**
     * Composes a decomposed, non-empty, valid UTF-8 string.
     *
     * Pairs are composed through the composition table, except conjoining
     * Hangul jamo which are composed arithmetically.
     *
     * @param string $s The output of decompose(); must not be empty since
     *                  its first byte is read unconditionally
     *
     * @return string
     */
    private static function recompose($s)
    {
        $ASCII = self::$ASCII;
        $compMap = self::$C;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;

        // $result: finished output
        // $lastUchr: current composition candidate (plus any ASCII run glued to it)
        // $tail: combining chars that followed $lastUchr without composing with it
        $result = $tail = '';

        $i = $s[0] < "\x80" ? 1 : $ulenMask[$s[0] & "\xF0"];
        $len = \strlen($s);

        $lastUchr = substr($s, 0, $i);
        // A leading combining char must never be composed onto: 256 is larger
        // than any class compared against it below
        $lastUcls = isset($combClass[$lastUchr]) ? 256 : 0;

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($tail) {
                    $lastUchr .= $tail;
                    $tail = '';
                }

                // Flush all but the last char of the ASCII run; the last one
                // becomes the new candidate
                if ($j = strspn($s, $ASCII, $i + 1)) {
                    $lastUchr .= substr($s, $i, $j);
                    $i += $j;
                }

                $result .= $lastUchr;
                $lastUchr = $s[$i];
                $lastUcls = 0;
                ++$i;
                continue;
            }

            $ulen = $ulenMask[$s[$i] & "\xF0"];
            $uchr = substr($s, $i, $ulen);

            // Anything but "leading consonant U+1100..U+1112 followed by
            // vowel U+1161..U+1175" goes through the tables
            if ($lastUchr < "\xE1\x84\x80" || "\xE1\x84\x92" < $lastUchr
                || $uchr < "\xE1\x85\xA1" || "\xE1\x85\xB5" < $uchr
                || $lastUcls) {
                // Table lookup and combining chars composition

                $ucls = isset($combClass[$uchr]) ? $combClass[$uchr] : 0;

                if (isset($compMap[$lastUchr.$uchr]) && (!$lastUcls || $lastUcls < $ucls)) {
                    $lastUchr = $compMap[$lastUchr.$uchr];
                } elseif ($lastUcls = $ucls) {
                    // Intentional assignment: remember the class of the
                    // combining char that did not compose
                    $tail .= $uchr;
                } else {
                    if ($tail) {
                        $lastUchr .= $tail;
                        $tail = '';
                    }

                    $result .= $lastUchr;
                    $lastUchr = $uchr;
                }
            } else {
                // Hangul chars

                $L = \ord($lastUchr[2]) - 0x80;
                $V = \ord($uchr[2]) - 0xA1;
                $T = 0;

                $uchr = substr($s, $i + $ulen, 3);

                // Trailing consonants are U+11A8..U+11C2. U+11A7 would yield
                // T = 0, i.e. "no trailing consonant", and must not be
                // consumed, otherwise that character is dropped from the output.
                if ("\xE1\x86\xA8" <= $uchr && $uchr <= "\xE1\x87\x82") {
                    $T = \ord($uchr[2]) - 0xA7;
                    // Third byte wrapped when the second byte went from 0x86 to 0x87
                    0 > $T && $T += 0x40;
                    $ulen += 3;
                }

                // Code point of the syllable, then encoded as 3-byte UTF-8
                $L = 0xAC00 + ($L * 21 + $V) * 28 + $T;
                $lastUchr = \chr(0xE0 | $L >> 12).\chr(0x80 | $L >> 6 & 0x3F).\chr(0x80 | $L & 0x3F);
            }

            $i += $ulen;
        }

        return $result.$lastUchr.$tail;
    }

    /**
     * Decomposes a valid UTF-8 string and reorders runs of combining chars
     * by ascending combining class.
     *
     * @param string $s The string to decompose
     * @param bool   $c Whether compatibility mappings take precedence over
     *                  canonical ones
     *
     * @return string
     */
    private static function decompose($s, $c)
    {
        $result = '';

        $ASCII = self::$ASCII;
        $decompMap = self::$D;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;
        $compatMap = $c ? self::$KD : null;

        // Pending combining chars, keyed by combining class
        $marks = array();
        $i = 0;
        $len = \strlen($s);

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($marks) {
                    ksort($marks);
                    $result .= implode('', $marks);
                    $marks = array();
                }

                $j = 1 + strspn($s, $ASCII, $i + 1);
                $result .= substr($s, $i, $j);
                $i += $j;
                continue;
            }

            $ulen = $ulenMask[$s[$i] & "\xF0"];
            $uchr = substr($s, $i, $ulen);
            $i += $ulen;

            if ($uchr < "\xEA\xB0\x80" || "\xED\x9E\xA3" < $uchr) {
                // Table lookup

                if ($uchr !== $j = isset($compatMap[$uchr]) ? $compatMap[$uchr] : (isset($decompMap[$uchr]) ? $decompMap[$uchr] : $uchr)) {
                    $uchr = $j;

                    $j = \strlen($uchr);
                    $ulen = $uchr[0] < "\x80" ? 1 : $ulenMask[$uchr[0] & "\xF0"];

                    if ($ulen != $j) {
                        // Put trailing chars in $s

                        // Only the first char of the mapping is handled now;
                        // the remaining bytes are written back just before
                        // the read position, over already consumed input
                        $j -= $ulen;
                        $i -= $j;

                        if (0 > $i) {
                            // Not enough consumed bytes: grow $s to the left
                            $s = str_repeat(' ', -$i).$s;
                            $len -= $i;
                            $i = 0;
                        }

                        while ($j--) {
                            $s[$i + $j] = $uchr[$ulen + $j];
                        }

                        $uchr = substr($uchr, 0, $ulen);
                    }
                }
                if (isset($combClass[$uchr])) {
                    // Combining chars, for sorting

                    if (!isset($marks[$combClass[$uchr]])) {
                        $marks[$combClass[$uchr]] = '';
                    }
                    $marks[$combClass[$uchr]] .= $uchr;
                    continue;
                }
            } else {
                // Hangul chars

                // Index of the syllable relative to U+AC00
                $uchr = unpack('C*', $uchr);
                $j = (($uchr[1] - 224) << 12) + (($uchr[2] - 128) << 6) + $uchr[3] - 0xAC80;

                // 588 = 21 * 28 syllables per leading consonant, 28 per vowel
                $uchr = "\xE1\x84".\chr(0x80 + (int) ($j / 588))
                       ."\xE1\x85".\chr(0xA1 + (int) (($j % 588) / 28));

                if ($j %= 28) {
                    // Trailing consonant; from index 25 on, the second UTF-8
                    // byte changes from 0x86 to 0x87
                    $uchr .= $j < 25
                        ? ("\xE1\x86".\chr(0xA7 + $j))
                        : ("\xE1\x87".\chr(0x67 + $j));
                }
            }
            if ($marks) {
                ksort($marks);
                $result .= implode('', $marks);
                $marks = array();
            }

            $result .= $uchr;
        }

        if ($marks) {
            ksort($marks);
            $result .= implode('', $marks);
        }

        return $result;
    }

    /**
     * Loads one data file from Resources/unidata next to this class.
     *
     * @param string $file Base name of the data file, without extension
     *
     * @return mixed Whatever the file returns, or false when it does not exist
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/Resources/unidata/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}
Loading