From 141ca21505527afc6f3e4606c718e09bcfd27c03 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Wed, 22 Sep 2021 14:26:42 +0200 Subject: [PATCH 01/23] Library - Replace phputf8 with Portable UTF-8 --- composer.json | 109 ++++--- src/StringHelper.php | 293 +++++++++--------- src/phputf8/LICENSE | 504 ------------------------------- src/phputf8/README | 82 ----- src/phputf8/mbstring/core.php | 132 -------- src/phputf8/native/core.php | 430 -------------------------- src/phputf8/ord.php | 96 ------ src/phputf8/str_ireplace.php | 77 ----- src/phputf8/str_pad.php | 54 ---- src/phputf8/str_split.php | 32 -- src/phputf8/strcasecmp.php | 23 -- src/phputf8/strcspn.php | 38 --- src/phputf8/stristr.php | 35 --- src/phputf8/strrev.php | 19 -- src/phputf8/strspn.php | 38 --- src/phputf8/substr_replace.php | 22 -- src/phputf8/trim.php | 63 ---- src/phputf8/ucfirst.php | 31 -- src/phputf8/ucwords.php | 40 --- src/phputf8/utf8.php | 81 ----- src/phputf8/utils/ascii.php | 214 ------------- src/phputf8/utils/bad.php | 412 ------------------------- src/phputf8/utils/patterns.php | 64 ---- src/phputf8/utils/position.php | 168 ----------- src/phputf8/utils/specials.php | 126 -------- src/phputf8/utils/unicode.php | 271 ----------------- src/phputf8/utils/validation.php | 187 ------------ 27 files changed, 206 insertions(+), 3435 deletions(-) delete mode 100644 src/phputf8/LICENSE delete mode 100644 src/phputf8/README delete mode 100644 src/phputf8/mbstring/core.php delete mode 100644 src/phputf8/native/core.php delete mode 100644 src/phputf8/ord.php delete mode 100644 src/phputf8/str_ireplace.php delete mode 100644 src/phputf8/str_pad.php delete mode 100644 src/phputf8/str_split.php delete mode 100644 src/phputf8/strcasecmp.php delete mode 100644 src/phputf8/strcspn.php delete mode 100644 src/phputf8/stristr.php delete mode 100644 src/phputf8/strrev.php delete mode 100644 src/phputf8/strspn.php delete mode 100644 src/phputf8/substr_replace.php delete mode 100644 src/phputf8/trim.php 
delete mode 100644 src/phputf8/ucfirst.php delete mode 100644 src/phputf8/ucwords.php delete mode 100644 src/phputf8/utf8.php delete mode 100644 src/phputf8/utils/ascii.php delete mode 100644 src/phputf8/utils/bad.php delete mode 100644 src/phputf8/utils/patterns.php delete mode 100644 src/phputf8/utils/position.php delete mode 100644 src/phputf8/utils/specials.php delete mode 100644 src/phputf8/utils/unicode.php delete mode 100644 src/phputf8/utils/validation.php diff --git a/composer.json b/composer.json index 961171a7..c6d05746 100644 --- a/composer.json +++ b/composer.json @@ -1,58 +1,55 @@ { - "name": "joomla/string", - "type": "joomla-package", - "description": "Joomla String Package", - "keywords": ["joomla", "framework", "string"], - "homepage": "https://github.com/joomla-framework/string", - "license": "GPL-2.0-or-later", - "require": { - "php": "^7.2.5", - "symfony/deprecation-contracts": "^2.1" - }, - "require-dev": { - "doctrine/inflector": "1.2", - "joomla/coding-standards": "^3.0@dev", - "joomla/test": "^2.0", - "phpunit/phpunit": "^8.5|^9.0" - }, - "conflict": { - "doctrine/inflector": "<1.2" - }, - "suggest": { - "ext-mbstring": "For improved processing", - "doctrine/inflector": "To use the string inflector" - }, - "autoload": { - "psr-4": { - "Joomla\\String\\": "src/" - }, - "files": [ - "src/phputf8/utf8.php", - "src/phputf8/ord.php", - "src/phputf8/str_ireplace.php", - "src/phputf8/str_pad.php", - "src/phputf8/str_split.php", - "src/phputf8/strcasecmp.php", - "src/phputf8/strcspn.php", - "src/phputf8/stristr.php", - "src/phputf8/strrev.php", - "src/phputf8/strspn.php", - "src/phputf8/trim.php", - "src/phputf8/ucfirst.php", - "src/phputf8/ucwords.php", - "src/phputf8/utils/ascii.php", - "src/phputf8/utils/validation.php" - ] - }, - "autoload-dev": { - "psr-4": { - "Joomla\\String\\Tests\\": "Tests/" - } - }, - "minimum-stability": "dev", - "extra": { - "branch-alias": { - "dev-2.0-dev": "2.0-dev" - } - } + "name": "joomla/string", + "type": 
"joomla-package", + "description": "Joomla String Package", + "keywords": [ + "joomla", + "framework", + "string" + ], + "homepage": "https://github.com/joomla-framework/string", + "license": "GPL-2.0-or-later", + "require": { + "php": "^7.2.5", + "symfony/deprecation-contracts": "^2.1", + "voku/portable-utf8": "^4.0|^5.0" + }, + "require-dev": { + "roave/security-advisories": "dev-latest", + "doctrine/inflector": "1.2", + "joomla/coding-standards": "^3.0@dev", + "joomla/test": "^2.0", + "phpunit/phpunit": "^8.5|^9.0" + }, + "conflict": { + "doctrine/inflector": "<1.2" + }, + "replace": { + "symfony/polyfill-php72": "1.99", + "symfony/polyfill-iconv": "1.99", + "symfony/polyfill-intl-grapheme": "1.99", + "symfony/polyfill-intl-normalizer": "1.99", + "symfony/polyfill-mbstring": "1.99" + }, + "suggest": { + "ext-mbstring": "For improved performance", + "ext-iconv": "For improved performance", + "doctrine/inflector": "To use the string inflector" + }, + "autoload": { + "psr-4": { + "Joomla\\String\\": "src/" + } + }, + "autoload-dev": { + "psr-4": { + "Joomla\\String\\Tests\\": "Tests/" + } + }, + "minimum-stability": "dev", + "extra": { + "branch-alias": { + "dev-2.0-dev": "2.0-dev" + } + } } diff --git a/src/StringHelper.php b/src/StringHelper.php index e6c3a344..924baee9 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -8,6 +8,8 @@ namespace Joomla\String; +use voku\helper\UTF8; + @ini_set('default_charset', 'UTF-8'); /** @@ -93,11 +95,12 @@ public static function increment($string, $style = 'default', $n = null) * * @return boolean True if the string is all ASCII * - * @since 1.3.0 + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::is_ascii() instead. 
*/ public static function is_ascii($str) { - return utf8_is_ascii($str); + return UTF8::is_ascii($str); } /** @@ -109,12 +112,13 @@ public static function is_ascii($str) * * @return integer Unicode ordinal for the character * - * @link https://www.php.net/ord - * @since 1.4.0 + * @link https://www.php.net/ord + * @since 1.4.0 + * @deprecated 3.0 Please use UTF8::ord() instead. */ public static function ord($chr) { - return utf8_ord($chr); + return UTF8::ord($chr); } /** @@ -132,17 +136,18 @@ public static function ord($chr) * start at 0, and not 1. * Returns false if the needle was not found. * - * @link https://www.php.net/strpos - * @since 1.3.0 + * @link https://www.php.net/strpos + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::strpos() instead. */ public static function strpos($haystack, $needle, $offset = null) { if ($offset === null) { - return utf8_strpos($haystack, $needle); + return UTF8::strpos($haystack, $needle); } - return utf8_strpos($haystack, $needle, $offset); + return UTF8::strpos($haystack, $needle, $offset); } /** @@ -161,12 +166,18 @@ public static function strpos($haystack, $needle, $offset = null) * start at 0, and not 1. * Returns false if the needle was not found. * - * @link https://www.php.net/strrpos - * @since 1.3.0 + * @link https://www.php.net/strrpos + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::strrpos() instead. */ - public static function strrpos($haystack, $needle, $offset = 0) + public static function strrpos($haystack, $needle, $offset = null) { - return utf8_strrpos($haystack, $needle, $offset ?? 0); + if ($offset === null) + { + $offset = 0; + } + + return UTF8::strrpos($haystack, $needle, $offset); } /** @@ -180,17 +191,18 @@ public static function strrpos($haystack, $needle, $offset = 0) * * @return string|boolean * - * @link https://www.php.net/substr - * @since 1.3.0 + * @link https://www.php.net/substr + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::substr() instead. 
*/ public static function substr($str, $offset, $length = null) { if ($length === null) { - return utf8_substr($str, $offset); + return UTF8::substr($str, $offset); } - return utf8_substr($str, $offset, $length); + return UTF8::substr($str, $offset, $length); } /** @@ -205,12 +217,13 @@ public static function substr($str, $offset, $length = null) * * @return string|boolean Either string in lowercase or FALSE is UTF-8 invalid * - * @link https://www.php.net/strtolower - * @since 1.3.0 + * @link https://www.php.net/strtolower + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::strtolower() instead. */ public static function strtolower($str) { - return utf8_strtolower($str); + return UTF8::strtolower($str); } /** @@ -225,12 +238,13 @@ public static function strtolower($str) * * @return string|boolean Either string in uppercase or FALSE is UTF-8 invalid * - * @link https://www.php.net/strtoupper - * @since 1.3.0 + * @link https://www.php.net/strtoupper + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::strtoupper() instead. */ public static function strtoupper($str) { - return utf8_strtoupper($str); + return UTF8::strtoupper($str); } /** @@ -242,12 +256,13 @@ public static function strtoupper($str) * * @return integer Number of UTF-8 characters in string. * - * @link https://www.php.net/strlen - * @since 1.3.0 + * @link https://www.php.net/strlen + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::strlen() instead. */ public static function strlen($str) { - return utf8_strlen($str); + return UTF8::strlen($str); } /** @@ -265,12 +280,13 @@ public static function strlen($str) * * @return string UTF-8 String * - * @link https://www.php.net/str_ireplace - * @since 1.3.0 + * @link https://www.php.net/str_ireplace + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::str_ireplace() instead. 
*/ public static function str_ireplace($search, $replace, $subject, &$count = null) { - return utf8_ireplace($search, $replace, $subject, $count); + return UTF8::str_ireplace($search, $replace, $subject, $count); } /** @@ -287,12 +303,13 @@ public static function str_ireplace($search, $replace, $subject, &$count = null) * * @return string * - * @link https://www.php.net/str_pad - * @since 1.4.0 + * @link https://www.php.net/str_pad + * @since 1.4.0 + * @deprecated 3.0 Please use UTF8::str_pad() instead. */ public static function str_pad($input, $length, $padStr = ' ', $type = STR_PAD_RIGHT) { - return utf8_str_pad($input, $length, $padStr, $type); + return UTF8::str_pad($input, $length, $padStr, $type); } /** @@ -305,12 +322,13 @@ public static function str_pad($input, $length, $padStr = ' ', $type = STR_PAD_R * * @return array|string|boolean * - * @link https://www.php.net/str_split - * @since 1.3.0 + * @link https://www.php.net/str_split + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::str_split() instead. */ public static function str_split($str, $splitLen = 1) { - return utf8_str_split($str, $splitLen); + return UTF8::str_split($str, $splitLen); } /** @@ -324,16 +342,16 @@ public static function str_split($str, $splitLen = 1) * * @return integer < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal. 
* - * @link https://www.php.net/strcasecmp - * @link https://www.php.net/strcoll - * @link https://www.php.net/setlocale - * @since 1.3.0 + * @link https://www.php.net/strcasecmp + * @link https://www.php.net/strcoll + * @link https://www.php.net/setlocale + * @since 1.3.0 */ public static function strcasecmp($str1, $str2, $locale = false) { if ($locale === false) { - return utf8_strcasecmp($str1, $str2); + return UTF8::strcasecmp($str1, $str2); } $encoding = self::setLocale($locale); @@ -341,12 +359,12 @@ public static function strcasecmp($str1, $str2, $locale = false) // If we successfully set encoding it to utf-8 or encoding is sth weird don't recode if ($encoding === 'UTF-8' || $encoding === 'nonrecodable') { - return strcoll(utf8_strtolower($str1), utf8_strtolower($str2)); + return strcoll(UTF8::strtolower($str1), UTF8::strtolower($str2)); } return strcoll( - static::transcode(utf8_strtolower($str1), 'UTF-8', $encoding), - static::transcode(utf8_strtolower($str2), 'UTF-8', $encoding) + static::transcode(UTF8::strtolower($str1), 'UTF-8', $encoding), + static::transcode(UTF8::strtolower($str2), 'UTF-8', $encoding) ); } @@ -361,16 +379,17 @@ public static function strcasecmp($str1, $str2, $locale = false) * * @return integer < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal. 
* - * @link https://www.php.net/strcmp - * @link https://www.php.net/strcoll - * @link https://www.php.net/setlocale - * @since 1.3.0 + * @link https://www.php.net/strcmp + * @link https://www.php.net/strcoll + * @link https://www.php.net/setlocale + * @since 1.3.0 + * @since __DEPLOY_VERSION__ Uses UTF8::strcmp() when no locale is given */ public static function strcmp($str1, $str2, $locale = false) { if ($locale === false) { - return strcmp($str1, $str2); + return UTF8::strcmp($str1, $str2); } $encoding = self::setLocale($locale); @@ -381,7 +400,10 @@ public static function strcmp($str1, $str2, $locale = false) return strcoll($str1, $str2); } - return strcoll(static::transcode($str1, 'UTF-8', $encoding), static::transcode($str2, 'UTF-8', $encoding)); + return strcoll( + static::transcode($str1, 'UTF-8', $encoding), + static::transcode($str2, 'UTF-8', $encoding) + ); } /** @@ -396,8 +418,9 @@ public static function strcmp($str1, $str2, $locale = false) * * @return integer The length of the initial segment of str1 which does not contain any of the characters in str2 * - * @link https://www.php.net/strcspn - * @since 1.3.0 + * @link https://www.php.net/strcspn + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::strcspn() instead.
*/ public static function strcspn($str, $mask, $start = null, $length = null) { @@ -405,13 +428,13 @@ public static function strcspn($str, $mask, $start = null, $length = null) { if ($start === null) { - return utf8_strcspn($str, $mask); + return UTF8::strcspn($str, $mask); } - return utf8_strcspn($str, $mask, $start); + return UTF8::strcspn($str, $mask, $start); } - return utf8_strcspn($str, $mask, $start, $length); + return UTF8::strcspn($str, $mask, $start, $length); } /** @@ -427,12 +450,13 @@ public static function strcspn($str, $mask, $start = null, $length = null) * * @return string|boolean * - * @link https://www.php.net/stristr - * @since 1.3.0 + * @link https://www.php.net/stristr + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::stristr() instead. */ public static function stristr($str, $search) { - return utf8_stristr($str, $search); + return UTF8::stristr($str, $search); } /** @@ -444,12 +468,13 @@ public static function stristr($str, $search) * * @return string The string in reverse character order * - * @link https://www.php.net/strrev - * @since 1.3.0 + * @link https://www.php.net/strrev + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::strrev() instead. */ public static function strrev($str) { - return utf8_strrev($str); + return UTF8::strrev($str); } /** @@ -464,8 +489,9 @@ public static function strrev($str) * * @return integer * - * @link https://www.php.net/strspn - * @since 1.3.0 + * @link https://www.php.net/strspn + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::strspn() instead. 
*/ public static function strspn($str, $mask, $start = null, $length = null) { @@ -473,13 +499,13 @@ public static function strspn($str, $mask, $start = null, $length = null) { if ($start === null) { - return utf8_strspn($str, $mask); + return UTF8::strspn($str, $mask); } - return utf8_strspn($str, $mask, $start); + return UTF8::strspn($str, $mask, $start); } - return utf8_strspn($str, $mask, $start, $length); + return UTF8::strspn($str, $mask, $start ?? 0, $length); } /** @@ -494,17 +520,18 @@ public static function strspn($str, $mask, $start = null, $length = null) * * @return string * - * @link https://www.php.net/substr_replace - * @since 1.3.0 + * @link https://www.php.net/substr_replace + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::substr_replace() instead. */ public static function substr_replace($str, $repl, $start, $length = null) { if ($length === false) { - return utf8_substr_replace($str, $repl, $start); + $length = null; } - return utf8_substr_replace($str, $repl, $start, $length); + return UTF8::substr_replace($str, $repl, $start, $length); } /** @@ -515,27 +542,28 @@ public static function substr_replace($str, $repl, $start, $length = null) * You only need to use this if you are supplying the char list optional arg, and it contains UTF-8 characters. * Otherwise, ltrim will work normally on a UTF-8 string. * - * @param string $str The string to be trimmed - * @param string|boolean $charlist The optional charlist of additional characters to trim + * @param string $str The string to be trimmed + * @param string|boolean|null $charlist The optional charlist of additional characters to trim * * @return string The trimmed string * - * @link https://www.php.net/ltrim - * @since 1.3.0 + * @link https://www.php.net/ltrim + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::ltrim() instead. 
*/ public static function ltrim($str, $charlist = false) { - if ($charlist === false) + if ($charlist === '') { - return utf8_ltrim($str); + return $str; } - if (empty($charlist)) + if ($charlist === false) { - return $str; + $charlist = null; } - return utf8_ltrim($str, $charlist); + return UTF8::ltrim($str, $charlist); } /** @@ -546,27 +574,28 @@ public static function ltrim($str, $charlist = false) * You only need to use this if you are supplying the char list optional arg, and it contains UTF-8 characters. * Otherwise, rtrim will work normally on a UTF-8 string. * - * @param string $str The string to be trimmed - * @param string|boolean $charlist The optional charlist of additional characters to trim + * @param string $str The string to be trimmed + * @param string|boolean|null $charlist The optional charlist of additional characters to trim * * @return string The trimmed string * - * @link https://www.php.net/rtrim - * @since 1.3.0 + * @link https://www.php.net/rtrim + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::rtrim() instead. */ public static function rtrim($str, $charlist = false) { - if ($charlist === false) + if ($charlist === '') { - return utf8_rtrim($str); + return $str; } - if (empty($charlist)) + if ($charlist === false) { - return $str; + $charlist = null; } - return utf8_rtrim($str, $charlist); + return UTF8::rtrim($str, $charlist); } /** @@ -577,27 +606,28 @@ public static function rtrim($str, $charlist = false) * You only need to use this if you are supplying the charlist optional arg and it contains UTF-8 characters. 
* Otherwise, trim will work normally on a UTF-8 string * - * @param string $str The string to be trimmed - * @param string|boolean $charlist The optional charlist of additional characters to trim + * @param string $str The string to be trimmed + * @param string|boolean|null $charlist The optional charlist of additional characters to trim * * @return string The trimmed string * - * @link https://www.php.net/trim - * @since 1.3.0 + * @link https://www.php.net/trim + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::trim() instead. */ public static function trim($str, $charlist = false) { - if ($charlist === false) + if ($charlist === '') { - return utf8_trim($str); + return $str; } - if (empty($charlist)) + if ($charlist === false) { - return $str; + $charlist = null; } - return utf8_trim($str, $charlist); + return UTF8::trim($str, $charlist); } /** @@ -620,7 +650,7 @@ public static function ucfirst($str, $delimiter = null, $newDelimiter = null) { if ($delimiter === null) { - return utf8_ucfirst($str); + return UTF8::ucfirst($str); } if ($newDelimiter === null) @@ -628,7 +658,7 @@ public static function ucfirst($str, $delimiter = null, $newDelimiter = null) $newDelimiter = $delimiter; } - return implode($newDelimiter, array_map('utf8_ucfirst', explode($delimiter, $str))); + return implode($newDelimiter, array_map([UTF8::class, 'ucfirst'], explode($delimiter, $str))); } /** @@ -640,12 +670,13 @@ public static function ucfirst($str, $delimiter = null, $newDelimiter = null) * * @return string String with first char of each word uppercase * - * @link https://www.php.net/ucwords - * @since 1.3.0 + * @link https://www.php.net/ucwords + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::ucwords() instead. */ public static function ucwords($str) { - return utf8_ucwords($str); + return UTF8::ucwords($str); } /** @@ -657,9 +688,9 @@ public static function ucwords($str) * * @return string|null The transcoded string, or null if the source was not a string. 
* - * @link https://bugs.php.net/bug.php?id=48147 + * @link https://bugs.php.net/bug.php?id=48147 * - * @since 1.3.0 + * @since 1.3.0 */ public static function transcode($source, $fromEncoding, $toEncoding) { @@ -677,14 +708,15 @@ public static function transcode($source, $fromEncoding, $toEncoding) * * @return boolean true if valid * - * @author - * @link https://hsivonen.fi/php-utf8/ - * @see compliant - * @since 1.3.0 + * @author + * @link https://hsivonen.fi/php-utf8/ + * @see compliant + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::is_utf8() instead. */ public static function valid($str) { - return utf8_is_valid($str); + return UTF8::is_utf8($str); } /** @@ -700,63 +732,44 @@ public static function valid($str) * * @return boolean TRUE if string is valid UTF-8 * - * @see StringHelper::valid - * @link https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805 - * @since 1.3.0 + * @see StringHelper::valid + * @link https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805 + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::is_utf8() instead. */ public static function compliant($str) { - return utf8_compliant($str); + return UTF8::is_utf8($str); } /** - * Converts Unicode sequences to UTF-8 string. + * Converts a Unicode string to a UTF-8 string (delegates to UTF8::to_utf8_string()). * * @param string $str Unicode string to convert * * @return string UTF-8 string * - * @since 1.3.0 + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::to_utf8_string() instead. */ public static function unicode_to_utf8($str) { - if (\extension_loaded('mbstring')) - { - return preg_replace_callback( - '/\\\\u([0-9a-fA-F]{4})/', - static function ($match) { - return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE'); - }, - $str - ); - } - - return $str; + return UTF8::to_utf8_string($str); } /** - * Converts Unicode sequences to UTF-16 string. + * Converts UTF-16 sequences to UTF-8 string.
* * @param string $str Unicode string to convert * * @return string UTF-16 string * - * @since 1.3.0 + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::to_utf8_string() instead. */ public static function unicode_to_utf16($str) { - if (\extension_loaded('mbstring')) - { - return preg_replace_callback( - '/\\\\u([0-9a-fA-F]{4})/', - static function ($match) { - return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UTF-16BE'); - }, - $str - ); - } - - return $str; + return UTF8::to_utf8_string($str); } /** diff --git a/src/phputf8/LICENSE b/src/phputf8/LICENSE deleted file mode 100644 index 28f18896..00000000 --- a/src/phputf8/LICENSE +++ /dev/null @@ -1,504 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1, February 1999 - - Copyright (C) 1991, 1999 Free Software Foundation, Inc. - 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts - as the successor of the GNU Library Public License, version 2, hence - the version number 2.1.] - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Lesser General Public License, applies to some -specially designated software packages--typically libraries--of the -Free Software Foundation and other authors who decide to use it. You -can use it too, but we suggest you first think carefully about whether -this license or the ordinary General Public License is the better -strategy to use in any particular case, based on the explanations below. - - When we speak of free software, we are referring to freedom of use, -not price. 
Our General Public Licenses are designed to make sure that -you have the freedom to distribute copies of free software (and charge -for this service if you wish); that you receive source code or can get -it if you want it; that you can change the software and use pieces of -it in new free programs; and that you are informed that you can do -these things. - - To protect your rights, we need to make restrictions that forbid -distributors to deny you these rights or to ask you to surrender these -rights. These restrictions translate to certain responsibilities for -you if you distribute copies of the library or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. You must make sure that they, too, receive or can get the source -code. If you link other code with the library, you must provide -complete object files to the recipients, so that they can relink them -with the library after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - We protect your rights with a two-step method: (1) we copyright the -library, and (2) we offer you this license, which gives you legal -permission to copy, distribute and/or modify the library. - - To protect each distributor, we want to make it very clear that -there is no warranty for the free library. Also, if the library is -modified by someone else and passed on, the recipients should know -that what they have is not the original version, so that the original -author's reputation will not be affected by problems that might be -introduced by others. - - Finally, software patents pose a constant threat to the existence of -any free program. We wish to make sure that a company cannot -effectively restrict the users of a free program by obtaining a -restrictive license from a patent holder. 
Therefore, we insist that -any patent license obtained for a version of the library must be -consistent with the full freedom of use specified in this license. - - Most GNU software, including some libraries, is covered by the -ordinary GNU General Public License. This license, the GNU Lesser -General Public License, applies to certain designated libraries, and -is quite different from the ordinary General Public License. We use -this license for certain libraries in order to permit linking those -libraries into non-free programs. - - When a program is linked with a library, whether statically or using -a shared library, the combination of the two is legally speaking a -combined work, a derivative of the original library. The ordinary -General Public License therefore permits such linking only if the -entire combination fits its criteria of freedom. The Lesser General -Public License permits more lax criteria for linking other code with -the library. - - We call this license the "Lesser" General Public License because it -does Less to protect the user's freedom than the ordinary General -Public License. It also provides other free software developers Less -of an advantage over competing non-free programs. These disadvantages -are the reason we use the ordinary General Public License for many -libraries. However, the Lesser license provides advantages in certain -special circumstances. - - For example, on rare occasions, there may be a special need to -encourage the widest possible use of a certain library, so that it becomes -a de-facto standard. To achieve this, non-free programs must be -allowed to use the library. A more frequent case is that a free -library does the same job as widely used non-free libraries. In this -case, there is little to gain by limiting the free library to free -software only, so we use the Lesser General Public License. 
- - In other cases, permission to use a particular library in non-free -programs enables a greater number of people to use a large body of -free software. For example, permission to use the GNU C Library in -non-free programs enables many more people to use the whole GNU -operating system, as well as its variant, the GNU/Linux operating -system. - - Although the Lesser General Public License is Less protective of the -users' freedom, it does ensure that the user of a program that is -linked with the Library has the freedom and the wherewithal to run -that program using a modified version of the Library. - - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, whereas the latter must -be combined with the library in order to run. - - GNU LESSER GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License Agreement applies to any software library or other -program which contains a notice placed by the copyright holder or -other authorized party saying it may be distributed under the terms of -this Lesser General Public License (also called "this License"). -Each licensee is addressed as "you". - - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. 
(Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. - - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - - 2. You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. 
- - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. - - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. - - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. 
- -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) Do not make any other change in -these notices. - - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". 
Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. - - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. -Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. - - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - - 6. As an exception to the Sections above, you may also combine or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. 
- - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. (It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (1) uses at run time a - copy of the library already present on the user's computer system, - rather than copying library functions into the executable, and (2) - will operate properly with a modified version of the library, if - the user installs one, as long as the modified version is - interface-compatible with the version that the work was made with. - - c) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. 
- - d) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - e) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. However, as a special exception, -the materials to be distributed need not include anything that is -normally distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - - 7. You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. 
You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties with -this License. - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. 
For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. The Free Software Foundation may publish revised and/or new -versions of the Lesser General Public License from time to time. 
-Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - - 14. If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - - To apply these terms, attach the following notices to the library. It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random Hacker. - - , 1 April 1990 - Ty Coon, President of Vice - -That's all there is to it! - - diff --git a/src/phputf8/README b/src/phputf8/README deleted file mode 100644 index 6c309054..00000000 --- a/src/phputf8/README +++ /dev/null @@ -1,82 +0,0 @@ -++PHP UTF-8++ - -Version 0.5 - -++DOCUMENTATION++ - -Documentation in progress in ./docs dir - -http://www.phpwact.org/php/i18n/charsets -http://www.phpwact.org/php/i18n/utf-8 - -Important Note: DO NOT use these functions without understanding WHY -you are using them. In particular, do not blindly replace all use of PHP's -string functions which functions found here - most of the time you will -not need to, and you will be introducing a significant performance -overhead to your application. You can get a good idea of when to use what -from reading: http://www.phpwact.org/php/i18n/utf-8 - -Important Note: For sake of performance most of the functions here are -not "defensive" (e.g. there is not extensive parameter checking, well -formed UTF-8 is assumed). This is particularily relevant when is comes to -catching badly formed UTF-8 - you should screen input on the "outer -perimeter" with help from functions in the utf8_validation.php and -utf8_bad.php files. - -Important Note: this library treats ALL ASCII characters as valid, including ASCII control characters. 
But if you use some ASCII control characters in XML, it will render the XML ill-formed. Don't be a bozo: http://hsivonen.iki.fi/producing-xml/#controlchar - -++BUGS / SUPPORT / FEATURE REQUESTS ++ - -Please report bugs to: -http://sourceforge.net/tracker/?group_id=142846&atid=753842 -- if you are able, please submit a failing unit test -(http://www.lastcraft.com/simple_test.php) with your bug report. - -For feature requests / faster implementation of functions found here, -please drop them in via the RFE tracker: http://sourceforge.net/tracker/?group_id=142846&atid=753845 -Particularily interested in faster implementations! - -For general support / help, use: -http://sourceforge.net/tracker/?group_id=142846&atid=753843 - -In the VERY WORST case, you can email me: hfuecks gmail com - I tend to be slow to respond though so be warned. - -Important Note: when reporting bugs, please provide the following -information; - -PHP version, whether the iconv extension is loaded (in PHP5 it's -there by default), whether the mbstring extension is loaded. The -following PHP script can be used to determine this information; - -"; -if ( extension_loaded('mbstring') ) { - print "mbstring available
"; -} else { - print "mbstring not available
"; -} -if ( extension_loaded('iconv') ) { - print "iconv available
"; -} else { - print "iconv not available
"; -} -?> - -++LICENSING++ - -Parts of the code in this library come from other places, under different -licenses. -The authors involved have been contacted (see below). Attribution for -which code came from elsewhere can be found in the source code itself. - -+Andreas Gohr / Chris Smith - Dokuwiki -There is a fair degree of collaboration / exchange of ideas and code -beteen Dokuwiki's UTF-8 library; -http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -and phputf8. Although Dokuwiki is released under GPL, its UTF-8 -library is released under LGPL, hence no conflict with phputf8 - -+Henri Sivonen (http://hsivonen.iki.fi/php-utf8/ / -http://hsivonen.iki.fi/php-utf8/) has also given permission for his -code to be released under the terms of the LGPL. He ported a Unicode / UTF-8 -converter from the Mozilla codebase to PHP, which is re-used in phputf8 diff --git a/src/phputf8/mbstring/core.php b/src/phputf8/mbstring/core.php deleted file mode 100644 index 6cb5501d..00000000 --- a/src/phputf8/mbstring/core.php +++ /dev/null @@ -1,132 +0,0 @@ - -* @link http://www.php.net/manual/en/function.strlen.php -* @link http://www.php.net/manual/en/function.utf8-decode.php -* @param string UTF-8 string -* @return int number of UTF-8 characters in string -* @package utf8 -*/ -function utf8_strlen($str){ - return strlen(utf8_decode($str)); -} - - -//-------------------------------------------------------------------- -/** -* UTF-8 aware alternative to strpos -* Find position of first occurrence of a string -* Note: This will get alot slower if offset is used -* Note: requires utf8_strlen amd utf8_substr to be loaded -* @param string haystack -* @param string needle (you should validate this with utf8_is_valid) -* @param integer offset in characters (from left) -* @return mixed integer position or FALSE on failure -* @see http://www.php.net/strpos -* @see utf8_strlen -* @see utf8_substr -* @package utf8 -*/ -function utf8_strpos($str, $needle, $offset = NULL) { - - if ( 
is_null($offset) ) { - - $ar = explode($needle, $str, 2); - if ( count($ar) > 1 ) { - return utf8_strlen($ar[0]); - } - return FALSE; - - } else { - - if ( !is_int($offset) ) { - trigger_error('utf8_strpos: Offset must be an integer',E_USER_ERROR); - return FALSE; - } - - $str = utf8_substr($str, $offset); - - if ( FALSE !== ( $pos = utf8_strpos($str, $needle) ) ) { - return $pos + $offset; - } - - return FALSE; - } - -} - -//-------------------------------------------------------------------- -/** -* UTF-8 aware alternative to strrpos -* Find position of last occurrence of a char in a string -* Note: This will get alot slower if offset is used -* Note: requires utf8_substr and utf8_strlen to be loaded -* @param string haystack -* @param string needle (you should validate this with utf8_is_valid) -* @param integer (optional) offset (from left) -* @return mixed integer position or FALSE on failure -* @see http://www.php.net/strrpos -* @see utf8_substr -* @see utf8_strlen -* @package utf8 -*/ -function utf8_strrpos($str, $needle, $offset = NULL) { - - if ( is_null($offset) ) { - - $ar = explode($needle, $str); - - if ( count($ar) > 1 ) { - // Pop off the end of the string where the last match was made - array_pop($ar); - $str = join($needle,$ar); - return utf8_strlen($str); - } - return FALSE; - - } else { - - if ( !is_int($offset) ) { - trigger_error('utf8_strrpos expects parameter 3 to be long',E_USER_WARNING); - return FALSE; - } - - $str = utf8_substr($str, $offset); - - if ( FALSE !== ( $pos = utf8_strrpos($str, $needle) ) ) { - return $pos + $offset; - } - - return FALSE; - } - -} - -//-------------------------------------------------------------------- -/** -* UTF-8 aware alternative to substr -* Return part of a string given character offset (and optionally length) -* -* Note arguments: comparied to substr - if offset or length are -* not integers, this version will not complain but rather massages them -* into an integer. 
-* -* Note on returned values: substr documentation states false can be -* returned in some cases (e.g. offset > string length) -* mb_substr never returns false, it will return an empty string instead. -* This adopts the mb_substr approach -* -* Note on implementation: PCRE only supports repetitions of less than -* 65536, in order to accept up to MAXINT values for offset and length, -* we'll repeat a group of 65535 characters when needed. -* -* Note on implementation: calculating the number of characters in the -* string is a relatively expensive operation, so we only carry it out when -* necessary. It isn't necessary for +ve offsets and no specified length -* -* @author Chris Smith -* @param string -* @param integer number of UTF-8 characters offset (from left) -* @param integer (optional) length in UTF-8 characters from offset -* @return mixed string or FALSE if failure -* @package utf8 -*/ -function utf8_substr($str, $offset, $length = NULL) { - - // generates E_NOTICE - // for PHP4 objects, but not PHP5 objects - $str = (string)$str; - $offset = (int)$offset; - if (!is_null($length)) $length = (int)$length; - - // handle trivial cases - if ($length === 0) return ''; - if ($offset < 0 && $length < 0 && $length < $offset) - return ''; - - // normalise negative offsets (we could use a tail - // anchored pattern, but they are horribly slow!) 
- if ($offset < 0) { - - // see notes - $strlen = strlen(utf8_decode($str)); - $offset = $strlen + $offset; - if ($offset < 0) $offset = 0; - - } - - $Op = ''; - $Lp = ''; - - // establish a pattern for offset, a - // non-captured group equal in length to offset - if ($offset > 0) { - - $Ox = (int)($offset/65535); - $Oy = $offset%65535; - - if ($Ox) { - $Op = '(?:.{65535}){'.$Ox.'}'; - } - - $Op = '^(?:'.$Op.'.{'.$Oy.'})'; - - } else { - - // offset == 0; just anchor the pattern - $Op = '^'; - - } - - // establish a pattern for length - if (is_null($length)) { - - // the rest of the string - $Lp = '(.*)$'; - - } else { - - if (!isset($strlen)) { - // see notes - $strlen = strlen(utf8_decode($str)); - } - - // another trivial case - if ($offset > $strlen) return ''; - - if ($length > 0) { - - // reduce any length that would - // go passed the end of the string - $length = min($strlen-$offset, $length); - - $Lx = (int)( $length / 65535 ); - $Ly = $length % 65535; - - // negative length requires a captured group - // of length characters - if ($Lx) $Lp = '(?:.{65535}){'.$Lx.'}'; - $Lp = '('.$Lp.'.{'.$Ly.'})'; - - } else if ($length < 0) { - - if ( $length < ($offset - $strlen) ) { - return ''; - } - - $Lx = (int)((-$length)/65535); - $Ly = (-$length)%65535; - - // negative length requires ... capture everything - // except a group of -length characters - // anchored at the tail-end of the string - if ($Lx) $Lp = '(?:.{65535}){'.$Lx.'}'; - $Lp = '(.*)(?:'.$Lp.'.{'.$Ly.'})$'; - - } - - } - - if (!preg_match( '#'.$Op.$Lp.'#us',$str, $match )) { - return ''; - } - - return $match[1]; - -} - -//--------------------------------------------------------------- -/** -* UTF-8 aware alternative to strtolower -* Make a string lowercase -* Note: The concept of a characters "case" only exists is some alphabets -* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does -* not exist in the Chinese alphabet, for example. 
See Unicode Standard -* Annex #21: Case Mappings -* Note: requires utf8_to_unicode and utf8_from_unicode -* @author Andreas Gohr -* @param string -* @return mixed either string in lowercase or FALSE is UTF-8 invalid -* @see http://www.php.net/strtolower -* @see utf8_to_unicode -* @see utf8_from_unicode -* @see http://www.unicode.org/reports/tr21/tr21-5.html -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @package utf8 -*/ -function utf8_strtolower($string){ - - static $UTF8_UPPER_TO_LOWER = NULL; - - if ( is_null($UTF8_UPPER_TO_LOWER) ) { - $UTF8_UPPER_TO_LOWER = array( - 0x0041=>0x0061, 0x03A6=>0x03C6, 0x0162=>0x0163, 0x00C5=>0x00E5, 0x0042=>0x0062, - 0x0139=>0x013A, 0x00C1=>0x00E1, 0x0141=>0x0142, 0x038E=>0x03CD, 0x0100=>0x0101, - 0x0490=>0x0491, 0x0394=>0x03B4, 0x015A=>0x015B, 0x0044=>0x0064, 0x0393=>0x03B3, - 0x00D4=>0x00F4, 0x042A=>0x044A, 0x0419=>0x0439, 0x0112=>0x0113, 0x041C=>0x043C, - 0x015E=>0x015F, 0x0143=>0x0144, 0x00CE=>0x00EE, 0x040E=>0x045E, 0x042F=>0x044F, - 0x039A=>0x03BA, 0x0154=>0x0155, 0x0049=>0x0069, 0x0053=>0x0073, 0x1E1E=>0x1E1F, - 0x0134=>0x0135, 0x0427=>0x0447, 0x03A0=>0x03C0, 0x0418=>0x0438, 0x00D3=>0x00F3, - 0x0420=>0x0440, 0x0404=>0x0454, 0x0415=>0x0435, 0x0429=>0x0449, 0x014A=>0x014B, - 0x0411=>0x0431, 0x0409=>0x0459, 0x1E02=>0x1E03, 0x00D6=>0x00F6, 0x00D9=>0x00F9, - 0x004E=>0x006E, 0x0401=>0x0451, 0x03A4=>0x03C4, 0x0423=>0x0443, 0x015C=>0x015D, - 0x0403=>0x0453, 0x03A8=>0x03C8, 0x0158=>0x0159, 0x0047=>0x0067, 0x00C4=>0x00E4, - 0x0386=>0x03AC, 0x0389=>0x03AE, 0x0166=>0x0167, 0x039E=>0x03BE, 0x0164=>0x0165, - 0x0116=>0x0117, 0x0108=>0x0109, 0x0056=>0x0076, 0x00DE=>0x00FE, 0x0156=>0x0157, - 0x00DA=>0x00FA, 0x1E60=>0x1E61, 0x1E82=>0x1E83, 0x00C2=>0x00E2, 0x0118=>0x0119, - 0x0145=>0x0146, 0x0050=>0x0070, 0x0150=>0x0151, 0x042E=>0x044E, 0x0128=>0x0129, - 0x03A7=>0x03C7, 0x013D=>0x013E, 0x0422=>0x0442, 0x005A=>0x007A, 0x0428=>0x0448, - 0x03A1=>0x03C1, 0x1E80=>0x1E81, 0x016C=>0x016D, 0x00D5=>0x00F5, 0x0055=>0x0075, - 
0x0176=>0x0177, 0x00DC=>0x00FC, 0x1E56=>0x1E57, 0x03A3=>0x03C3, 0x041A=>0x043A, - 0x004D=>0x006D, 0x016A=>0x016B, 0x0170=>0x0171, 0x0424=>0x0444, 0x00CC=>0x00EC, - 0x0168=>0x0169, 0x039F=>0x03BF, 0x004B=>0x006B, 0x00D2=>0x00F2, 0x00C0=>0x00E0, - 0x0414=>0x0434, 0x03A9=>0x03C9, 0x1E6A=>0x1E6B, 0x00C3=>0x00E3, 0x042D=>0x044D, - 0x0416=>0x0436, 0x01A0=>0x01A1, 0x010C=>0x010D, 0x011C=>0x011D, 0x00D0=>0x00F0, - 0x013B=>0x013C, 0x040F=>0x045F, 0x040A=>0x045A, 0x00C8=>0x00E8, 0x03A5=>0x03C5, - 0x0046=>0x0066, 0x00DD=>0x00FD, 0x0043=>0x0063, 0x021A=>0x021B, 0x00CA=>0x00EA, - 0x0399=>0x03B9, 0x0179=>0x017A, 0x00CF=>0x00EF, 0x01AF=>0x01B0, 0x0045=>0x0065, - 0x039B=>0x03BB, 0x0398=>0x03B8, 0x039C=>0x03BC, 0x040C=>0x045C, 0x041F=>0x043F, - 0x042C=>0x044C, 0x00DE=>0x00FE, 0x00D0=>0x00F0, 0x1EF2=>0x1EF3, 0x0048=>0x0068, - 0x00CB=>0x00EB, 0x0110=>0x0111, 0x0413=>0x0433, 0x012E=>0x012F, 0x00C6=>0x00E6, - 0x0058=>0x0078, 0x0160=>0x0161, 0x016E=>0x016F, 0x0391=>0x03B1, 0x0407=>0x0457, - 0x0172=>0x0173, 0x0178=>0x00FF, 0x004F=>0x006F, 0x041B=>0x043B, 0x0395=>0x03B5, - 0x0425=>0x0445, 0x0120=>0x0121, 0x017D=>0x017E, 0x017B=>0x017C, 0x0396=>0x03B6, - 0x0392=>0x03B2, 0x0388=>0x03AD, 0x1E84=>0x1E85, 0x0174=>0x0175, 0x0051=>0x0071, - 0x0417=>0x0437, 0x1E0A=>0x1E0B, 0x0147=>0x0148, 0x0104=>0x0105, 0x0408=>0x0458, - 0x014C=>0x014D, 0x00CD=>0x00ED, 0x0059=>0x0079, 0x010A=>0x010B, 0x038F=>0x03CE, - 0x0052=>0x0072, 0x0410=>0x0430, 0x0405=>0x0455, 0x0402=>0x0452, 0x0126=>0x0127, - 0x0136=>0x0137, 0x012A=>0x012B, 0x038A=>0x03AF, 0x042B=>0x044B, 0x004C=>0x006C, - 0x0397=>0x03B7, 0x0124=>0x0125, 0x0218=>0x0219, 0x00DB=>0x00FB, 0x011E=>0x011F, - 0x041E=>0x043E, 0x1E40=>0x1E41, 0x039D=>0x03BD, 0x0106=>0x0107, 0x03AB=>0x03CB, - 0x0426=>0x0446, 0x00DE=>0x00FE, 0x00C7=>0x00E7, 0x03AA=>0x03CA, 0x0421=>0x0441, - 0x0412=>0x0432, 0x010E=>0x010F, 0x00D8=>0x00F8, 0x0057=>0x0077, 0x011A=>0x011B, - 0x0054=>0x0074, 0x004A=>0x006A, 0x040B=>0x045B, 0x0406=>0x0456, 0x0102=>0x0103, - 0x039B=>0x03BB, 0x00D1=>0x00F1, 
0x041D=>0x043D, 0x038C=>0x03CC, 0x00C9=>0x00E9, - 0x00D0=>0x00F0, 0x0407=>0x0457, 0x0122=>0x0123, - ); - } - - $uni = utf8_to_unicode($string); - - if ( !$uni ) { - return FALSE; - } - - $cnt = count($uni); - for ($i=0; $i < $cnt; $i++){ - if ( isset($UTF8_UPPER_TO_LOWER[$uni[$i]]) ) { - $uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]]; - } - } - - return utf8_from_unicode($uni); -} - -//--------------------------------------------------------------- -/** -* UTF-8 aware alternative to strtoupper -* Make a string uppercase -* Note: The concept of a characters "case" only exists is some alphabets -* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does -* not exist in the Chinese alphabet, for example. See Unicode Standard -* Annex #21: Case Mappings -* Note: requires utf8_to_unicode and utf8_from_unicode -* @author Andreas Gohr -* @param string -* @return mixed either string in lowercase or FALSE is UTF-8 invalid -* @see http://www.php.net/strtoupper -* @see utf8_to_unicode -* @see utf8_from_unicode -* @see http://www.unicode.org/reports/tr21/tr21-5.html -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @package utf8 -*/ -function utf8_strtoupper($string){ - - static $UTF8_LOWER_TO_UPPER = NULL; - - if ( is_null($UTF8_LOWER_TO_UPPER) ) { - $UTF8_LOWER_TO_UPPER = array( - 0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042, - 0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100, - 0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393, - 0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C, - 0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F, - 0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E, - 0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3, - 0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A, - 0x0431=>0x0411, 
0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9, - 0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C, - 0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4, - 0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164, - 0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156, - 0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118, - 0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128, - 0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428, - 0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055, - 0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A, - 0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC, - 0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0, - 0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D, - 0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0, - 0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5, - 0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA, - 0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045, - 0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F, - 0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048, - 0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6, - 0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407, - 0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395, - 0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396, - 0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051, - 0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 
0x0105=>0x0104, 0x0458=>0x0408, - 0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F, - 0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126, - 0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C, - 0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E, - 0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB, - 0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421, - 0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A, - 0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102, - 0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9, - 0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122, - ); - } - - $uni = utf8_to_unicode($string); - - if ( !$uni ) { - return FALSE; - } - - $cnt = count($uni); - for ($i=0; $i < $cnt; $i++){ - if( isset($UTF8_LOWER_TO_UPPER[$uni[$i]]) ) { - $uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]]; - } - } - - return utf8_from_unicode($uni); -} diff --git a/src/phputf8/ord.php b/src/phputf8/ord.php deleted file mode 100644 index c3f000ca..00000000 --- a/src/phputf8/ord.php +++ /dev/null @@ -1,96 +0,0 @@ -= 0 && $ord0 <= 127 ) { - return $ord0; - } - - if ( !isset($chr[1]) ) { - trigger_error('Short sequence - at least 2 bytes expected, only 1 seen'); - return FALSE; - } - - $ord1 = ord($chr[1]); - if ( $ord0 >= 192 && $ord0 <= 223 ) { - return ( $ord0 - 192 ) * 64 - + ( $ord1 - 128 ); - } - - if ( !isset($chr[2]) ) { - trigger_error('Short sequence - at least 3 bytes expected, only 2 seen'); - return FALSE; - } - $ord2 = ord($chr[2]); - if ( $ord0 >= 224 && $ord0 <= 239 ) { - return ($ord0-224)*4096 - + ($ord1-128)*64 - + ($ord2-128); - } - - if ( !isset($chr[3]) ) { - trigger_error('Short sequence - at least 4 bytes expected, only 3 seen'); - return FALSE; - } - $ord3 = ord($chr[3]); - if ($ord0>=240 && $ord0<=247) { - return 
($ord0-240)*262144 - + ($ord1-128)*4096 - + ($ord2-128)*64 - + ($ord3-128); - - } - - if ( !isset($chr[4]) ) { - trigger_error('Short sequence - at least 5 bytes expected, only 4 seen'); - return FALSE; - } - $ord4 = ord($chr[4]); - if ($ord0>=248 && $ord0<=251) { - return ($ord0-248)*16777216 - + ($ord1-128)*262144 - + ($ord2-128)*4096 - + ($ord3-128)*64 - + ($ord4-128); - } - - if ( !isset($chr[5]) ) { - trigger_error('Short sequence - at least 6 bytes expected, only 5 seen'); - return FALSE; - } - if ($ord0>=252 && $ord0<=253) { - return ($ord0-252) * 1073741824 - + ($ord1-128)*16777216 - + ($ord2-128)*262144 - + ($ord3-128)*4096 - + ($ord4-128)*64 - + (ord($chr[5])-128); - } - - if ( $ord0 >= 254 && $ord0 <= 255 ) { - trigger_error('Invalid UTF-8 with surrogate ordinal '.$ord0); - return FALSE; - } - -} - diff --git a/src/phputf8/str_ireplace.php b/src/phputf8/str_ireplace.php deleted file mode 100644 index 84abff3c..00000000 --- a/src/phputf8/str_ireplace.php +++ /dev/null @@ -1,77 +0,0 @@ - -* @param string $input -* @param int $length -* @param string $padStr -* @param int $type ( same constants as str_pad ) -* @return string -* @see http://www.php.net/str_pad -* @see utf8_substr -* @package utf8 -*/ -function utf8_str_pad($input, $length, $padStr = ' ', $type = STR_PAD_RIGHT) { - - $inputLen = utf8_strlen($input); - if ($length <= $inputLen) { - return $input; - } - - $padStrLen = utf8_strlen($padStr); - $padLen = $length - $inputLen; - - if ($type == STR_PAD_RIGHT) { - $repeatTimes = ceil($padLen / $padStrLen); - return utf8_substr($input . str_repeat($padStr, $repeatTimes), 0, $length); - } - - if ($type == STR_PAD_LEFT) { - $repeatTimes = ceil($padLen / $padStrLen); - return utf8_substr(str_repeat($padStr, $repeatTimes), 0, floor($padLen)) . 
$input; - } - - if ($type == STR_PAD_BOTH) { - - $padLen/= 2; - $padAmountLeft = floor($padLen); - $padAmountRight = ceil($padLen); - $repeatTimesLeft = ceil($padAmountLeft / $padStrLen); - $repeatTimesRight = ceil($padAmountRight / $padStrLen); - - $paddingLeft = utf8_substr(str_repeat($padStr, $repeatTimesLeft), 0, $padAmountLeft); - $paddingRight = utf8_substr(str_repeat($padStr, $repeatTimesRight), 0, $padAmountLeft); - return $paddingLeft . $input . $paddingRight; - } - - trigger_error('utf8_str_pad: Unknown padding type (' . $type . ')',E_USER_ERROR); -} diff --git a/src/phputf8/str_split.php b/src/phputf8/str_split.php deleted file mode 100644 index 13b93d51..00000000 --- a/src/phputf8/str_split.php +++ /dev/null @@ -1,32 +0,0 @@ - -* @see http://www.php.net/ltrim -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @return string -* @package utf8 -*/ -function utf8_ltrim( $str, $charlist = FALSE ) { - if($charlist === FALSE) return ltrim($str); - - //quote charlist for use in a characterclass - $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist); - - return preg_replace('/^['.$charlist.']+/u','',$str); -} - -//--------------------------------------------------------------- -/** -* UTF-8 aware replacement for rtrim() -* Note: you only need to use this if you are supplying the charlist -* optional arg and it contains UTF-8 characters. 
Otherwise rtrim will -* work normally on a UTF-8 string -* @author Andreas Gohr -* @see http://www.php.net/rtrim -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @return string -* @package utf8 -*/ -function utf8_rtrim( $str, $charlist = FALSE ) { - if($charlist === FALSE) return rtrim($str); - - //quote charlist for use in a characterclass - $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist); - - return preg_replace('/['.$charlist.']+$/u','',$str); -} - -//--------------------------------------------------------------- -/** -* UTF-8 aware replacement for trim() -* Note: you only need to use this if you are supplying the charlist -* optional arg and it contains UTF-8 characters. Otherwise trim will -* work normally on a UTF-8 string -* @author Andreas Gohr -* @see http://www.php.net/trim -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @return string -* @package utf8 -*/ -function utf8_trim( $str, $charlist = FALSE ) { - if($charlist === FALSE) return trim($str); - return utf8_ltrim(utf8_rtrim($str, $charlist), $charlist); -} diff --git a/src/phputf8/ucfirst.php b/src/phputf8/ucfirst.php deleted file mode 100644 index 7f7ae9ec..00000000 --- a/src/phputf8/ucfirst.php +++ /dev/null @@ -1,31 +0,0 @@ - -* if ( utf8_is_ascii($someString) ) { -* // It's just ASCII - use the native PHP version -* $someString = strtolower($someString); -* } else { -* $someString = utf8_strtolower($someString); -* } -* -* -* @param string -* @return boolean TRUE if it's all ASCII -* @package utf8 -* @see utf8_is_ascii_ctrl -*/ -function utf8_is_ascii($str) { - // Search for any bytes which are outside the ASCII range... - return (preg_match('/(?:[^\x00-\x7F])/',$str) !== 1); -} - -//-------------------------------------------------------------------- -/** -* Tests whether a string contains only 7bit ASCII bytes with device -* control codes omitted. 
The device control codes can be found on the -* second table here: http://www.w3schools.com/tags/ref_ascii.asp -* -* @param string -* @return boolean TRUE if it's all ASCII without device control codes -* @package utf8 -* @see utf8_is_ascii -*/ -function utf8_is_ascii_ctrl($str) { - if ( strlen($str) > 0 ) { - // Search for any bytes which are outside the ASCII range, - // or are device control codes - return (preg_match('/[^\x09\x0A\x0D\x20-\x7E]/',$str) !== 1); - } - return FALSE; -} - -//-------------------------------------------------------------------- -/** -* Strip out all non-7bit ASCII bytes -* If you need to transmit a string to system which you know can only -* support 7bit ASCII, you could use this function. -* @param string -* @return string with non ASCII bytes removed -* @package utf8 -* @see utf8_strip_non_ascii_ctrl -*/ -function utf8_strip_non_ascii($str) { - ob_start(); - while ( preg_match( - '/^([\x00-\x7F]+)|([^\x00-\x7F]+)/S', - $str, $matches) ) { - if ( !isset($matches[2]) ) { - echo $matches[0]; - } - $str = substr($str, strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//-------------------------------------------------------------------- -/** -* Strip out device control codes in the ASCII range -* which are not permitted in XML. 
Note that this leaves -* multi-byte characters untouched - it only removes device -* control codes -* @see http://hsivonen.iki.fi/producing-xml/#controlchar -* @param string -* @return string control codes removed -*/ -function utf8_strip_ascii_ctrl($str) { - ob_start(); - while ( preg_match( - '/^([^\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+)|([\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+)/S', - $str, $matches) ) { - if ( !isset($matches[2]) ) { - echo $matches[0]; - } - $str = substr($str, strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//-------------------------------------------------------------------- -/** -* Strip out all non 7bit ASCII bytes and ASCII device control codes. -* For a list of ASCII device control codes see the 2nd table here: -* http://www.w3schools.com/tags/ref_ascii.asp -* -* @param string -* @return boolean TRUE if it's all ASCII -* @package utf8 -*/ -function utf8_strip_non_ascii_ctrl($str) { - ob_start(); - while ( preg_match( - '/^([\x09\x0A\x0D\x20-\x7E]+)|([^\x09\x0A\x0D\x20-\x7E]+)/S', - $str, $matches) ) { - if ( !isset($matches[2]) ) { - echo $matches[0]; - } - $str = substr($str, strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//--------------------------------------------------------------- -/** -* Replace accented UTF-8 characters by unaccented ASCII-7 "equivalents". -* The purpose of this function is to replace characters commonly found in Latin -* alphabets with something more or less equivalent from the ASCII range. This can -* be useful for converting a UTF-8 to something ready for a filename, for example. -* Following the use of this function, you would probably also pass the string -* through utf8_strip_non_ascii to clean out any other non-ASCII chars -* Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1) -* letters. 
Default is to deaccent both cases ($case = 0) -* -* For a more complete implementation of transliteration, see the utf8_to_ascii package -* available from the phputf8 project downloads: -* http://prdownloads.sourceforge.net/phputf8 -* -* @param string UTF-8 string -* @param int (optional) -1 lowercase only, +1 uppercase only, 1 both cases -* @param string UTF-8 with accented characters replaced by ASCII chars -* @return string accented chars replaced with ascii equivalents -* @author Andreas Gohr -* @package utf8 -*/ -function utf8_accents_to_ascii( $str, $case=0 ){ - - static $UTF8_LOWER_ACCENTS = NULL; - static $UTF8_UPPER_ACCENTS = NULL; - - if($case <= 0){ - - if ( is_null($UTF8_LOWER_ACCENTS) ) { - $UTF8_LOWER_ACCENTS = array( - 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', - 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', - 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', - 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', - 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', - 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', - 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', - 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', - 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', - 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', - 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', - 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', - 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', - 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', - 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 
'ae', 'µ' => 'u', 'ĕ' => 'e', - ); - } - - $str = str_replace( - array_keys($UTF8_LOWER_ACCENTS), - array_values($UTF8_LOWER_ACCENTS), - $str - ); - } - - if($case >= 0){ - if ( is_null($UTF8_UPPER_ACCENTS) ) { - $UTF8_UPPER_ACCENTS = array( - 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', - 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', - 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', - 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', - 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', - 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', - 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', - 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', - 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', - 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', - 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', - 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', - 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', - 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', - 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', - ); - } - $str = str_replace( - array_keys($UTF8_UPPER_ACCENTS), - array_values($UTF8_UPPER_ACCENTS), - $str - ); - } - - return $str; - -} diff --git a/src/phputf8/utils/bad.php b/src/phputf8/utils/bad.php deleted file mode 100644 index 3e54fc22..00000000 --- a/src/phputf8/utils/bad.php +++ /dev/null @@ -1,412 +0,0 @@ - 0 ) { - return $badList; - } - return FALSE; -} - -//-------------------------------------------------------------------- -/** -* Strips out any bad bytes from a UTF-8 
string and returns the rest -* PCRE Pattern to locate bad bytes in a UTF-8 string -* Comes from W3 FAQ: Multilingual Forms -* Note: modified to include full ASCII range including control chars -* @see http://www.w3.org/International/questions/qa-forms-utf-8 -* @param string -* @return string -* @package utf8 -*/ -function utf8_bad_strip($str) { - $UTF8_BAD = - '([\x00-\x7F]'. # ASCII (including control chars) - '|[\xC2-\xDF][\x80-\xBF]'. # non-overlong 2-byte - '|\xE0[\xA0-\xBF][\x80-\xBF]'. # excluding overlongs - '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # straight 3-byte - '|\xED[\x80-\x9F][\x80-\xBF]'. # excluding surrogates - '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. # planes 1-3 - '|[\xF1-\xF3][\x80-\xBF]{3}'. # planes 4-15 - '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. # plane 16 - '|(.{1}))'; # invalid byte - ob_start(); - while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches)) { - if ( !isset($matches[2])) { - echo $matches[0]; - } - $str = substr($str,strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//-------------------------------------------------------------------- -/** -* Replace bad bytes with an alternative character - ASCII character -* recommended is replacement char -* PCRE Pattern to locate bad bytes in a UTF-8 string -* Comes from W3 FAQ: Multilingual Forms -* Note: modified to include full ASCII range including control chars -* @see http://www.w3.org/International/questions/qa-forms-utf-8 -* @param string to search -* @param string to replace bad bytes with (defaults to '?') - use ASCII -* @return string -* @package utf8 -*/ -function utf8_bad_replace($str, $replace = '?') { - $UTF8_BAD = - '([\x00-\x7F]'. # ASCII (including control chars) - '|[\xC2-\xDF][\x80-\xBF]'. # non-overlong 2-byte - '|\xE0[\xA0-\xBF][\x80-\xBF]'. # excluding overlongs - '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # straight 3-byte - '|\xED[\x80-\x9F][\x80-\xBF]'. # excluding surrogates - '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. 
# planes 1-3 - '|[\xF1-\xF3][\x80-\xBF]{3}'. # planes 4-15 - '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. # plane 16 - '|(.{1}))'; # invalid byte - ob_start(); - while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches)) { - if ( !isset($matches[2])) { - echo $matches[0]; - } else { - echo $replace; - } - $str = substr($str,strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//-------------------------------------------------------------------- -/** -* Return code from utf8_bad_identify() when a five octet sequence is detected. -* Note: 5 octets sequences are valid UTF-8 but are not supported by Unicode so -* do not represent a useful character -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_5OCTET',1); - -/** -* Return code from utf8_bad_identify() when a six octet sequence is detected. -* Note: 6 octets sequences are valid UTF-8 but are not supported by Unicode so -* do not represent a useful character -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_6OCTET',2); - -/** -* Return code from utf8_bad_identify(). -* Invalid octet for use as start of multi-byte UTF-8 sequence -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_SEQID',3); - -/** -* Return code from utf8_bad_identify(). -* From Unicode 3.1, non-shortest form is illegal -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_NONSHORT',4); - -/** -* Return code from utf8_bad_identify(). -* From Unicode 3.2, surrogate characters are illegal -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_SURROGATE',5); - -/** -* Return code from utf8_bad_identify(). -* Codepoints outside the Unicode range are illegal -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_UNIOUTRANGE',6); - -/** -* Return code from utf8_bad_identify(). 
-* Incomplete multi-octet sequence -* Note: this is kind of a "catch-all" -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_SEQINCOMPLETE',7); - -//-------------------------------------------------------------------- -/** -* Reports on the type of bad byte found in a UTF-8 string. Returns a -* status code on the first bad byte found -* -* Joomla modification - As of PHP 7.4, curly brace access has been deprecated. As a result this function has been -* modified to use square brace syntax -* See https://github.com/php/php-src/commit/d574df63dc375f5fc9202ce5afde23f866b6450a -* for additional references -* -* @author -* @param string UTF-8 encoded string -* @return mixed integer constant describing problem or FALSE if valid UTF-8 -* @see utf8_bad_explain -* @see http://hsivonen.iki.fi/php-utf8/ -* @package utf8 -*/ -function utf8_bad_identify($str, &$i) { - - $mState = 0; // cached expected number of octets after the current octet - // until the beginning of the next UTF8 character sequence - $mUcs4 = 0; // cached Unicode character - $mBytes = 1; // cached expected number of octets in the current sequence - - $len = strlen($str); - - for($i = 0; $i < $len; $i++) { - - $in = ord($str[$i]); - - if ( $mState == 0) { - - // When mState is zero we expect either a US-ASCII character or a - // multi-octet sequence. - if (0 == (0x80 & ($in))) { - // US-ASCII, pass straight through. - $mBytes = 1; - - } else if (0xC0 == (0xE0 & ($in))) { - // First octet of 2 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x1F) << 6; - $mState = 1; - $mBytes = 2; - - } else if (0xE0 == (0xF0 & ($in))) { - // First octet of 3 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x0F) << 12; - $mState = 2; - $mBytes = 3; - - } else if (0xF0 == (0xF8 & ($in))) { - // First octet of 4 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x07) << 18; - $mState = 3; - $mBytes = 4; - - } else if (0xF8 == (0xFC & ($in))) { - - /* First octet of 5 octet sequence. 
- * - * This is illegal because the encoded codepoint must be either - * (a) not the shortest form or - * (b) outside the Unicode range of 0-0x10FFFF. - */ - - return UTF8_BAD_5OCTET; - - } else if (0xFC == (0xFE & ($in))) { - - // First octet of 6 octet sequence, see comments for 5 octet sequence. - return UTF8_BAD_6OCTET; - - } else { - // Current octet is neither in the US-ASCII range nor a legal first - // octet of a multi-octet sequence. - return UTF8_BAD_SEQID; - - } - - } else { - - // When mState is non-zero, we expect a continuation of the multi-octet - // sequence - if (0x80 == (0xC0 & ($in))) { - - // Legal continuation. - $shift = ($mState - 1) * 6; - $tmp = $in; - $tmp = ($tmp & 0x0000003F) << $shift; - $mUcs4 |= $tmp; - - /** - * End of the multi-octet sequence. mUcs4 now contains the final - * Unicode codepoint to be output - */ - if (0 == --$mState) { - - // From Unicode 3.1, non-shortest form is illegal - if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || - ((3 == $mBytes) && ($mUcs4 < 0x0800)) || - ((4 == $mBytes) && ($mUcs4 < 0x10000)) ) { - return UTF8_BAD_NONSHORT; - - // From Unicode 3.2, surrogate characters are illegal - } else if (($mUcs4 & 0xFFFFF800) == 0xD800) { - return UTF8_BAD_SURROGATE; - - // Codepoints outside the Unicode range are illegal - } else if ($mUcs4 > 0x10FFFF) { - return UTF8_BAD_UNIOUTRANGE; - } - - //initialize UTF8 cache - $mState = 0; - $mUcs4 = 0; - $mBytes = 1; - } - - } else { - // ((0xC0 & (*in) != 0x80) && (mState != 0)) - // Incomplete multi-octet sequence. - $i--; - return UTF8_BAD_SEQINCOMPLETE; - } - } - } - - if ( $mState != 0 ) { - // Incomplete multi-octet sequence. - $i--; - return UTF8_BAD_SEQINCOMPLETE; - } - - // No bad octets found - $i = NULL; - return FALSE; -} - -//-------------------------------------------------------------------- -/** -* Takes a return code from utf8_bad_identify() are returns a message -* (in English) explaining what the problem is. 
-* @param int return code from utf8_bad_identify -* @return mixed string message or FALSE if return code unknown -* @see utf8_bad_identify -* @package utf8 -*/ -function utf8_bad_explain($code) { - - switch ($code) { - - case UTF8_BAD_5OCTET: - return 'Five octet sequences are valid UTF-8 but are not supported by Unicode'; - break; - - case UTF8_BAD_6OCTET: - return 'Six octet sequences are valid UTF-8 but are not supported by Unicode'; - break; - - case UTF8_BAD_SEQID: - return 'Invalid octet for use as start of multi-byte UTF-8 sequence'; - break; - - case UTF8_BAD_NONSHORT: - return 'From Unicode 3.1, non-shortest form is illegal'; - break; - - case UTF8_BAD_SURROGATE: - return 'From Unicode 3.2, surrogate characters are illegal'; - break; - - case UTF8_BAD_UNIOUTRANGE: - return 'Codepoints outside the Unicode range are illegal'; - break; - - case UTF8_BAD_SEQINCOMPLETE: - return 'Incomplete multi-octet sequence'; - break; - - } - - trigger_error('Unknown error code: '.$code,E_USER_WARNING); - return FALSE; - -} diff --git a/src/phputf8/utils/patterns.php b/src/phputf8/utils/patterns.php deleted file mode 100644 index 0ed83463..00000000 --- a/src/phputf8/utils/patterns.php +++ /dev/null @@ -1,64 +0,0 @@ - -* @param string string to locate index in -* @param int (n times) -* @return mixed - int if only one input int, array if more -* @return boolean TRUE if it's all ASCII -* @package utf8 -*/ -function utf8_byte_position() { - - $args = func_get_args(); - $str =& array_shift($args); - if (!is_string($str)) return false; - - $result = array(); - - // trivial byte index, character offset pair - $prev = array(0,0); - - // use a short piece of str to estimate bytes per character - // $i (& $j) -> byte indexes into $str - $i = utf8_locate_next_chr($str, 300); - - // $c -> character offset into $str - $c = strlen(utf8_decode(substr($str,0,$i))); - - // deal with arguments from lowest to highest - sort($args); - - foreach ($args as $offset) { - // sanity checks FIXME - 
- // 0 is an easy check - if ($offset == 0) { $result[] = 0; continue; } - - // ensure no endless looping - $safety_valve = 50; - - do { - - if ( ($c - $prev[1]) == 0 ) { - // Hack: gone past end of string - $error = 0; - $i = strlen($str); - break; - } - - $j = $i + (int)(($offset-$c) * ($i - $prev[0]) / ($c - $prev[1])); - - // correct to utf8 character boundary - $j = utf8_locate_next_chr($str, $j); - - // save the index, offset for use next iteration - $prev = array($i,$c); - - if ($j > $i) { - // determine new character offset - $c += strlen(utf8_decode(substr($str,$i,$j-$i))); - } else { - // ditto - $c -= strlen(utf8_decode(substr($str,$j,$i-$j))); - } - - $error = abs($c-$offset); - - // ready for next time around - $i = $j; - - // from 7 it is faster to iterate over the string - } while ( ($error > 7) && --$safety_valve) ; - - if ($error && $error <= 7) { - - if ($c < $offset) { - // move up - while ($error--) { $i = utf8_locate_next_chr($str,++$i); } - } else { - // move down - while ($error--) { $i = utf8_locate_current_chr($str,--$i); } - } - - // ready for next arg - $c = $offset; - } - $result[] = $i; - } - - if ( count($result) == 1 ) { - return $result[0]; - } - - return $result; -} - -//-------------------------------------------------------------------- -/** -* Given a string and any byte index, returns the byte index -* of the start of the current UTF-8 character, relative to supplied -* position. If the current character begins at the same place as the -* supplied byte index, that byte index will be returned. 
Otherwise -* this function will step backwards, looking for the index where -* current UTF-8 character begins -* @author Chris Smith -* @param string -* @param int byte index in the string -* @return int byte index of start of next UTF-8 character -* @package utf8 -*/ -function utf8_locate_current_chr( &$str, $idx ) { - - if ($idx <= 0) return 0; - - $limit = strlen($str); - if ($idx >= $limit) return $limit; - - // Binary value for any byte after the first in a multi-byte UTF-8 character - // will be like 10xxxxxx so & 0xC0 can be used to detect this kind - // of byte - assuming well formed UTF-8 - while ($idx && ((ord($str[$idx]) & 0xC0) == 0x80)) $idx--; - - return $idx; -} - -//-------------------------------------------------------------------- -/** -* Given a string and any byte index, returns the byte index -* of the start of the next UTF-8 character, relative to supplied -* position. If the next character begins at the same place as the -* supplied byte index, that byte index will be returned. 
-* @author Chris Smith -* @param string -* @param int byte index in the string -* @return int byte index of start of next UTF-8 character -* @package utf8 -*/ -function utf8_locate_next_chr( &$str, $idx ) { - - if ($idx <= 0) return 0; - - $limit = strlen($str); - if ($idx >= $limit) return $limit; - - // Binary value for any byte after the first in a multi-byte UTF-8 character - // will be like 10xxxxxx so & 0xC0 can be used to detect this kind - // of byte - assuming well formed UTF-8 - while (($idx < $limit) && ((ord($str[$idx]) & 0xC0) == 0x80)) $idx++; - - return $idx; -} - diff --git a/src/phputf8/utils/specials.php b/src/phputf8/utils/specials.php deleted file mode 100644 index a53e2745..00000000 --- a/src/phputf8/utils/specials.php +++ /dev/null @@ -1,126 +0,0 @@ - -* @param string $string The UTF8 string to strip of special chars -* @param string (optional) $repl Replace special with this string -* @return string with common non-alphanumeric characters removed -* @see utf8_specials_pattern -*/ -function utf8_strip_specials($string, $repl=''){ - return preg_replace(utf8_specials_pattern(), $repl, $string); -} - - diff --git a/src/phputf8/utils/unicode.php b/src/phputf8/utils/unicode.php deleted file mode 100644 index 452b186e..00000000 --- a/src/phputf8/utils/unicode.php +++ /dev/null @@ -1,271 +0,0 @@ - 0xFFFF. Occurrances of the BOM are ignored. Surrogates -* are not allowed. -* Returns false if the input string isn't a valid UTF-8 octet sequence -* and raises a PHP error at level E_USER_WARNING -* Note: this function has been modified slightly in this library to -* trigger errors on encountering bad bytes -* -* Joomla modification - As of PHP 7.4, curly brace access has been deprecated. 
As a result this function has been -* modified to use square brace syntax -* See https://github.com/php/php-src/commit/d574df63dc375f5fc9202ce5afde23f866b6450a -* for additional references -* -* @author -* @param string UTF-8 encoded string -* @return mixed array of unicode code points or FALSE if UTF-8 invalid -* @see utf8_from_unicode -* @see http://hsivonen.iki.fi/php-utf8/ -* @package utf8 -*/ -function utf8_to_unicode($str) { - $mState = 0; // cached expected number of octets after the current octet - // until the beginning of the next UTF8 character sequence - $mUcs4 = 0; // cached Unicode character - $mBytes = 1; // cached expected number of octets in the current sequence - - $out = array(); - - $len = strlen($str); - - for($i = 0; $i < $len; $i++) { - - $in = ord($str[$i]); - - if ( $mState == 0) { - - // When mState is zero we expect either a US-ASCII character or a - // multi-octet sequence. - if (0 == (0x80 & ($in))) { - // US-ASCII, pass straight through. - $out[] = $in; - $mBytes = 1; - - } else if (0xC0 == (0xE0 & ($in))) { - // First octet of 2 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x1F) << 6; - $mState = 1; - $mBytes = 2; - - } else if (0xE0 == (0xF0 & ($in))) { - // First octet of 3 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x0F) << 12; - $mState = 2; - $mBytes = 3; - - } else if (0xF0 == (0xF8 & ($in))) { - // First octet of 4 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x07) << 18; - $mState = 3; - $mBytes = 4; - - } else if (0xF8 == (0xFC & ($in))) { - /* First octet of 5 octet sequence. - * - * This is illegal because the encoded codepoint must be either - * (a) not the shortest form or - * (b) outside the Unicode range of 0-0x10FFFF. - * Rather than trying to resynchronize, we will carry on until the end - * of the sequence and let the later error handling code catch it. 
- */ - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x03) << 24; - $mState = 4; - $mBytes = 5; - - } else if (0xFC == (0xFE & ($in))) { - // First octet of 6 octet sequence, see comments for 5 octet sequence. - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 1) << 30; - $mState = 5; - $mBytes = 6; - - } else { - /* Current octet is neither in the US-ASCII range nor a legal first - * octet of a multi-octet sequence. - */ - trigger_error( - 'utf8_to_unicode: Illegal sequence identifier '. - 'in UTF-8 at byte '.$i, - E_USER_WARNING - ); - return FALSE; - - } - - } else { - - // When mState is non-zero, we expect a continuation of the multi-octet - // sequence - if (0x80 == (0xC0 & ($in))) { - - // Legal continuation. - $shift = ($mState - 1) * 6; - $tmp = $in; - $tmp = ($tmp & 0x0000003F) << $shift; - $mUcs4 |= $tmp; - - /** - * End of the multi-octet sequence. mUcs4 now contains the final - * Unicode codepoint to be output - */ - if (0 == --$mState) { - - /* - * Check for illegal sequences and codepoints. - */ - // From Unicode 3.1, non-shortest form is illegal - if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || - ((3 == $mBytes) && ($mUcs4 < 0x0800)) || - ((4 == $mBytes) && ($mUcs4 < 0x10000)) || - (4 < $mBytes) || - // From Unicode 3.2, surrogate characters are illegal - (($mUcs4 & 0xFFFFF800) == 0xD800) || - // Codepoints outside the Unicode range are illegal - ($mUcs4 > 0x10FFFF)) { - - trigger_error( - 'utf8_to_unicode: Illegal sequence or codepoint '. - 'in UTF-8 at byte '.$i, - E_USER_WARNING - ); - - return FALSE; - - } - - if (0xFEFF != $mUcs4) { - // BOM is legal but we don't want to output it - $out[] = $mUcs4; - } - - //initialize UTF8 cache - $mState = 0; - $mUcs4 = 0; - $mBytes = 1; - } - - } else { - /** - *((0xC0 & (*in) != 0x80) && (mState != 0)) - * Incomplete multi-octet sequence. - */ - trigger_error( - 'utf8_to_unicode: Incomplete multi-octet '. 
- ' sequence in UTF-8 at byte '.$i, - E_USER_WARNING - ); - - return FALSE; - } - } - } - return $out; -} - -//-------------------------------------------------------------------- -/** -* Takes an array of ints representing the Unicode characters and returns -* a UTF-8 string. Astral planes are supported ie. the ints in the -* input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates -* are not allowed. -* Returns false if the input array contains ints that represent -* surrogates or are outside the Unicode range -* and raises a PHP error at level E_USER_WARNING -* Note: this function has been modified slightly in this library to use -* output buffering to concatenate the UTF-8 string (faster) as well as -* reference the array by it's keys -* @param array of unicode code points representing a string -* @return mixed UTF-8 string or FALSE if array contains invalid code points -* @author -* @see utf8_to_unicode -* @see http://hsivonen.iki.fi/php-utf8/ -* @package utf8 -*/ -function utf8_from_unicode($arr) { - ob_start(); - - foreach (array_keys($arr) as $k) { - - # ASCII range (including control chars) - if ( ($arr[$k] >= 0) && ($arr[$k] <= 0x007f) ) { - - echo chr($arr[$k]); - - # 2 byte sequence - } else if ($arr[$k] <= 0x07ff) { - - echo chr(0xc0 | ($arr[$k] >> 6)); - echo chr(0x80 | ($arr[$k] & 0x003f)); - - # Byte order mark (skip) - } else if($arr[$k] == 0xFEFF) { - - // nop -- zap the BOM - - # Test for illegal surrogates - } else if ($arr[$k] >= 0xD800 && $arr[$k] <= 0xDFFF) { - - // found a surrogate - trigger_error( - 'utf8_from_unicode: Illegal surrogate '. 
- 'at index: '.$k.', value: '.$arr[$k], - E_USER_WARNING - ); - - return FALSE; - - # 3 byte sequence - } else if ($arr[$k] <= 0xffff) { - - echo chr(0xe0 | ($arr[$k] >> 12)); - echo chr(0x80 | (($arr[$k] >> 6) & 0x003f)); - echo chr(0x80 | ($arr[$k] & 0x003f)); - - # 4 byte sequence - } else if ($arr[$k] <= 0x10ffff) { - - echo chr(0xf0 | ($arr[$k] >> 18)); - echo chr(0x80 | (($arr[$k] >> 12) & 0x3f)); - echo chr(0x80 | (($arr[$k] >> 6) & 0x3f)); - echo chr(0x80 | ($arr[$k] & 0x3f)); - - } else { - - trigger_error( - 'utf8_from_unicode: Codepoint out of Unicode range '. - 'at index: '.$k.', value: '.$arr[$k], - E_USER_WARNING - ); - - // out of range - return FALSE; - } - } - - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} diff --git a/src/phputf8/utils/validation.php b/src/phputf8/utils/validation.php deleted file mode 100644 index d050a165..00000000 --- a/src/phputf8/utils/validation.php +++ /dev/null @@ -1,187 +0,0 @@ - -* @param string UTF-8 encoded string -* @return boolean true if valid -* @see http://hsivonen.iki.fi/php-utf8/ -* @see utf8_compliant -* @package utf8 -*/ -function utf8_is_valid($str) { - - $mState = 0; // cached expected number of octets after the current octet - // until the beginning of the next UTF8 character sequence - $mUcs4 = 0; // cached Unicode character - $mBytes = 1; // cached expected number of octets in the current sequence - - $len = strlen($str); - - for($i = 0; $i < $len; $i++) { - - /* - * Joomla modification - As of PHP 7.4, curly brace access has been deprecated. As a result the line below has - * been modified to use square brace syntax - * See https://github.com/php/php-src/commit/d574df63dc375f5fc9202ce5afde23f866b6450a - * for additional references - */ - $in = ord($str[$i]); - - if ( $mState == 0) { - - // When mState is zero we expect either a US-ASCII character or a - // multi-octet sequence. - if (0 == (0x80 & ($in))) { - // US-ASCII, pass straight through. 
- $mBytes = 1; - - } else if (0xC0 == (0xE0 & ($in))) { - // First octet of 2 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x1F) << 6; - $mState = 1; - $mBytes = 2; - - } else if (0xE0 == (0xF0 & ($in))) { - // First octet of 3 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x0F) << 12; - $mState = 2; - $mBytes = 3; - - } else if (0xF0 == (0xF8 & ($in))) { - // First octet of 4 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x07) << 18; - $mState = 3; - $mBytes = 4; - - } else if (0xF8 == (0xFC & ($in))) { - /* First octet of 5 octet sequence. - * - * This is illegal because the encoded codepoint must be either - * (a) not the shortest form or - * (b) outside the Unicode range of 0-0x10FFFF. - * Rather than trying to resynchronize, we will carry on until the end - * of the sequence and let the later error handling code catch it. - */ - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x03) << 24; - $mState = 4; - $mBytes = 5; - - } else if (0xFC == (0xFE & ($in))) { - // First octet of 6 octet sequence, see comments for 5 octet sequence. - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 1) << 30; - $mState = 5; - $mBytes = 6; - - } else { - /* Current octet is neither in the US-ASCII range nor a legal first - * octet of a multi-octet sequence. - */ - return FALSE; - - } - - } else { - - // When mState is non-zero, we expect a continuation of the multi-octet - // sequence - if (0x80 == (0xC0 & ($in))) { - - // Legal continuation. - $shift = ($mState - 1) * 6; - $tmp = $in; - $tmp = ($tmp & 0x0000003F) << $shift; - $mUcs4 |= $tmp; - - /** - * End of the multi-octet sequence. mUcs4 now contains the final - * Unicode codepoint to be output - */ - if (0 == --$mState) { - - /* - * Check for illegal sequences and codepoints. 
- */ - // From Unicode 3.1, non-shortest form is illegal - if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || - ((3 == $mBytes) && ($mUcs4 < 0x0800)) || - ((4 == $mBytes) && ($mUcs4 < 0x10000)) || - (4 < $mBytes) || - // From Unicode 3.2, surrogate characters are illegal - (($mUcs4 & 0xFFFFF800) == 0xD800) || - // Codepoints outside the Unicode range are illegal - ($mUcs4 > 0x10FFFF)) { - - return FALSE; - - } - - //initialize UTF8 cache - $mState = 0; - $mUcs4 = 0; - $mBytes = 1; - } - - } else { - /** - *((0xC0 & (*in) != 0x80) && (mState != 0)) - * Incomplete multi-octet sequence. - */ - - return FALSE; - } - } - } - return TRUE; -} - -//-------------------------------------------------------------------- -/** -* Tests whether a string complies as UTF-8. This will be much -* faster than utf8_is_valid but will pass five and six octet -* UTF-8 sequences, which are not supported by Unicode and -* so cannot be displayed correctly in a browser. In other words -* it is not as strict as utf8_is_valid but it's faster. If you use -* is to validate user input, you place yourself at the risk that -* attackers will be able to inject 5 and 6 byte sequences (which -* may or may not be a significant risk, depending on what you are -* are doing) -* @see utf8_is_valid -* @see http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805 -* @param string UTF-8 string to check -* @return boolean TRUE if string is valid UTF-8 -* @package utf8 -*/ -function utf8_compliant($str) { - if ( strlen($str) == 0 ) { - return TRUE; - } - // If even just the first character can be matched, when the /u - // modifier is used, then it's valid UTF-8. 
If the UTF-8 is somehow - // invalid, nothing at all will match, even if the string contains - // some valid sequences - return (preg_match('/^.{1}/us',$str,$ar) == 1); -} - From 5ca434423df61f908c1b3fd17be2ba43a08a940f Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Wed, 22 Sep 2021 15:01:23 +0200 Subject: [PATCH 02/23] Docs - Add inline changelog --- src/StringHelper.php | 64 +++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/src/StringHelper.php b/src/StringHelper.php index 924baee9..c2975475 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -4,6 +4,11 @@ * * @copyright Copyright (C) 2005 - 2021 Open Source Matters, Inc. All rights reserved. * @license GNU General Public License version 2 or later; see LICENSE + * + * @noinspection SpellCheckingInspection + * @noinspection PhpMissingReturnTypeInspection + * @noinspection ReturnTypeCanBeDeclaredInspection + * @noinspection PhpMissingParamTypeInspection */ namespace Joomla\String; @@ -13,7 +18,8 @@ @ini_set('default_charset', 'UTF-8'); /** - * String handling class for UTF-8 data wrapping the phputf8 library. All functions assume the validity of UTF-8 strings. + * String handling class for UTF-8 data wrapping the Portable UTF-8 library. + * All functions assume the validity of UTF-8 strings. * * @since 1.3.0 */ @@ -96,6 +102,7 @@ public static function increment($string, $style = 'default', $n = null) * @return boolean True if the string is all ASCII * * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::is_ascii() instead. * @deprecated 3.0 Please use UTF8::is_ascii() instead. */ public static function is_ascii($str) @@ -114,6 +121,7 @@ public static function is_ascii($str) * * @link https://www.php.net/ord * @since 1.4.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::ord() instead. * @deprecated 3.0 Please use UTF8::ord() instead. 
*/ public static function ord($chr) @@ -138,16 +146,12 @@ public static function ord($chr) * * @link https://www.php.net/strpos * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strpos() instead. * @deprecated 3.0 Please use UTF8::strpos() instead. */ public static function strpos($haystack, $needle, $offset = null) { - if ($offset === null) - { - return UTF8::strpos($haystack, $needle); - } - - return UTF8::strpos($haystack, $needle, $offset); + return UTF8::strpos($haystack, $needle, $offset ?? 0); } /** @@ -168,16 +172,12 @@ public static function strpos($haystack, $needle, $offset = null) * * @link https://www.php.net/strrpos * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strrpos() instead. * @deprecated 3.0 Please use UTF8::strrpos() instead. */ public static function strrpos($haystack, $needle, $offset = null) { - if ($offset === null) - { - $offset = 0; - } - - return UTF8::strrpos($haystack, $needle, $offset); + return UTF8::strrpos($haystack, $needle, $offset ?? 0); } /** @@ -193,15 +193,11 @@ public static function strrpos($haystack, $needle, $offset = null) * * @link https://www.php.net/substr * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::substr() instead. * @deprecated 3.0 Please use UTF8::substr() instead. */ public static function substr($str, $offset, $length = null) { - if ($length === null) - { - return UTF8::substr($str, $offset); - } - return UTF8::substr($str, $offset, $length); } @@ -215,10 +211,11 @@ public static function substr($str, $offset, $length = null) * * @param string $str String being processed * - * @return string|boolean Either string in lowercase or FALSE is UTF-8 invalid + * @return string Either string in lowercase or FALSE is UTF-8 invalid * * @link https://www.php.net/strtolower * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strtolower() instead. * @deprecated 3.0 Please use UTF8::strtolower() instead. 
*/ public static function strtolower($str) @@ -236,10 +233,11 @@ public static function strtolower($str) * * @param string $str String being processed * - * @return string|boolean Either string in uppercase or FALSE is UTF-8 invalid + * @return string Either string in uppercase or FALSE is UTF-8 invalid * * @link https://www.php.net/strtoupper * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strtoupper() instead. * @deprecated 3.0 Please use UTF8::strtoupper() instead. */ public static function strtoupper($str) @@ -258,6 +256,7 @@ public static function strtoupper($str) * * @link https://www.php.net/strlen * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strlen() instead. * @deprecated 3.0 Please use UTF8::strlen() instead. */ public static function strlen($str) @@ -282,6 +281,7 @@ public static function strlen($str) * * @link https://www.php.net/str_ireplace * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::str_ireplace() instead. * @deprecated 3.0 Please use UTF8::str_ireplace() instead. */ public static function str_ireplace($search, $replace, $subject, &$count = null) @@ -305,6 +305,7 @@ public static function str_ireplace($search, $replace, $subject, &$count = null) * * @link https://www.php.net/str_pad * @since 1.4.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::str_pad() instead. * @deprecated 3.0 Please use UTF8::str_pad() instead. */ public static function str_pad($input, $length, $padStr = ' ', $type = STR_PAD_RIGHT) @@ -320,10 +321,11 @@ public static function str_pad($input, $length, $padStr = ' ', $type = STR_PAD_R * @param string $str UTF-8 encoded string to process * @param integer $splitLen Number to characters to split string by * - * @return array|string|boolean + * @return array * * @link https://www.php.net/str_split * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::str_split() instead. * @deprecated 3.0 Please use UTF8::str_split() instead. 
*/ public static function str_split($str, $splitLen = 1) @@ -383,7 +385,6 @@ public static function strcasecmp($str1, $str2, $locale = false) * @link https://www.php.net/strcoll * @link https://www.php.net/setlocale * @since 1.3.0 - * @since __DEPLOY_VERSION__ 'locale' parameter is ignored */ public static function strcmp($str1, $str2, $locale = false) { @@ -420,6 +421,7 @@ public static function strcmp($str1, $str2, $locale = false) * * @link https://www.php.net/strcspn * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strcspn() instead. * @deprecated 3.0 Please use UTF8::strcspn() instead. */ public static function strcspn($str, $mask, $start = null, $length = null) @@ -452,6 +454,7 @@ public static function strcspn($str, $mask, $start = null, $length = null) * * @link https://www.php.net/stristr * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::stristr() instead. * @deprecated 3.0 Please use UTF8::stristr() instead. */ public static function stristr($str, $search) @@ -470,6 +473,7 @@ public static function stristr($str, $search) * * @link https://www.php.net/strrev * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strrev() instead. * @deprecated 3.0 Please use UTF8::strrev() instead. */ public static function strrev($str) @@ -491,6 +495,7 @@ public static function strrev($str) * * @link https://www.php.net/strspn * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strspn() instead. * @deprecated 3.0 Please use UTF8::strspn() instead. */ public static function strspn($str, $mask, $start = null, $length = null) @@ -522,6 +527,7 @@ public static function strspn($str, $mask, $start = null, $length = null) * * @link https://www.php.net/substr_replace * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::substr_replace() instead. * @deprecated 3.0 Please use UTF8::substr_replace() instead. 
*/ public static function substr_replace($str, $repl, $start, $length = null) @@ -549,6 +555,7 @@ public static function substr_replace($str, $repl, $start, $length = null) * * @link https://www.php.net/ltrim * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::ltrim() instead. * @deprecated 3.0 Please use UTF8::ltrim() instead. */ public static function ltrim($str, $charlist = false) @@ -581,6 +588,7 @@ public static function ltrim($str, $charlist = false) * * @link https://www.php.net/rtrim * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::rtrim() instead. * @deprecated 3.0 Please use UTF8::rtrim() instead. */ public static function rtrim($str, $charlist = false) @@ -603,7 +611,7 @@ public static function rtrim($str, $charlist = false) * * UTF-8 aware replacement for trim() * - * You only need to use this if you are supplying the charlist optional arg and it contains UTF-8 characters. + * You only need to use this if you are supplying the charlist optional arg, and it contains UTF-8 characters. * Otherwise, trim will work normally on a UTF-8 string * * @param string $str The string to be trimmed @@ -613,6 +621,7 @@ public static function rtrim($str, $charlist = false) * * @link https://www.php.net/trim * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::trim() instead. * @deprecated 3.0 Please use UTF8::trim() instead. 
*/ public static function trim($str, $charlist = false) @@ -636,11 +645,11 @@ public static function trim($str, $charlist = false) * UTF-8 aware alternative to ucfirst() * * @param string $str String to be processed - * @param string|null $delimiter The words delimiter (null means do not split the string) + * @param string|null $delimiter The words' delimiter (null means do not split the string) * @param string|null $newDelimiter The new words delimiter (null means equal to $delimiter) * * @return string If $delimiter is null, return the string with first character as upper case (if applicable) - * else consider the string of words separated by the delimiter, apply the ucfirst to each words + * else consider the string of words separated by the delimiter, apply the ucfirst to each word * and return the string with the new delimiter * * @link https://www.php.net/ucfirst @@ -672,6 +681,7 @@ public static function ucfirst($str, $delimiter = null, $newDelimiter = null) * * @link https://www.php.net/ucwords * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::ucwords() instead. * @deprecated 3.0 Please use UTF8::ucwords() instead. */ public static function ucwords($str) @@ -712,6 +722,7 @@ public static function transcode($source, $fromEncoding, $toEncoding) * @link https://hsivonen.fi/php-utf8/ * @see compliant * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::is_utf8() instead. * @deprecated 3.0 Please use UTF8::is_utf8() instead. */ public static function valid($str) @@ -735,6 +746,7 @@ public static function valid($str) * @see StringHelper::valid * @link https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805 * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::is_utf8() instead. * @deprecated 3.0 Please use UTF8::is_utf8() instead. 
*/ public static function compliant($str) @@ -750,6 +762,7 @@ public static function compliant($str) * @return string UTF-8 string * * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::to_utf8_string() instead. * @deprecated 3.0 Please use UTF8::to_utf8_string() instead. */ public static function unicode_to_utf8($str) @@ -765,6 +778,7 @@ public static function unicode_to_utf8($str) * @return string UTF-16 string * * @since 1.3.0 + * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::to_utf8_string() instead. * @deprecated 3.0 Please use UTF8::to_utf8_string() instead. */ public static function unicode_to_utf16($str) From 15fa71a481f251ae269a7717e71a25924eca3f4f Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Wed, 22 Sep 2021 17:32:59 +0200 Subject: [PATCH 03/23] Docs - Revise inline documentation --- src/StringHelper.php | 424 +++++++++++++++++++++---------------------- 1 file changed, 211 insertions(+), 213 deletions(-) diff --git a/src/StringHelper.php b/src/StringHelper.php index c2975475..1db393b4 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -2,8 +2,8 @@ /** * Part of the Joomla Framework String Package * - * @copyright Copyright (C) 2005 - 2021 Open Source Matters, Inc. All rights reserved. - * @license GNU General Public License version 2 or later; see LICENSE + * @copyright Copyright (C) 2005 - 2021 Open Source Matters, Inc. All rights reserved. + * @license GNU General Public License version 2 or later; see LICENSE * * @noinspection SpellCheckingInspection * @noinspection PhpMissingReturnTypeInspection @@ -42,6 +42,11 @@ abstract class StringHelper ], ]; + /** + * @var false|string + */ + private static $currentLocale; + /** * Increment a trailing number in a string. * @@ -80,7 +85,7 @@ public static function increment($string, $style = 'default', $n = null) } /** - * Test whether a string contains only 7bit ASCII bytes. + * Check if a string is 7 bit ASCII. 
* * You might use this to conditionally check whether a string needs handling as UTF-8 or not, potentially offering performance * benefits by using the native PHP equivalent if it's just ASCII e.g.; @@ -97,9 +102,9 @@ public static function increment($string, $style = 'default', $n = null) * } * * - * @param string $str The string to test. + * @param string $str TThe string to check. * - * @return boolean True if the string is all ASCII + * @return boolean true if it is ASCII, false otherwise * * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::is_ascii() instead. @@ -111,13 +116,11 @@ public static function is_ascii($str) } /** - * Convert the first byte of a string to its ordinal number + * Calculate Unicode code point of the given UTF-8 encoded character. * - * UTF-8 aware alternative to ord() + * @param string $chr The character of which to calculate code point. * - * @param string $chr UTF-8 encoded character - * - * @return integer Unicode ordinal for the character + * @return integer Unicode code point of the given character, 0 on invalid UTF-8 byte sequence * * @link https://www.php.net/ord * @since 1.4.0 @@ -130,19 +133,14 @@ public static function ord($chr) } /** - * Find the position of the first occurrence of a substring in a string - * - * UTF-8 aware alternative to strpos() + * Find the position of the first occurrence of a substring in a string. * - * @param string $haystack The string to search in - * @param string $needle String being searched for - * @param integer|null $offset If specified, search will start this number of characters counted from the - * beginning of the string. Unlike {@see strrpos()}, the offset cannot be negative. + * @param string $haystack The string from which to get the position of the first occurrence of needle. + * @param integer|string $needle The string to find in haystack, or a code point as int. + * @param integer|null $offset [optional] The search offset. If it is not specified, 0 is used. 
* - * @return integer|boolean Returns the position where the needle exists relative to the beginnning of the haystack - * string (independent of search direction or offset). Also note that string positions - * start at 0, and not 1. - * Returns false if the needle was not found. + * @return integer|boolean The numeric position of the first occurrence of needle in the haystack string. + * If needle is not found it returns false. * * @link https://www.php.net/strpos * @since 1.3.0 @@ -155,20 +153,16 @@ public static function strpos($haystack, $needle, $offset = null) } /** - * Find the position of the last occurrence of a substring in a string - * - * UTF-8 aware alternative to strrpos() + * Find the position of the last occurrence of a substring in a string. * - * @param string $haystack The string to search in. - * @param string $needle String being searched for. - * @param integer $offset If specified, search will start this number of characters counted from the beginning - * of the string. If the value is negative, search will instead start from that many - * characters from the end of the string, searching backwards. + * @param string $haystack The string being checked for the last occurrence of needle. + * @param integer|string $needle The string to find in haystack or a code point as int. + * @param integer $offset [optional] Can be specified to start the search after the given number of characters in + * the string. Negative values stop the search at the given point before the end + * of the string. * - * @return integer|boolean Returns the position where the needle exists relative to the beginnning of the haystack - * string (independent of search direction or offset). Also note that string positions - * start at 0, and not 1. - * Returns false if the needle was not found. + * @return integer|boolean The numeric position of the last occurrence of needle in the haystack string. + * If needle is not found, it returns false. 
* * @link https://www.php.net/strrpos * @since 1.3.0 @@ -181,15 +175,14 @@ public static function strrpos($haystack, $needle, $offset = null) } /** - * Get part of a string given character offset (and optionally length). + * Get part of a string. * - * UTF-8 aware alternative to substr() + * @param string $str The string being checked. + * @param integer $offset The first position used in str. + * @param integer|null $length [optional] The maximum length of the returned string. * - * @param string $str String being processed - * @param integer $offset Number of UTF-8 characters offset (from left) - * @param integer|null $length Optional length in UTF-8 characters from offset - * - * @return string|boolean + * @return string|boolean The portion of str specified by the offset and length parameters. + * If str is shorter than offset characters, false will be returned. * * @link https://www.php.net/substr * @since 1.3.0 @@ -202,20 +195,19 @@ public static function substr($str, $offset, $length = null) } /** - * Make a string lowercase - * - * UTF-8 aware alternative to strtolower() + * Make a string lowercase. * * Note: The concept of a characters "case" only exists is some alphabets such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does * not exist in the Chinese alphabet, for example. See Unicode Standard Annex #21: Case Mappings * - * @param string $str String being processed + * @param string $str The string being lowercased. * - * @return string Either string in lowercase or FALSE is UTF-8 invalid + * @return string String with all alphabetic characters converted to lowercase. * * @link https://www.php.net/strtolower * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strtolower() instead. + * @since __DEPLOY_VERSION__ str is always cast to string. * @deprecated 3.0 Please use UTF8::strtolower() instead. 
*/ public static function strtolower($str) @@ -224,20 +216,19 @@ public static function strtolower($str) } /** - * Make a string uppercase - * - * UTF-8 aware alternative to strtoupper() + * Make a string uppercase. * * Note: The concept of a characters "case" only exists is some alphabets such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does * not exist in the Chinese alphabet, for example. See Unicode Standard Annex #21: Case Mappings * - * @param string $str String being processed + * @param string $str The string being uppercased. * - * @return string Either string in uppercase or FALSE is UTF-8 invalid + * @return string String with all alphabetic characters converted to uppercase. * * @link https://www.php.net/strtoupper * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strtoupper() instead. + * @since __DEPLOY_VERSION__ str is always cast to string. * @deprecated 3.0 Please use UTF8::strtoupper() instead. */ public static function strtoupper($str) @@ -246,17 +237,17 @@ public static function strtoupper($str) } /** - * UTF-8 aware alternative to strlen() + * Get the string length, not the byte-length! * - * Returns the number of characters in the string (NOT THE NUMBER OF BYTES). + * @param string $str The string being checked for length. * - * @param string $str UTF-8 string. - * - * @return integer Number of UTF-8 characters in string. + * @return integer|false The number of characters in the string or false, if mbstring is not installed and invalid + * characters are encountered. * * @link https://www.php.net/strlen * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strlen() instead. + * @since __DEPLOY_VERSION__ Returns false, if mbstring is not installed and invalid characters are encountered. * @deprecated 3.0 Please use UTF8::strlen() instead. 
*/ public static function strlen($str) @@ -265,23 +256,26 @@ public static function strlen($str) } /** - * Replace (parts of) a string in a case-insensitive manner - * - * UTF-8 aware alternative to str_ireplace() - * + * Case-insensitive and UTF-8 safe version of str_replace(). * * @param string[]|string $search String(s) to search * Every replacement with search array is * performed on the result of previous replacement. - * @param string[]|string $replace New string(s) to replace with - * @param string $subject Existing string to replace - * @param integer|null $count Optional count value to be passed by reference + * @param string[]|string $replace The replacement. + * @param string[]|string $subject If subject is an array, then the search and + * replace is performed with every entry of + * subject, and the return value is an array as + * well. + * @param integer|null $count [optional] The number of matched and replaced needles will + * be returned in count which is passed by + * reference. * - * @return string UTF-8 String + * @return string[]|string A string or an array of strings with applied replacements. * * @link https://www.php.net/str_ireplace * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::str_ireplace() instead. + * @since __DEPLOY_VERSION__ Accepts an array as subject. * @deprecated 3.0 Please use UTF8::str_ireplace() instead. */ public static function str_ireplace($search, $replace, $subject, &$count = null) @@ -292,16 +286,15 @@ public static function str_ireplace($search, $replace, $subject, &$count = null) /** * Pad a string to a certain length with another string. * - * UTF-8 aware alternative to str_pad() - * - * $padStr may contain multi-byte characters. - * * @param string $input The input string. - * @param integer $length If the value is negative, less than, or equal to the length of the input string, no padding takes place. 
- * @param string $padStr The string may be truncated if the number of padding characters can't be evenly divided by the string's length. - * @param integer $type The type of padding to apply + * @param integer $length The length of return string. If the value is negative, less than, or equal to the + * length of the input string, no padding takes place. + * @param string $padStr [optional] String to use for padding the input string. The string may be truncated if the number + * of padding characters can't be evenly divided by the string's length. + * @param integer $type [optional] The type of padding to apply. Can be STR_PAD_RIGHT, STR_PAD_LEFT or + * STR_PAD_BOTH. * - * @return string + * @return string The padded string. * * @link https://www.php.net/str_pad * @since 1.4.0 @@ -314,14 +307,12 @@ public static function str_pad($input, $length, $padStr = ' ', $type = STR_PAD_R } /** - * Split a string into an array. - * - * UTF-8 aware alternative to str_split() + * Convert a string to an array of unicode characters. * - * @param string $str UTF-8 encoded string to process - * @param integer $splitLen Number to characters to split string by + * @param string $str The string to split into an array. + * @param integer $splitLen [optional] Max character length of each array element. * - * @return array + * @return array An array containing chunks of chars from the input. * * @link https://www.php.net/str_split * @since 1.3.0 @@ -334,24 +325,27 @@ public static function str_split($str, $splitLen = 1) } /** - * Compare strings in a case-insensitive manner. + * Case-insensitive string comparison. * - * UTF-8/LOCALE aware alternative to strcasecmp() + * If no locale is provided, this method is an alias for UTF8::strcasecmp(). + * If a locale is provided, that locale is set, if possible, and used for comparison with strcoll(). 
* - * @param string $str1 string 1 to compare - * @param string $str2 string 2 to compare - * @param array|string|boolean $locale The locale used by strcoll or false to use classical comparison + * @param string $str1 The first string. + * @param string $str2 The second string. + * @param string[]|string $locale [optional] A locale for collation aware comparison. + * See setlocale() for valid values. * - * @return integer < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal. + * @return integer < 0 if str1 is less than str2, > 0 if str1 is greater than str2, 0 if they are equal. * * @link https://www.php.net/strcasecmp * @link https://www.php.net/strcoll * @link https://www.php.net/setlocale * @since 1.3.0 + * @since __DEPLOY_VERSION__ Restores locale after comparision. */ - public static function strcasecmp($str1, $str2, $locale = false) + public static function strcasecmp($str1, $str2, $locale = null) { - if ($locale === false) + if (empty($locale)) { return UTF8::strcasecmp($str1, $str2); } @@ -361,30 +355,39 @@ public static function strcasecmp($str1, $str2, $locale = false) // If we successfully set encoding it to utf-8 or encoding is sth weird don't recode if ($encoding === 'UTF-8' || $encoding === 'nonrecodable') { - return strcoll(UTF8::strtolower($str1), UTF8::strtolower($str2)); + $result = strcoll(UTF8::strtolower($str1), UTF8::strtolower($str2)); } + else + { + $result = strcoll( + static::transcode(UTF8::strtolower($str1), 'UTF-8', $encoding), + static::transcode(UTF8::strtolower($str2), 'UTF-8', $encoding) + ); + } + + self::restoreLocale(); - return strcoll( - static::transcode(UTF8::strtolower($str1), 'UTF-8', $encoding), - static::transcode(UTF8::strtolower($str2), 'UTF-8', $encoding) - ); + return $result; } /** - * Compare strings in a case-sensitive manner. + * Case-sensitive string comparison. 
* - * UTF-8/LOCALE aware alternative to strcmp() + * If no locale is provided, this method is an alias for UTF8::strcmp(). + * If a locale is provided, that locale is set, if possible, and used for comparison with strcoll(). * - * @param string $str1 string 1 to compare - * @param string $str2 string 2 to compare - * @param array|string|boolean $locale The locale used by strcoll or false to use classical comparison + * @param string $str1 The first string. + * @param string $str2 The second string. + * @param string[]|string $locale [optional] A locale for collation aware comparison. + * See setlocale() for valid values. * - * @return integer < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal. + * @return integer < 0 if str1 is less than str2, > 0 if str1 is greater than str2, 0 if they are equal. * * @link https://www.php.net/strcmp * @link https://www.php.net/strcoll * @link https://www.php.net/setlocale * @since 1.3.0 + * @since __DEPLOY_VERSION__ Restores locale after comparison. */ public static function strcmp($str1, $str2, $locale = false) { @@ -398,24 +401,28 @@ public static function strcmp($str1, $str2, $locale = false) // If we successfully set encoding it to utf-8 or encoding is sth weird don't recode if ($encoding === 'UTF-8' || $encoding === 'nonrecodable') { - return strcoll($str1, $str2); + $result = strcoll($str1, $str2); + } + else + { + $result = strcoll( + static::transcode($str1, 'UTF-8', $encoding), + static::transcode($str2, 'UTF-8', $encoding) + ); } - return strcoll( - static::transcode($str1, 'UTF-8', $encoding), - static::transcode($str2, 'UTF-8', $encoding) - ); + self::restoreLocale(); + + return $result; } /** * Find length of initial segment not matching mask. 
* - * UTF-8 aware alternative to strcspn() - * - * @param string $str The string to process - * @param string $mask The mask - * @param integer|boolean $start Optional starting character position (in characters) - * @param integer|boolean $length Optional length + * @param string $str The string to process + * @param string $mask The mask + * @param integer $offset [optional] Starting character position (in characters) + * @param integer $length [optional] Length * * @return integer The length of the initial segment of str1 which does not contain any of the characters in str2 * @@ -424,52 +431,48 @@ public static function strcmp($str1, $str2, $locale = false) * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strcspn() instead. * @deprecated 3.0 Please use UTF8::strcspn() instead. */ - public static function strcspn($str, $mask, $start = null, $length = null) + public static function strcspn($str, $mask, $offset = null, $length = null) { if ($length === null) { - if ($start === null) + if ($offset === null) { return UTF8::strcspn($str, $mask); } - return UTF8::strcspn($str, $mask, $start); + return UTF8::strcspn($str, $mask, $offset); } - return UTF8::strcspn($str, $mask, $start, $length); + return UTF8::strcspn($str, $mask, $offset, $length); } /** * Get everything from haystack from the first occurrence of needle to the end. * - * UTF-8 aware alternative to stristr() - * * Needle and haystack are examined in a case-insensitive manner to find the first occurrence of a string using * case-insensitive comparison. * - * @param string $str The haystack - * @param string $search The needle + * @param string $haystack The input string. Must be valid UTF-8. + * @param string $needle The string to look for. Must be valid UTF-8. * - * @return string|boolean + * @return string|false A sub-string, or false if needle is not found. * * @link https://www.php.net/stristr * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::stristr() instead. 
* @deprecated 3.0 Please use UTF8::stristr() instead. */ - public static function stristr($str, $search) + public static function stristr($haystack, $needle) { - return UTF8::stristr($str, $search); + return UTF8::stristr($haystack, $needle); } /** - * Reverse a string. - * - * UTF-8 aware alternative to strrev() + * Reverse characters order in the string. * * @param string $str String to be reversed * - * @return string The string in reverse character order + * @return string The string with characters in the reverse sequence. * * @link https://www.php.net/strrev * @since 1.3.0 @@ -482,14 +485,12 @@ public static function strrev($str) } /** - * Find length of initial segment matching mask. - * - * UTF-8 aware alternative to strspn() + * Find the length of the initial segment of a string consisting entirely of characters contained within a given mask. * - * @param string $str The haystack - * @param string $mask The mask - * @param integer|null $start Start optional - * @param integer|null $length Length optional + * @param string $str The input string. + * @param string $mask The mask of chars + * @param integer $offset [optional] Start + * @param integer $length [optional] Length * * @return integer * @@ -498,162 +499,162 @@ public static function strrev($str) * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strspn() instead. * @deprecated 3.0 Please use UTF8::strspn() instead. */ - public static function strspn($str, $mask, $start = null, $length = null) + public static function strspn($str, $mask, $offset = null, $length = null) { if ($length === null) { - if ($start === null) + if ($offset === null) { return UTF8::strspn($str, $mask); } - return UTF8::strspn($str, $mask, $start); + return UTF8::strspn($str, $mask, $offset); } - return UTF8::strspn($str, $mask, $start ?? 0, $length); + return UTF8::strspn($str, $mask, $offset ?? 0, $length); } /** * Replace text within a portion of a string. 
* - * UTF-8 aware alternative to substr_replace() - * - * @param string $str The haystack - * @param string $repl The replacement string - * @param integer $start Start - * @param integer|boolean|null $length Length (optional) - * - * @return string + * @param string[]|string $str The input string or an array of strings. + * @param string[]|string $replacement The replacement string or an array of strings. + * @param integer[]|integer $offset If offset is positive, the replacing will begin at the offset'th character + * of the string. + * If offset is negative, the replacing will begin at the offset'th character + * from the end of string. + * @param integer[]|integer|null $length [optional] If given and is positive, it represents the length of the + * portion of string which is to be replaced. If it is negative, it + * represents the number of characters from the end of string at which to + * stop replacing. If it is not given, then it will default to + * strlen(string); i.e. end the replacing at the end of string. + * Of course, if length is zero then this function will have the effect + * of inserting replacement into string at the given start offset. + * + * @return string[]|string The result string. If string is an array then an array is returned. * * @link https://www.php.net/substr_replace * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::substr_replace() instead. * @deprecated 3.0 Please use UTF8::substr_replace() instead. */ - public static function substr_replace($str, $repl, $start, $length = null) + public static function substr_replace($str, $replacement, $offset, $length = null) { if ($length === false) { $length = null; } - return UTF8::substr_replace($str, $repl, $start, $length); + return UTF8::substr_replace($str, $replacement, $offset, $length); } /** - * Strip whitespace (or other characters) from the beginning of a string. 
- * - * UTF-8 aware replacement for ltrim() + * Strip whitespace or other characters from the beginning of a string. 
* * You only need to use this if you are supplying the char list optional arg, and it contains UTF-8 characters. * Otherwise, ltrim will work normally on a UTF-8 string. * - * @param string $str The string to be trimmed - * @param string|boolean|null $charlist The optional charlist of additional characters to trim + * @param string $str The string to be trimmed. + * @param string $chars [optional] Characters to be stripped. * - * @return string The trimmed string + * @return string The string with unwanted characters stripped from the left * * @link https://www.php.net/ltrim * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::ltrim() instead. * @deprecated 3.0 Please use UTF8::ltrim() instead. */ - public static function ltrim($str, $charlist = false) + public static function ltrim($str, $chars = false) { - if ($charlist === '') + if ($chars === '') { return $str; } - if ($charlist === false) + if ($chars === false) { - $charlist = null; + return UTF8::ltrim($str); } - return UTF8::ltrim($str, $charlist); + return UTF8::ltrim($str, $chars); } /** - * Strip whitespace (or other characters) from the end of a string. - * - * UTF-8 aware replacement for rtrim() + * Strip whitespace or other characters from the end of a string. * * You only need to use this if you are supplying the char list optional arg, and it contains UTF-8 characters. * Otherwise, rtrim will work normally on a UTF-8 string. * - * @param string $str The string to be trimmed - * @param string|boolean|null $charlist The optional charlist of additional characters to trim + * @param string $str The string to be trimmed. + * @param string $chars [optional] Characters to be stripped. * - * @return string The trimmed string + * @return string The string with unwanted characters stripped from the right. * * @link https://www.php.net/rtrim * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::rtrim() instead. * @deprecated 3.0 Please use UTF8::rtrim() instead. 
*/ - public static function rtrim($str, $charlist = false) + public static function rtrim($str, $chars = false) { - if ($charlist === '') + if ($chars === '') { return $str; } - if ($charlist === false) + if ($chars === false) { - $charlist = null; + return UTF8::rtrim($str); } - return UTF8::rtrim($str, $charlist); + return UTF8::rtrim($str, $chars); } /** - * Strip whitespace (or other characters) from the beginning and end of a string. - * - * UTF-8 aware replacement for trim() + * Strip whitespace or other characters from the beginning and end of a string. * * You only need to use this if you are supplying the charlist optional arg, and it contains UTF-8 characters. * Otherwise, trim will work normally on a UTF-8 string * - * @param string $str The string to be trimmed - * @param string|boolean|null $charlist The optional charlist of additional characters to trim + * @param string $str The string to be trimmed. + * @param string $chars [optional] Characters to be stripped. * - * @return string The trimmed string + * @return string The string with unwanted characters stripped from both ends. * * @link https://www.php.net/trim * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::trim() instead. * @deprecated 3.0 Please use UTF8::trim() instead. */ - public static function trim($str, $charlist = false) + public static function trim($str, $chars = false) { - if ($charlist === '') + if ($chars === '') { return $str; } - if ($charlist === false) + if ($chars === false) { - $charlist = null; + return UTF8::trim($str); } - return UTF8::trim($str, $charlist); + return UTF8::trim($str, $chars); } /** * Make a string's first character uppercase or all words' first character uppercase. 
* - * UTF-8 aware alternative to ucfirst() - * - * @param string $str String to be processed - * @param string|null $delimiter The words' delimiter (null means do not split the string) - * @param string|null $newDelimiter The new words delimiter (null means equal to $delimiter) + * @param string $str String to be processed + * @param string $delimiter [optional] The words' delimiter (omitting means do not split the string) + * @param string $newDelimiter [optional] The new delimiter (omitting means equal to $delimiter) * - * @return string If $delimiter is null, return the string with first character as upper case (if applicable) + * @return string If $delimiter is omitted, return the string with first character as upper case (if applicable) * else consider the string of words separated by the delimiter, apply the ucfirst to each word * and return the string with the new delimiter * - * @link https://www.php.net/ucfirst - * @since 1.3.0 + * @link https://www.php.net/ucfirst + * @since 1.3.0 + * @deprecated 3.0 Please use UTF8::ucfirst() instead. To reproduce the delimiter splitting and re-joining, use explode() and implode(). */ public static function ucfirst($str, $delimiter = null, $newDelimiter = null) { @@ -673,9 +674,7 @@ public static function ucfirst($str, $delimiter = null, $newDelimiter = null) /** * Uppercase the first character of each word in a string. * - * UTF-8 aware alternative to ucwords() - * - * @param string $str String to be processed + * @param string $str The input string. * * @return string String with first char of each word uppercase * @@ -696,10 +695,11 @@ public static function ucwords($str) * @param string $fromEncoding The source encoding. * @param string $toEncoding The target encoding. * - * @return string|null The transcoded string, or null if the source was not a string. + * @return string|false The converted string, or false on failure. 
* * @link https://bugs.php.net/bug.php?id=48147 - * + * @see UTF8::to_iso8859() + * @see UTF8::to_utf8() * @since 1.3.0 */ public static function transcode($source, $fromEncoding, $toEncoding) @@ -710,17 +710,13 @@ public static function transcode($source, $fromEncoding, $toEncoding) } /** - * Tests a string whether it's valid UTF-8 and supported by the Unicode standard. + * Check whether the passed input contains only byte sequences that appear valid UTF-8. * - * Note: this function has been modified to simple return true or false. - * - * @param string $str UTF-8 encoded string. + * @param string $str The input to be checked. * * @return boolean true if valid * - * @author - * @link https://hsivonen.fi/php-utf8/ - * @see compliant + * @see self::compliant * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::is_utf8() instead. * @deprecated 3.0 Please use UTF8::is_utf8() instead. @@ -731,20 +727,13 @@ public static function valid($str) } /** - * Tests whether a string complies as UTF-8. - * - * This will be much faster than StringHelper::valid() but will pass five and six octet UTF-8 sequences, which are - * not supported by Unicode and so cannot be displayed correctly in a browser. In other words it is not as strict - * as StringHelper::valid() but it's faster. If you use it to validate user input, you place yourself at the risk - * that attackers will be able to inject 5 and 6 byte sequences (which may or may not be a significant risk, - * depending on what you are doing). + * Check whether the passed input contains only byte sequences that appear valid UTF-8. * - * @param string $str UTF-8 string to check + * @param string $str The input to be checked. * - * @return boolean TRUE if string is valid UTF-8 + * @return boolean true if valid * - * @see StringHelper::valid - * @link https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805 + * @see self::valid * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. 
Use UTF8::is_utf8() instead. * @deprecated 3.0 Please use UTF8::is_utf8() instead. @@ -755,7 +744,7 @@ public static function compliant($str) } /** - * Converts UTF-8 sequences to UTF-8 string. + * Convert UTF-8 sequence to UTF-8 string. * * @param string $str Unicode string to convert * @@ -771,7 +760,7 @@ public static function unicode_to_utf8($str) } /** - * Converts UTF-16 sequences to UTF-8 string. + * Convert UTF-16 sequence to UTF-8 string. * * @param string $str Unicode string to convert * @@ -806,17 +795,18 @@ private static function splitSearchReplace($value): array } /** - * @param string[]|string $locale The locale + * @param string[]|string $locale The locale(s) * - * @return string + * @return string The encoding */ private static function setLocale($locale): string { - $locale = setlocale(LC_COLLATE, $locale); + self::$currentLocale = setlocale(LC_COLLATE, 0); + $locale = setlocale(LC_COLLATE, $locale); if ($locale === false) { - $locale = setlocale(LC_COLLATE, 0); + $locale = (string) self::$currentLocale; } // See if we have successfully set locale to UTF-8 @@ -838,4 +828,12 @@ private static function setLocale($locale): string return $encoding; } + + /** + * @return void + */ + private static function restoreLocale(): void + { + setlocale(LC_COLLATE, self::$currentLocale); + } } From 2a8bc6c39b7b92b659ef01e56127b48ecca8302e Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Wed, 22 Sep 2021 19:55:37 +0200 Subject: [PATCH 04/23] Chore - Remove polyfill replacement --- composer.json | 7 ------- 1 file changed, 7 deletions(-) diff --git a/composer.json b/composer.json index c6d05746..271f7fd5 100644 --- a/composer.json +++ b/composer.json @@ -24,13 +24,6 @@ "conflict": { "doctrine/inflector": "<1.2" }, - "replace": { - "symfony/polyfill-php72": "1.99", - "symfony/polyfill-iconv": "1.99", - "symfony/polyfill-intl-grapheme": "1.99", - "symfony/polyfill-intl-normalizer": "1.99", - "symfony/polyfill-mbstring": "1.99" - }, "suggest": { "ext-mbstring": 
"For improved performance", "ext-iconv": "For improved performance", From ec6b672aeffef432dbc1dd18ce847cd2ab534fd8 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 00:32:12 +0200 Subject: [PATCH 05/23] Refactor - Try to_utf8() instead of to_utf8_string(); the former is older, test results are the same --- src/StringHelper.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StringHelper.php b/src/StringHelper.php index 1db393b4..782fb177 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -756,7 +756,7 @@ public static function compliant($str) */ public static function unicode_to_utf8($str) { - return UTF8::to_utf8_string($str); + return UTF8::to_utf8($str); } /** @@ -772,7 +772,7 @@ public static function unicode_to_utf8($str) */ public static function unicode_to_utf16($str) { - return UTF8::to_utf8_string($str); + return UTF8::to_utf8($str); } /** From db20f8b9a5582400410f43e70dd7b5f37439b6fa Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 19:18:50 +0200 Subject: [PATCH 06/23] Chore - Provide locales to CI environment --- .drone.jsonnet | 10 ++++++++++ .drone.yml | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 00a89d9d..0b81a8b1 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -22,6 +22,15 @@ local composer(phpversion, params) = { ] }; +local locales(phpversion) = { + name: "locales", + image: "joomlaprojects/docker-images:php" + phpversion, + commands: [ + "localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8", + "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251" + ] +}; + local phpunit(phpversion) = { name: "PHPUnit", image: "joomlaprojects/docker-images:php" + phpversion, @@ -34,6 +43,7 @@ local pipeline(name, phpversion, params) = { name: "PHP " + name, volumes: hostvolumes, steps: [ + locales(phpversion), composer(phpversion, params), phpunit(phpversion) ], diff --git a/.drone.yml b/.drone.yml index 
dce1bba4..079efca5 100644 --- a/.drone.yml +++ b/.drone.yml @@ -65,6 +65,12 @@ platform: arch: amd64 steps: +- name: locales + image: joomlaprojects/docker-images:php7.2 + commands: + - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - name: composer image: joomlaprojects/docker-images:php7.2 commands: @@ -93,6 +99,12 @@ platform: arch: amd64 steps: +- name: locales + image: joomlaprojects/docker-images:php7.2 + commands: + - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - name: composer image: joomlaprojects/docker-images:php7.2 commands: @@ -121,6 +133,12 @@ platform: arch: amd64 steps: +- name: locales + image: joomlaprojects/docker-images:php7.3 + commands: + - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - name: composer image: joomlaprojects/docker-images:php7.3 commands: @@ -149,6 +167,12 @@ platform: arch: amd64 steps: +- name: locales + image: joomlaprojects/docker-images:php7.4 + commands: + - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - name: composer image: joomlaprojects/docker-images:php7.4 commands: @@ -177,6 +201,12 @@ platform: arch: amd64 steps: +- name: locales + image: joomlaprojects/docker-images:php8.0 + commands: + - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - name: composer image: joomlaprojects/docker-images:php8.0 commands: @@ -205,6 +235,12 @@ platform: arch: amd64 steps: +- name: locales + image: joomlaprojects/docker-images:php8.1 + commands: + - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - name: composer image: joomlaprojects/docker-images:php8.1 commands: @@ -227,6 +263,6 @@ volumes: --- kind: signature -hmac: 4af173bc17cfa22a3f0fcef83a9535adb76d5b3727ab33d8d3c4a51f994525a3 +hmac: f8fc60bb7a050364bba020a07c19d8f1eb16a8c019cc8e778e63485ab19e58ab ... 
From d1da205a24097d4ce6e423b08047a60d4e164e6b Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 19:24:50 +0200 Subject: [PATCH 07/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 3 ++- .drone.yml | 20 +++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 0b81a8b1..5ddae931 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -26,7 +26,8 @@ local locales(phpversion) = { name: "locales", image: "joomlaprojects/docker-images:php" + phpversion, commands: [ - "localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8", + "yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru", + "localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8", "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251" ] }; diff --git a/.drone.yml b/.drone.yml index 079efca5..2ffda57b 100644 --- a/.drone.yml +++ b/.drone.yml @@ -68,7 +68,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - name: composer @@ -102,7 +103,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - name: composer @@ -136,7 +138,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.3 commands: - - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - name: composer @@ -170,7 +173,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.4 commands: - - localedef -c -i fr_FR 
-f UTF8 ru_RU.UTF-8 + - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - name: composer @@ -204,7 +208,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.0 commands: - - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - name: composer @@ -238,7 +243,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.1 commands: - - localedef -c -i fr_FR -f UTF8 ru_RU.UTF-8 + - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - name: composer @@ -263,6 +269,6 @@ volumes: --- kind: signature -hmac: f8fc60bb7a050364bba020a07c19d8f1eb16a8c019cc8e778e63485ab19e58ab +hmac: 4ec44e50735d79a749b11df40520497656b7319d7d3b7038cf8fd31e06e8fb9e ... 
From 449b8c9a63041e584f54cf3bc209149759f365ca Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 19:26:27 +0200 Subject: [PATCH 08/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 1 + .drone.yml | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 5ddae931..d437c2a0 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -26,6 +26,7 @@ local locales(phpversion) = { name: "locales", image: "joomlaprojects/docker-images:php" + phpversion, commands: [ + "apt install -y yum", "yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru", "localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8", "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251" diff --git a/.drone.yml b/.drone.yml index 2ffda57b..4fbfcdaf 100644 --- a/.drone.yml +++ b/.drone.yml @@ -68,6 +68,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: + - apt install -y yum - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -103,6 +104,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: + - apt install -y yum - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -138,6 +140,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.3 commands: + - apt install -y yum - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -173,6 +176,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.4 commands: + - apt install -y yum - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -208,6 +212,7 @@ 
steps: - name: locales image: joomlaprojects/docker-images:php8.0 commands: + - apt install -y yum - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -243,6 +248,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.1 commands: + - apt install -y yum - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -269,6 +275,6 @@ volumes: --- kind: signature -hmac: 4ec44e50735d79a749b11df40520497656b7319d7d3b7038cf8fd31e06e8fb9e +hmac: 00e841dad9c8532dcf8d19f40d1797af02d678a290f65bf37c41739598bc7425 ... From 0664c5f7efbc0ca902f840df11fd49f4dd5ad604 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 19:35:11 +0200 Subject: [PATCH 09/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 3 +-- .drone.yml | 20 +++++++------------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index d437c2a0..c4150f43 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -26,8 +26,7 @@ local locales(phpversion) = { name: "locales", image: "joomlaprojects/docker-images:php" + phpversion, commands: [ - "apt install -y yum", - "yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru", + "apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru", "localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8", "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251" ] diff --git a/.drone.yml b/.drone.yml index 4fbfcdaf..8c785fd1 100644 --- a/.drone.yml +++ b/.drone.yml @@ -68,8 +68,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - apt install -y yum - - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y yum-utils && yum -y 
install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -104,8 +103,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - apt install -y yum - - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -140,8 +138,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.3 commands: - - apt install -y yum - - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -176,8 +173,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.4 commands: - - apt install -y yum - - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -212,8 +208,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.0 commands: - - apt install -y yum - - yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -248,8 +243,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.1 commands: - - apt install -y yum - - yum -y install glibc-locale-source glibc-langpack-fr 
glibc-langpack-ru + - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -275,6 +269,6 @@ volumes: --- kind: signature -hmac: 00e841dad9c8532dcf8d19f40d1797af02d678a290f65bf37c41739598bc7425 +hmac: af74c5ce6a60611db19c0d5c7b61d84871f6875570b385ad1f01c0c85942a6e4 ... From 113daa6bf8cade2464da186e0407715f913c7df6 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 19:46:57 +0200 Subject: [PATCH 10/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 2 +- .drone.yml | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index c4150f43..affd3c48 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -26,7 +26,7 @@ local locales(phpversion) = { name: "locales", image: "joomlaprojects/docker-images:php" + phpversion, commands: [ - "apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru", + "apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru", "localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8", "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251" ] diff --git a/.drone.yml b/.drone.yml index 8c785fd1..c7170554 100644 --- a/.drone.yml +++ b/.drone.yml @@ -68,7 +68,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -103,7 +103,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - apt-get update -y && apt-get install -y yum-utils && yum -y 
install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -138,7 +138,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.3 commands: - - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -173,7 +173,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.4 commands: - - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -208,7 +208,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.0 commands: - - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -243,7 +243,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.1 commands: - - apt-get update -y && apt-get install -y yum-utils && yum -y install glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -269,6 +269,6 @@ volumes: --- kind: signature -hmac: 
af74c5ce6a60611db19c0d5c7b61d84871f6875570b385ad1f01c0c85942a6e4 +hmac: ce6fca9a678aa75069489176e356b47914c8f9e649bac75fa8f5b702ec010a21 ... From 29ecc02cdcc4aad382e467ebabd43ad95215bc00 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 19:53:49 +0200 Subject: [PATCH 11/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 2 +- .drone.yml | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index affd3c48..fe3b26d8 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -26,7 +26,7 @@ local locales(phpversion) = { name: "locales", image: "joomlaprojects/docker-images:php" + phpversion, commands: [ - "apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru", + "apt-get update -y && apt-get install -y glibc-source locales-all", "localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8", "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251" ] diff --git a/.drone.yml b/.drone.yml index c7170554..18d64dc8 100644 --- a/.drone.yml +++ b/.drone.yml @@ -68,7 +68,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-source locales-all - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -103,7 +103,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-source locales-all - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -138,7 +138,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.3 commands: - - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update 
-y && apt-get install -y glibc-source locales-all - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -173,7 +173,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.4 commands: - - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-source locales-all - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -208,7 +208,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.0 commands: - - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-source locales-all - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -243,7 +243,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.1 commands: - - apt-get update -y && apt-get install -y glibc-locale-source glibc-langpack-fr glibc-langpack-ru + - apt-get update -y && apt-get install -y glibc-source locales-all - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -269,6 +269,6 @@ volumes: --- kind: signature -hmac: ce6fca9a678aa75069489176e356b47914c8f9e649bac75fa8f5b702ec010a21 +hmac: b6ed17a485142300a60c4de9c553c9584e544ee379b05762a8f2950c56ca7406 ... 
From d82e1e5e37803907a35ec73059df2af08e2a5d61 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 20:10:28 +0200 Subject: [PATCH 12/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 4 ++-- .drone.yml | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index fe3b26d8..8ba9c2bf 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -27,8 +27,8 @@ local locales(phpversion) = { image: "joomlaprojects/docker-images:php" + phpversion, commands: [ "apt-get update -y && apt-get install -y glibc-source locales-all", - "localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8", - "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251" + "locale-gen fr_FR.UTF-8", + "locale-gen ru_RU.CP1251" ] }; diff --git a/.drone.yml b/.drone.yml index 18d64dc8..17547a9b 100644 --- a/.drone.yml +++ b/.drone.yml @@ -69,8 +69,8 @@ steps: image: joomlaprojects/docker-images:php7.2 commands: - apt-get update -y && apt-get install -y glibc-source locales-all - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale-gen fr_FR.UTF-8 + - locale-gen ru_RU.CP1251 - name: composer image: joomlaprojects/docker-images:php7.2 @@ -104,8 +104,8 @@ steps: image: joomlaprojects/docker-images:php7.2 commands: - apt-get update -y && apt-get install -y glibc-source locales-all - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale-gen fr_FR.UTF-8 + - locale-gen ru_RU.CP1251 - name: composer image: joomlaprojects/docker-images:php7.2 @@ -139,8 +139,8 @@ steps: image: joomlaprojects/docker-images:php7.3 commands: - apt-get update -y && apt-get install -y glibc-source locales-all - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale-gen fr_FR.UTF-8 + - locale-gen ru_RU.CP1251 - name: composer image: joomlaprojects/docker-images:php7.3 @@ -174,8 +174,8 @@ steps: image: 
joomlaprojects/docker-images:php7.4 commands: - apt-get update -y && apt-get install -y glibc-source locales-all - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale-gen fr_FR.UTF-8 + - locale-gen ru_RU.CP1251 - name: composer image: joomlaprojects/docker-images:php7.4 @@ -209,8 +209,8 @@ steps: image: joomlaprojects/docker-images:php8.0 commands: - apt-get update -y && apt-get install -y glibc-source locales-all - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale-gen fr_FR.UTF-8 + - locale-gen ru_RU.CP1251 - name: composer image: joomlaprojects/docker-images:php8.0 @@ -244,8 +244,8 @@ steps: image: joomlaprojects/docker-images:php8.1 commands: - apt-get update -y && apt-get install -y glibc-source locales-all - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale-gen fr_FR.UTF-8 + - locale-gen ru_RU.CP1251 - name: composer image: joomlaprojects/docker-images:php8.1 @@ -269,6 +269,6 @@ volumes: --- kind: signature -hmac: b6ed17a485142300a60c4de9c553c9584e544ee379b05762a8f2950c56ca7406 +hmac: 3dc98958d0298aa7b1ed6c1f02945631942597b5b014419021f0dc6094e56d88 ... 
From 774f52e47d0bb9f8ba8596604f076f836e885c39 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 20:14:21 +0200 Subject: [PATCH 13/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 8ba9c2bf..4d2962ca 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -26,7 +26,8 @@ local locales(phpversion) = { name: "locales", image: "joomlaprojects/docker-images:php" + phpversion, commands: [ - "apt-get update -y && apt-get install -y glibc-source locales-all", + "locale", + "locale -a", "locale-gen fr_FR.UTF-8", "locale-gen ru_RU.CP1251" ] From 29fc57921668b7dcb3105217203fa7878933be0c Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 20:15:19 +0200 Subject: [PATCH 14/23] Chore - Provide locales to CI environment (cont'd) --- .drone.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/.drone.yml b/.drone.yml index 17547a9b..69a631d2 100644 --- a/.drone.yml +++ b/.drone.yml @@ -68,7 +68,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - apt-get update -y && apt-get install -y glibc-source locales-all + - locale + - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -103,7 +104,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - apt-get update -y && apt-get install -y glibc-source locales-all + - locale + - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -138,7 +140,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.3 commands: - - apt-get update -y && apt-get install -y glibc-source locales-all + - locale + - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -173,7 +176,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.4 commands: - - apt-get update -y && apt-get install -y glibc-source locales-all + - locale + - locale -a - locale-gen 
fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -208,7 +212,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.0 commands: - - apt-get update -y && apt-get install -y glibc-source locales-all + - locale + - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -243,7 +248,8 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.1 commands: - - apt-get update -y && apt-get install -y glibc-source locales-all + - locale + - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -269,6 +275,6 @@ volumes: --- kind: signature -hmac: 3dc98958d0298aa7b1ed6c1f02945631942597b5b014419021f0dc6094e56d88 +hmac: 74cc459bfb9c6997ad713851e3f4b30eab5f6b89efda3a708ecb745ee2739856 ... From 70effe2b5344288e67d975404dd735e650737844 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 20:17:53 +0200 Subject: [PATCH 15/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 2 +- .drone.yml | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 4d2962ca..13a60427 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -26,7 +26,7 @@ local locales(phpversion) = { name: "locales", image: "joomlaprojects/docker-images:php" + phpversion, commands: [ - "locale", + "apt-get clean && apt-get update && apt-get install -y locales", "locale -a", "locale-gen fr_FR.UTF-8", "locale-gen ru_RU.CP1251" diff --git a/.drone.yml b/.drone.yml index 69a631d2..7eb8b7bb 100644 --- a/.drone.yml +++ b/.drone.yml @@ -68,7 +68,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - locale + - apt-get clean && apt-get update && apt-get install -y locales - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -104,7 +104,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.2 commands: - - locale + - apt-get clean && apt-get update && apt-get install -y locales - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ 
-140,7 +140,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.3 commands: - - locale + - apt-get clean && apt-get update && apt-get install -y locales - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -176,7 +176,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php7.4 commands: - - locale + - apt-get clean && apt-get update && apt-get install -y locales - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -212,7 +212,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.0 commands: - - locale + - apt-get clean && apt-get update && apt-get install -y locales - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -248,7 +248,7 @@ steps: - name: locales image: joomlaprojects/docker-images:php8.1 commands: - - locale + - apt-get clean && apt-get update && apt-get install -y locales - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 @@ -275,6 +275,6 @@ volumes: --- kind: signature -hmac: 74cc459bfb9c6997ad713851e3f4b30eab5f6b89efda3a708ecb745ee2739856 +hmac: 0c0ce713f539a6cbb658fa78592ff7fba9aaa092487bc0275c07f5869ad0036d ... 
From 1332a10b8c19ca3d0398d616d151e19aee8500fa Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 20:22:19 +0200 Subject: [PATCH 16/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 4 +++- .drone.yml | 10 +++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 13a60427..fcfb073d 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -25,11 +25,13 @@ local composer(phpversion, params) = { local locales(phpversion) = { name: "locales", image: "joomlaprojects/docker-images:php" + phpversion, + [if phpversion == "7.2" then "failure"]: "ignore", commands: [ "apt-get clean && apt-get update && apt-get install -y locales", "locale -a", "locale-gen fr_FR.UTF-8", - "locale-gen ru_RU.CP1251" + "locale-gen ru_RU.CP1251", + "locale -a" ] }; diff --git a/.drone.yml b/.drone.yml index 7eb8b7bb..3f98e634 100644 --- a/.drone.yml +++ b/.drone.yml @@ -72,6 +72,8 @@ steps: - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 + - locale -a + failure: ignore - name: composer image: joomlaprojects/docker-images:php7.2 @@ -108,6 +110,8 @@ steps: - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 + - locale -a + failure: ignore - name: composer image: joomlaprojects/docker-images:php7.2 @@ -144,6 +148,7 @@ steps: - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 + - locale -a - name: composer image: joomlaprojects/docker-images:php7.3 @@ -180,6 +185,7 @@ steps: - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 + - locale -a - name: composer image: joomlaprojects/docker-images:php7.4 @@ -216,6 +222,7 @@ steps: - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 + - locale -a - name: composer image: joomlaprojects/docker-images:php8.0 @@ -252,6 +259,7 @@ steps: - locale -a - locale-gen fr_FR.UTF-8 - locale-gen ru_RU.CP1251 + - locale -a - name: composer image: joomlaprojects/docker-images:php8.1 @@ -275,6 +283,6 @@ volumes: --- kind: 
signature -hmac: 0c0ce713f539a6cbb658fa78592ff7fba9aaa092487bc0275c07f5869ad0036d +hmac: 8f4fd9633fff950a3137e1dec0cc4639f6c41056c48bee8e3d0be0b2485dce73 ... From de8f18a33dfd83e59a6a3841d0dff55bfe7e8ce0 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 20:26:42 +0200 Subject: [PATCH 17/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 4 ++-- .drone.yml | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index fcfb073d..85ecc4cb 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -29,8 +29,8 @@ local locales(phpversion) = { commands: [ "apt-get clean && apt-get update && apt-get install -y locales", "locale -a", - "locale-gen fr_FR.UTF-8", - "locale-gen ru_RU.CP1251", + "localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8", + "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251", "locale -a" ] }; diff --git a/.drone.yml b/.drone.yml index 3f98e634..8f6488ac 100644 --- a/.drone.yml +++ b/.drone.yml @@ -70,8 +70,8 @@ steps: commands: - apt-get clean && apt-get update && apt-get install -y locales - locale -a - - locale-gen fr_FR.UTF-8 - - locale-gen ru_RU.CP1251 + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - locale -a failure: ignore @@ -108,8 +108,8 @@ steps: commands: - apt-get clean && apt-get update && apt-get install -y locales - locale -a - - locale-gen fr_FR.UTF-8 - - locale-gen ru_RU.CP1251 + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - locale -a failure: ignore @@ -146,8 +146,8 @@ steps: commands: - apt-get clean && apt-get update && apt-get install -y locales - locale -a - - locale-gen fr_FR.UTF-8 - - locale-gen ru_RU.CP1251 + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - locale -a - name: composer @@ -183,8 +183,8 @@ steps: commands: - apt-get clean && apt-get update && apt-get install -y locales - locale 
-a - - locale-gen fr_FR.UTF-8 - - locale-gen ru_RU.CP1251 + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - locale -a - name: composer @@ -220,8 +220,8 @@ steps: commands: - apt-get clean && apt-get update && apt-get install -y locales - locale -a - - locale-gen fr_FR.UTF-8 - - locale-gen ru_RU.CP1251 + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - locale -a - name: composer @@ -257,8 +257,8 @@ steps: commands: - apt-get clean && apt-get update && apt-get install -y locales - locale -a - - locale-gen fr_FR.UTF-8 - - locale-gen ru_RU.CP1251 + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - locale -a - name: composer @@ -283,6 +283,6 @@ volumes: --- kind: signature -hmac: 8f4fd9633fff950a3137e1dec0cc4639f6c41056c48bee8e3d0be0b2485dce73 +hmac: cc6e0cb7cb2c0a1abc50e6e2e94e260e349b800df17332b09750a02de7f584e7 ... From 5f842109a9b78128ff517c95f99b9c099ee9c3c0 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 20:28:55 +0200 Subject: [PATCH 18/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 17 +++------- .drone.yml | 88 ++++++++++++++++++-------------------------------- 2 files changed, 36 insertions(+), 69 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 85ecc4cb..6e5087e7 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -22,32 +22,25 @@ local composer(phpversion, params) = { ] }; -local locales(phpversion) = { - name: "locales", +local phpunit(phpversion) = { + name: "PHPUnit", image: "joomlaprojects/docker-images:php" + phpversion, - [if phpversion == "7.2" then "failure"]: "ignore", + [if phpversion == "8.1" then "failure"]: "ignore", commands: [ "apt-get clean && apt-get update && apt-get install -y locales", "locale -a", "localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8", "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251", - "locale -a" + "locale -a", + 
"vendor/bin/phpunit" ] }; -local phpunit(phpversion) = { - name: "PHPUnit", - image: "joomlaprojects/docker-images:php" + phpversion, - [if phpversion == "8.1" then "failure"]: "ignore", - commands: ["vendor/bin/phpunit"] -}; - local pipeline(name, phpversion, params) = { kind: "pipeline", name: "PHP " + name, volumes: hostvolumes, steps: [ - locales(phpversion), composer(phpversion, params), phpunit(phpversion) ], diff --git a/.drone.yml b/.drone.yml index 8f6488ac..9abbe0ef 100644 --- a/.drone.yml +++ b/.drone.yml @@ -65,16 +65,6 @@ platform: arch: amd64 steps: -- name: locales - image: joomlaprojects/docker-images:php7.2 - commands: - - apt-get clean && apt-get update && apt-get install -y locales - - locale -a - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - - locale -a - failure: ignore - - name: composer image: joomlaprojects/docker-images:php7.2 commands: @@ -87,6 +77,11 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php7.2 commands: + - apt-get clean && apt-get update && apt-get install -y locales + - locale -a + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale -a - vendor/bin/phpunit volumes: @@ -103,16 +98,6 @@ platform: arch: amd64 steps: -- name: locales - image: joomlaprojects/docker-images:php7.2 - commands: - - apt-get clean && apt-get update && apt-get install -y locales - - locale -a - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - - locale -a - failure: ignore - - name: composer image: joomlaprojects/docker-images:php7.2 commands: @@ -125,6 +110,11 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php7.2 commands: + - apt-get clean && apt-get update && apt-get install -y locales + - locale -a + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale -a - vendor/bin/phpunit volumes: @@ -141,15 +131,6 @@ platform: arch: 
amd64 steps: -- name: locales - image: joomlaprojects/docker-images:php7.3 - commands: - - apt-get clean && apt-get update && apt-get install -y locales - - locale -a - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - - locale -a - - name: composer image: joomlaprojects/docker-images:php7.3 commands: @@ -162,6 +143,11 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php7.3 commands: + - apt-get clean && apt-get update && apt-get install -y locales + - locale -a + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale -a - vendor/bin/phpunit volumes: @@ -178,15 +164,6 @@ platform: arch: amd64 steps: -- name: locales - image: joomlaprojects/docker-images:php7.4 - commands: - - apt-get clean && apt-get update && apt-get install -y locales - - locale -a - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - - locale -a - - name: composer image: joomlaprojects/docker-images:php7.4 commands: @@ -199,6 +176,11 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php7.4 commands: + - apt-get clean && apt-get update && apt-get install -y locales + - locale -a + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale -a - vendor/bin/phpunit volumes: @@ -215,15 +197,6 @@ platform: arch: amd64 steps: -- name: locales - image: joomlaprojects/docker-images:php8.0 - commands: - - apt-get clean && apt-get update && apt-get install -y locales - - locale -a - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - - locale -a - - name: composer image: joomlaprojects/docker-images:php8.0 commands: @@ -236,6 +209,11 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php8.0 commands: + - apt-get clean && apt-get update && apt-get install -y locales + - locale -a + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i 
ru_RU -f CP1251 ru_RU.CP1251 + - locale -a - vendor/bin/phpunit volumes: @@ -252,15 +230,6 @@ platform: arch: amd64 steps: -- name: locales - image: joomlaprojects/docker-images:php8.1 - commands: - - apt-get clean && apt-get update && apt-get install -y locales - - locale -a - - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 - - locale -a - - name: composer image: joomlaprojects/docker-images:php8.1 commands: @@ -273,6 +242,11 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php8.1 commands: + - apt-get clean && apt-get update && apt-get install -y locales + - locale -a + - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 + - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 + - locale -a - vendor/bin/phpunit failure: ignore @@ -283,6 +257,6 @@ volumes: --- kind: signature -hmac: cc6e0cb7cb2c0a1abc50e6e2e94e260e349b800df17332b09750a02de7f584e7 +hmac: c8d21be1c7b1680ea20d25073a8a76ef47d48d81f550e8868b619848f78c54a5 ... From d81a9e8300410d887525aef5f0e40d165825242f Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Thu, 23 Sep 2021 20:41:02 +0200 Subject: [PATCH 19/23] Chore - Provide locales to CI environment (cont'd) --- .drone.jsonnet | 2 +- .drone.yml | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 6e5087e7..41df609e 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -27,7 +27,7 @@ local phpunit(phpversion) = { image: "joomlaprojects/docker-images:php" + phpversion, [if phpversion == "8.1" then "failure"]: "ignore", commands: [ - "apt-get clean && apt-get update && apt-get install -y locales", + "apt-get clean && apt-get --allow-releaseinfo-change update && apt-get install -y locales", "locale -a", "localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8", "localedef -c -i ru_RU -f CP1251 ru_RU.CP1251", diff --git a/.drone.yml b/.drone.yml index 9abbe0ef..f9330bcf 100644 --- a/.drone.yml +++ b/.drone.yml @@ -77,7 +77,7 @@ steps: - name: PHPUnit image: 
joomlaprojects/docker-images:php7.2 commands: - - apt-get clean && apt-get update && apt-get install -y locales + - apt-get clean && apt-get --allow-releaseinfo-change update && apt-get install -y locales - locale -a - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -110,7 +110,7 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php7.2 commands: - - apt-get clean && apt-get update && apt-get install -y locales + - apt-get clean && apt-get --allow-releaseinfo-change update && apt-get install -y locales - locale -a - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -143,7 +143,7 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php7.3 commands: - - apt-get clean && apt-get update && apt-get install -y locales + - apt-get clean && apt-get --allow-releaseinfo-change update && apt-get install -y locales - locale -a - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -176,7 +176,7 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php7.4 commands: - - apt-get clean && apt-get update && apt-get install -y locales + - apt-get clean && apt-get --allow-releaseinfo-change update && apt-get install -y locales - locale -a - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -209,7 +209,7 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php8.0 commands: - - apt-get clean && apt-get update && apt-get install -y locales + - apt-get clean && apt-get --allow-releaseinfo-change update && apt-get install -y locales - locale -a - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -242,7 +242,7 @@ steps: - name: PHPUnit image: joomlaprojects/docker-images:php8.1 commands: - - apt-get clean && apt-get update && apt-get install -y locales + - apt-get clean && apt-get --allow-releaseinfo-change update && apt-get install -y locales 
- locale -a - localedef -c -i fr_FR -f UTF-8 fr_FR.UTF-8 - localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 @@ -257,6 +257,6 @@ volumes: --- kind: signature -hmac: c8d21be1c7b1680ea20d25073a8a76ef47d48d81f550e8868b619848f78c54a5 +hmac: 7a94947f6defafe4679795a1b288940c9aff85ef981a9d6e47b7df2fb4addf8f ... From 3bf06f3a61806cfc9ca4436da3d6cc7bcf19401c Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Fri, 24 Sep 2021 12:20:37 +0200 Subject: [PATCH 20/23] Style - Remove @noinspection annotations --- Tests/InflectorTest.php | 3 --- Tests/StringHelperTest.php | 3 +-- src/StringHelper.php | 5 ----- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/Tests/InflectorTest.php b/Tests/InflectorTest.php index a694e05c..553a86f0 100644 --- a/Tests/InflectorTest.php +++ b/Tests/InflectorTest.php @@ -2,9 +2,6 @@ /** * @copyright Copyright (C) 2005 - 2021 Open Source Matters, Inc. All rights reserved. * @license GNU General Public License version 2 or later; see LICENSE - * - * @noinspection PhpDeprecationInspection - * @noinspection SpellCheckingInspection */ namespace Joomla\String\Tests; diff --git a/Tests/StringHelperTest.php b/Tests/StringHelperTest.php index f65a16d3..16534527 100644 --- a/Tests/StringHelperTest.php +++ b/Tests/StringHelperTest.php @@ -1,5 +1,4 @@ - Date: Fri, 24 Sep 2021 13:01:45 +0200 Subject: [PATCH 21/23] Fix - Set default value for locale to null in strcmp Docs - Add change of default values to inline documentation --- src/StringHelper.php | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/StringHelper.php b/src/StringHelper.php index 8cb315fa..d3927f7a 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -56,7 +56,8 @@ abstract class StringHelper * * @return string The incremented string. 
* - * @since 1.3.0 + * @since 1.3.0 + * @since __DEPLOY_VERSION__ Default value for omitted parameter $n is now null (was 0) */ public static function increment($string, $style = 'default', $n = null) { @@ -140,6 +141,7 @@ public static function ord($chr) * @link https://www.php.net/strpos * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strpos() instead. + * @since __DEPLOY_VERSION__ Default value for omitted parameter $offset is now null (was false) * @deprecated 3.0 Please use UTF8::strpos() instead. */ public static function strpos($haystack, $needle, $offset = null) @@ -162,6 +164,7 @@ public static function strpos($haystack, $needle, $offset = null) * @link https://www.php.net/strrpos * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strrpos() instead. + * @since __DEPLOY_VERSION__ Default value for omitted parameter $offset is now null (was 0) * @deprecated 3.0 Please use UTF8::strrpos() instead. */ public static function strrpos($haystack, $needle, $offset = null) @@ -182,6 +185,7 @@ public static function strrpos($haystack, $needle, $offset = null) * @link https://www.php.net/substr * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::substr() instead. + * @since __DEPLOY_VERSION__ Default value for omitted parameter $length is now null (was false) * @deprecated 3.0 Please use UTF8::substr() instead. */ public static function substr($str, $offset, $length = null) @@ -337,6 +341,7 @@ public static function str_split($str, $splitLen = 1) * @link https://www.php.net/setlocale * @since 1.3.0 * @since __DEPLOY_VERSION__ Restores locale after comparision. + * @since __DEPLOY_VERSION__ Default value for omitted parameter $locale is now null (was false) */ public static function strcasecmp($str1, $str2, $locale = null) { @@ -383,8 +388,9 @@ public static function strcasecmp($str1, $str2, $locale = null) * @link https://www.php.net/setlocale * @since 1.3.0 * @since __DEPLOY_VERSION__ Restores locale after comparision. 
+ * @since __DEPLOY_VERSION__ Default value for omitted parameter $locale is now null (was false) */ - public static function strcmp($str1, $str2, $locale = false) + public static function strcmp($str1, $str2, $locale = null) { if ($locale === false) { @@ -801,7 +807,7 @@ private static function setLocale($locale): string if ($locale === false) { - $locale = (string) self::$currentLocale; + $locale = (string)self::$currentLocale; } // See if we have successfully set locale to UTF-8 From b571471c0a3363c42d8d35cc5d8bd2e54b98983d Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Fri, 24 Sep 2021 14:21:26 +0200 Subject: [PATCH 22/23] Fix - Remove b/c incompatibilities Refactor - Set default to 0 in strpos and strrpos --- src/StringHelper.php | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/StringHelper.php b/src/StringHelper.php index d3927f7a..1a0d9c82 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -141,12 +141,12 @@ public static function ord($chr) * @link https://www.php.net/strpos * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strpos() instead. - * @since __DEPLOY_VERSION__ Default value for omitted parameter $offset is now null (was false) + * @since __DEPLOY_VERSION__ Default value for omitted parameter $offset is now 0 (was false) * @deprecated 3.0 Please use UTF8::strpos() instead. */ - public static function strpos($haystack, $needle, $offset = null) + public static function strpos($haystack, $needle, $offset = 0) { - return UTF8::strpos($haystack, $needle, $offset ?? 0); + return UTF8::strpos($haystack, $needle, $offset ?: 0); } /** @@ -164,12 +164,11 @@ public static function strpos($haystack, $needle, $offset = null) * @link https://www.php.net/strrpos * @since 1.3.0 * @since __DEPLOY_VERSION__ Deprecated. Use UTF8::strrpos() instead. - * @since __DEPLOY_VERSION__ Default value for omitted parameter $offset is now null (was 0) * @deprecated 3.0 Please use UTF8::strrpos() instead. 
*/ - public static function strrpos($haystack, $needle, $offset = null) + public static function strrpos($haystack, $needle, $offset = 0) { - return UTF8::strrpos($haystack, $needle, $offset ?? 0); + return UTF8::strrpos($haystack, $needle, $offset ?: 0); } /** @@ -190,6 +189,11 @@ public static function strrpos($haystack, $needle, $offset = null) */ public static function substr($str, $offset, $length = null) { + if ($length === false) + { + $length = null; + } + return UTF8::substr($str, $offset, $length); } From 9c10eb8adb940124cbea3614df23b5623442a1f3 Mon Sep 17 00:00:00 2001 From: Niels Braczek Date: Fri, 24 Sep 2021 14:26:33 +0200 Subject: [PATCH 23/23] Style - CS fix --- src/StringHelper.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StringHelper.php b/src/StringHelper.php index 1a0d9c82..aebed6c0 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -811,7 +811,7 @@ private static function setLocale($locale): string if ($locale === false) { - $locale = (string)self::$currentLocale; + $locale = (string) self::$currentLocale; } // See if we have successfully set locale to UTF-8