Skip to content

Commit

Permalink
Improve name encoding support for platform 3
Browse files Browse the repository at this point in the history
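Most name strings should be encoded with UTF-16BE per the spec, but there are situations where other encodings are required or acceptable. This change only addresses a subset of potential encodings: for platform 3 records with platform-specific encoding IDs 2, 3, 4 and 5 it tries SJIS, GB18030, BIG-5 and UHC respectively (when the local mbstring build supports them), and falls back to UTF-16 otherwise.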

fixes #70
  • Loading branch information
bsweeney committed Dec 12, 2023
1 parent c5f7810 commit 84c64a7
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 3 deletions.
44 changes: 41 additions & 3 deletions src/FontLib/Table/Type/name.php
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,49 @@ protected function _parse() {
$records[] = $record;
}

$system_encodings = mb_list_encodings();
foreach ($system_encodings as $enc) {
$system_encodings = array_merge($system_encodings, @mb_encoding_aliases($enc));
}
$system_encodings = array_change_key_case(array_fill_keys($system_encodings, true), CASE_UPPER);

$names = array();
foreach ($records as $record) {
$font->seek($tableOffset + $data["stringOffset"] + $record->offset);
$s = $font->read($record->length);
$record->string = Font::UTF16ToUTF8($s);
$record->stringRaw = $font->read($record->length);

$encoding = "UTF-16";
switch ($record->platformID) {
case 3:
switch ($record->platformSpecificID) {
case 2:
if (\array_key_exists("SJIS", $system_encodings)) {
$encoding = mb_detect_encoding($record->stringRaw, ["SJIS", "UTF-16"], true);
}
break;
case 3:
if (\array_key_exists("GB18030", $system_encodings)) {
$encoding = mb_detect_encoding($record->stringRaw, ["GB18030", "UTF-16"], true);
}
break;
case 4:
if (\array_key_exists("BIG-5", $system_encodings)) {
$encoding = mb_detect_encoding($record->stringRaw, ["BIG-5", "UTF-16"], true);
}
break;
case 5:
if (\array_key_exists("UHC", $system_encodings)) {
$encoding = mb_detect_encoding($record->stringRaw, ["UHC", "UTF-16"], true);
}
break;
}
break;
}
if ($encoding === false) {
$encoding = "UTF-16";
}

$record->string = mb_convert_encoding($record->stringRaw, "UTF-8", $encoding);
$names[$record->nameID] = $record;
}

Expand Down Expand Up @@ -184,7 +222,7 @@ protected function _encode() {
}

foreach ($records as $record) {
$str = $record->getUTF16();
$str = $record->stringRaw;
$length += $font->write($str, mb_strlen($str, "8bit"));
}

Expand Down
1 change: 1 addition & 0 deletions src/FontLib/Table/Type/nameRecord.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class nameRecord extends BinaryStream {
public $length;
public $offset;
public $string;
public $stringRaw;

public static $format = array(
"platformID" => self::uint16,
Expand Down

0 comments on commit 84c64a7

Please sign in to comment.