-
Notifications
You must be signed in to change notification settings - Fork 539
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix for two bugs related to Unicode translation support by Font objec…
…ts (#698) * Fix for two bugs related to Unicode translation support by Font objects Symptom was that some documents' contents was rendering as a bunch of control characters. These are the untranslated strings. This was happening because for two different reasons, these strings weren't being translated \Smalot\PdfParser\Font::decodeContent() in some circumstances. First fix is to \Smalot\PdfParser\Font::loadTranslateTable(): - Fixed bug where bfchar sections weren't loaded due to mistake in regexp. - It now uses `*` instead of `+` and thus supports translation tables with lines like `<0000><0000>`. (Required `<0000> <0000>` before.) Second fix is for documents that attach their Font objects to the Pages object instead of each Page object: - \Smalot\PdfParser\Page now has a setFonts() method - \Smalot\PdfParser\Pages now declares its $fonts variable - \Smalot\PdfParser\Pages::getPages() now applies the object's fonts to each child Page - \Smalot\PdfParser\Pages::getFonts() copied from Page class * Pages.php typo fix Added relative namespace for `ElementMissing` * Update src/Smalot/PdfParser/Page.php to make doc comment for setFonts() Co-authored-by: Konrad Abicht <[email protected]> * Update Pages.php to make getFonts() protected Requested by @k00ni * Refined Pages instance new getFonts method not only returned stored fonts but also built related fonts list. With this changes its easier to test and the replacement (setupFonts) only builds the fonts list. Also refined some phpdoc comments. * Added PagesTest.php with two integration tests * added more information to PagesTest.php * PagesTest.php: fixed comment * PagesTest.php: added test case which doesn't depend on setFonts explicitly * PagesTest.php: fixed syntax error * PagesTest.php: deployed suggested test from @GreyWyvern Reference: #698 (comment) --------- Co-authored-by: Konrad Abicht <[email protected]>
- Loading branch information
Showing
4 changed files
with
178 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
<?php | ||
|
||
/** | ||
* @file This file is part of the PdfParser library. | ||
* | ||
* @author Konrad Abicht <[email protected]> | ||
* | ||
* @date 2024-04-19 | ||
* | ||
* @license LGPLv3 | ||
* | ||
* @url <https://github.com/smalot/pdfparser> | ||
* | ||
* PdfParser is a pdf library written in PHP, extraction oriented. | ||
* Copyright (C) 2017 - Sébastien MALOT <[email protected]> | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with this program. | ||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>. | ||
*/ | ||
|
||
namespace PHPUnitTests\Integration; | ||
|
||
use PHPUnitTests\TestCase; | ||
use Smalot\PdfParser\Document; | ||
use Smalot\PdfParser\Element\ElementArray; | ||
use Smalot\PdfParser\Font; | ||
use Smalot\PdfParser\Header; | ||
use Smalot\PdfParser\Page; | ||
use Smalot\PdfParser\Pages; | ||
|
||
/** | ||
* @internal only for test purposes | ||
*/ | ||
class PagesDummy extends Pages | ||
{ | ||
/** | ||
* The purpose of this function is to bypass the tedious | ||
* work to setup instances which lead to a valid $fonts variable. | ||
* | ||
* @param array<\Smalot\PdfParser\Font> $fonts | ||
* | ||
* @return void | ||
*/ | ||
public function setFonts($fonts) | ||
{ | ||
$this->fonts = $fonts; | ||
} | ||
} | ||
|
||
class PagesTest extends TestCase | ||
{ | ||
public function testFontsArePassedFromPagesToPage(): void | ||
{ | ||
// Create mock Document, Font and Page objects | ||
$document = $this->createMock(Document::class); | ||
$font1 = new Font($document); | ||
$page = new Page($document); | ||
|
||
// Create a Header object that indicates $page is a child | ||
$header = new Header([ | ||
'Kids' => new ElementArray([ | ||
$page, | ||
]), | ||
], $document); | ||
|
||
// Use this header to create a mock Pages object | ||
$pages = new PagesDummy($document, $header); | ||
|
||
// Apply $font1 as a Font object to this Pages object; | ||
// setFonts is used here as part of PagesDummy, only to access | ||
// the protected Pages::fonts variable; it is not a method | ||
// available in production | ||
$pages->setFonts([$font1]); | ||
|
||
// Trigger setupFonts method in $pages | ||
$pages->getPages(true); | ||
|
||
// Since the $page object font list is empty, $font1 from Pages | ||
// object must be passed to the Page object | ||
$this->assertEquals([$font1], $page->getFonts()); | ||
|
||
// Create a second $font2 using a different method | ||
$font2 = $this->createMock(Font::class); | ||
|
||
// Update the fonts in $pages | ||
$pages->setFonts([$font1, $font2]); | ||
|
||
// Trigger setupFonts method in $pages | ||
$pages->getPages(true); | ||
|
||
// Now that $page already has a font, updates from $pages | ||
// should not overwrite it | ||
$this->assertEquals([$font1], $page->getFonts()); | ||
} | ||
} |