Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Development #182

Merged
merged 13 commits into from
Sep 28, 2023
82 changes: 68 additions & 14 deletions src/PdfParser/PdfParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
use setasign\Fpdi\PdfParser\Type\PdfString;
use setasign\Fpdi\PdfParser\Type\PdfToken;
use setasign\Fpdi\PdfParser\Type\PdfType;
use setasign\Fpdi\PdfParser\Type\PdfTypeException;

/**
* A PDF parser class
Expand Down Expand Up @@ -258,25 +259,25 @@ public function readValue($token = null, $expectedType = null)
switch ($token) {
case '(':
$this->ensureExpectedType($token, $expectedType);
return PdfString::parse($this->streamReader);
return $this->parsePdfString();

case '<':
if ($this->streamReader->getByte() === '<') {
$this->ensureExpectedType('<<', $expectedType);
$this->streamReader->addOffset(1);
return PdfDictionary::parse($this->tokenizer, $this->streamReader, $this);
return $this->parsePdfDictionary();
}

$this->ensureExpectedType($token, $expectedType);
return PdfHexString::parse($this->streamReader);
return $this->parsePdfHexString();

case '/':
$this->ensureExpectedType($token, $expectedType);
return PdfName::parse($this->tokenizer, $this->streamReader);
return $this->parsePdfName();

case '[':
$this->ensureExpectedType($token, $expectedType);
return PdfArray::parse($this->tokenizer, $this);
return $this->parsePdfArray();

default:
if (\is_numeric($token)) {
Expand All @@ -291,13 +292,7 @@ public function readValue($token = null, $expectedType = null)
);
}

return PdfIndirectObject::parse(
(int) $token,
(int) $token2,
$this,
$this->tokenizer,
$this->streamReader
);
return $this->parsePdfIndirectObject((int)$token, (int)$token2);
case 'R':
if (
$expectedType !== null &&
Expand All @@ -309,7 +304,7 @@ public function readValue($token = null, $expectedType = null)
);
}

return PdfIndirectObjectReference::create((int) $token, (int) $token2);
return PdfIndirectObjectReference::create((int)$token, (int)$token2);
}

$this->tokenizer->pushStack($token3);
Expand Down Expand Up @@ -351,6 +346,65 @@ public function readValue($token = null, $expectedType = null)
}
}

/**
 * Parses a literal string object by delegating to PdfString::parse().
 *
 * Kept as its own protected method so a subclass can substitute the string parsing step.
 *
 * @return PdfString
 */
protected function parsePdfString()
{
    $reader = $this->streamReader;

    return PdfString::parse($reader);
}

/**
 * Parses a hexadecimal string object by delegating to PdfHexString::parse().
 *
 * @return false|PdfHexString
 */
protected function parsePdfHexString()
{
    $reader = $this->streamReader;

    return PdfHexString::parse($reader);
}

/**
 * Parses a dictionary object by delegating to PdfDictionary::parse().
 *
 * The parser instance itself is handed over together with its tokenizer and stream reader.
 *
 * @return bool|PdfDictionary
 * @throws PdfTypeException
 */
protected function parsePdfDictionary()
{
    $tokenizer = $this->tokenizer;
    $reader = $this->streamReader;

    return PdfDictionary::parse($tokenizer, $reader, $this);
}

/**
 * Parses a name object by delegating to PdfName::parse().
 *
 * @return PdfName
 */
protected function parsePdfName()
{
    $tokenizer = $this->tokenizer;
    $reader = $this->streamReader;

    return PdfName::parse($tokenizer, $reader);
}

/**
 * Parses an array object by delegating to PdfArray::parse().
 *
 * The parser instance itself is handed over together with its tokenizer.
 *
 * @return false|PdfArray
 * @throws PdfTypeException
 */
protected function parsePdfArray()
{
    $tokenizer = $this->tokenizer;

    return PdfArray::parse($tokenizer, $this);
}

/**
 * Parses an indirect object by delegating to PdfIndirectObject::parse().
 *
 * @param int $objectNumber The object number that is forwarded to the parsed object.
 * @param int $generationNumber The generation number that is forwarded to the parsed object.
 * @return false|PdfIndirectObject
 * @throws Type\PdfTypeException
 */
protected function parsePdfIndirectObject($objectNumber, $generationNumber)
{
    $tokenizer = $this->tokenizer;
    $reader = $this->streamReader;

    return PdfIndirectObject::parse($objectNumber, $generationNumber, $this, $tokenizer, $reader);
}

/**
* Ensures that the token will evaluate to an expected object type (or not).
*
Expand All @@ -359,7 +413,7 @@ public function readValue($token = null, $expectedType = null)
* @return bool
* @throws Type\PdfTypeException
*/
private function ensureExpectedType($token, $expectedType)
protected function ensureExpectedType($token, $expectedType)
{
static $mapping = [
'(' => PdfString::class,
Expand Down
6 changes: 6 additions & 0 deletions src/PdfParser/StreamReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ public function __construct($stream, $closeStream = false)
);
}

if (fseek($stream, 0) === -1) {
throw new \InvalidArgumentException(
'Given stream is not seekable!'
);
}

$this->stream = $stream;
$this->closeStream = $closeStream;
$this->reset();
Expand Down
2 changes: 1 addition & 1 deletion src/PdfParser/Type/PdfArray.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class PdfArray extends PdfType
*
* @param Tokenizer $tokenizer
* @param PdfParser $parser
* @return bool|self
* @return false|self
* @throws PdfTypeException
*/
public static function parse(Tokenizer $tokenizer, PdfParser $parser)
Expand Down
2 changes: 1 addition & 1 deletion src/PdfParser/Type/PdfHexString.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class PdfHexString extends PdfType
* Parses a hexadecimal string object from the stream reader.
*
* @param StreamReader $streamReader
* @return bool|self
* @return false|self
*/
public static function parse(StreamReader $streamReader)
{
Expand Down
14 changes: 7 additions & 7 deletions src/PdfParser/Type/PdfIndirectObject.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ class PdfIndirectObject extends PdfType
/**
* Parses an indirect object from a tokenizer, parser and stream-reader.
*
* @param int $objectNumberToken
* @param int $objectGenerationNumberToken
* @param int $objectNumber
* @param int $objectGenerationNumber
* @param PdfParser $parser
* @param Tokenizer $tokenizer
* @param StreamReader $reader
* @return bool|self
* @return self|false
* @throws PdfTypeException
*/
public static function parse(
$objectNumberToken,
$objectGenerationNumberToken,
$objectNumber,
$objectGenerationNumber,
MaximilianKresse marked this conversation as resolved.
Show resolved Hide resolved
PdfParser $parser,
Tokenizer $tokenizer,
StreamReader $reader
Expand All @@ -50,8 +50,8 @@ public static function parse(
}

$v = new self();
$v->objectNumber = (int) $objectNumberToken;
$v->generationNumber = (int) $objectGenerationNumberToken;
$v->objectNumber = (int) $objectNumber;
$v->generationNumber = (int) $objectGenerationNumber;
$v->value = $value;

return $v;
Expand Down
45 changes: 38 additions & 7 deletions src/PdfParser/Type/PdfStream.php
Original file line number Diff line number Diff line change
Expand Up @@ -213,18 +213,16 @@ protected function extractStream()
}

/**
* Get the unfiltered stream data.
* Get all filters defined for this stream.
*
* @return string
* @throws FilterException
* @throws PdfParserException
* @return PdfType[]
* @throws PdfTypeException
*/
public function getUnfilteredStream()
public function getFilters()
{
$stream = $this->getStream();
$filters = PdfDictionary::get($this->value, 'Filter');
if ($filters instanceof PdfNull) {
return $stream;
return [];
}

if ($filters instanceof PdfArray) {
Expand All @@ -233,6 +231,24 @@ public function getUnfilteredStream()
$filters = [$filters];
}

return $filters;
}

/**
* Get the unfiltered stream data.
*
* @return string
* @throws FilterException
* @throws PdfParserException
*/
public function getUnfilteredStream()
{
$stream = $this->getStream();
$filters = $this->getFilters();
if ($filters === []) {
return $stream;
}

$decodeParams = PdfDictionary::get($this->value, 'DecodeParms');
if ($decodeParams instanceof PdfArray) {
$decodeParams = $decodeParams->value;
Expand Down Expand Up @@ -308,6 +324,21 @@ public function getUnfilteredStream()
$stream = $filterObject->decode($stream);
break;

case 'Crypt':
if (!$decodeParam instanceof PdfDictionary) {
break;
}
// Filter is "Identity"
$name = PdfDictionary::get($decodeParam, 'Name');
if (!$name instanceof PdfName || $name->value !== 'Identity') {
break;
}

throw new FilterException(
'Support for Crypt filters other than "Identity" is not implemented.',
FilterException::UNSUPPORTED_FILTER
);

default:
throw new FilterException(
\sprintf('Unsupported filter "%s".', $filter->value),
Expand Down
30 changes: 30 additions & 0 deletions src/PdfParser/Type/PdfString.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,36 @@ public static function ensure($string)
return PdfType::ensureType(self::class, $string, 'String value expected.');
}

/**
 * Escapes a string so that it can be written as a PDF literal string.
 *
 * Backslashes and both parentheses are prefixed with a backslash; CR, LF, TAB, BS and FF are replaced by their
 * two-character escape sequences (\r, \n, \t, \b, \f).
 *
 * @param string $s
 * @return string
 */
public static function escape($s)
{
    // The backslash has to stay first: str_replace() works through the pairs in order, so the backslashes
    // introduced by the later pairs are not escaped a second time.
    $search = ['\\', ')', '(', "\x0D", "\x0A", "\x09", "\x08", "\x0C"];
    $replace = ['\\\\', '\\)', '\\(', '\r', '\n', '\t', '\b', '\f'];

    // Probe for the first special character before replacing, so that strings without any of them are
    // returned untouched.
    foreach ($search as $needle) {
        if (\strpos($s, $needle) !== false) {
            return \str_replace($search, $replace, $s);
        }
    }

    return $s;
}

/**
* Unescapes escaped sequences in a PDF string according to the PDF specification.
*
Expand Down
2 changes: 1 addition & 1 deletion src/Tcpdf/Fpdi.php
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ protected function writePdfType(PdfType $value)
if ($value instanceof PdfString) {
$string = PdfString::unescape($value->value);
$string = $this->_encrypt_data($this->currentObjectNumber, $string);
$value->value = \TCPDF_STATIC::_escape($string);
$value->value = PdfString::escape($string);
} elseif ($value instanceof PdfHexString) {
$filter = new AsciiHex();
$string = $filter->decode($value->value);
Expand Down
21 changes: 21 additions & 0 deletions tests/functional/PdfParser/Type/PdfStreamTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -423,4 +423,25 @@ public function testGetUnfilteredStream($file, $objectNumber, $expectedResult)

$this->assertEquals($expectedResult, $stream->getUnfilteredStream());
}

/**
 * A stream whose /Filter is "Crypt" and that carries no decode parameters has to be returned unchanged by
 * getUnfilteredStream().
 */
public function testParseWithCryptFilter()
{
    $in = "123 0 obj\n<</Filter /Crypt /Length 5>>\nstream\nHello\nendstream\nendobj";

    // the dictionary that a parser would have produced for the object above
    $dictionary = PdfDictionary::create([
        'Filter' => PdfName::create('Crypt'),
        'Length' => PdfNumeric::create(5)
    ]);

    // move the reader onto the line break that directly follows the "stream" keyword
    $reader = StreamReader::createByString($in);
    $reader->setOffset(45);
    $this->assertSame("\n", $reader->getByte());

    $pdfStream = PdfStream::parse($dictionary, $reader);

    $this->assertSame($dictionary, $pdfStream->value);
    $this->assertSame('Hello', $pdfStream->getUnfilteredStream());
}
}
52 changes: 52 additions & 0 deletions tests/unit/PdfParser/DummyFaultyStreamWrapper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

namespace setasign\Fpdi\unit\PdfParser;

/**
 * This stream wrapper is created to simulate a stream wrapper that is not seekable (while its metadata report that
 * it is seekable).
 *
 * Every callback is a minimal stub: opening always succeeds, reads yield an empty string, writes accept nothing and
 * every seek request is rejected.
 */
class DummyFaultyStreamWrapper
{
    /**
     * Stream context slot of the streamWrapper prototype; this class never reads it.
     *
     * @var resource|null
     */
    public $context;

    /**
     * Accepts every open request without doing any work.
     *
     * @param string $path
     * @param string $mode
     * @param int $options
     * @param string|null $opened_path
     * @return bool Always true.
     */
    public function stream_open($path, $mode, $options, &$opened_path)
    {
        return true;
    }

    /**
     * Never delivers any data.
     *
     * @param int $count
     * @return string Always an empty string.
     */
    public function stream_read($count)
    {
        return '';
    }

    /**
     * Discards the data and reports that nothing was written.
     *
     * @param string $data
     * @return int Always 0.
     */
    public function stream_write($data)
    {
        return 0;
    }

    /**
     * Reports a fixed position at the beginning of the stream.
     *
     * @return int Always 0.
     */
    public function stream_tell()
    {
        return 0;
    }

    /**
     * Never reports the end of the stream.
     *
     * @return bool Always false.
     */
    public function stream_eof()
    {
        return false;
    }

    /**
     * Rejects every seek request - this is the faulty behavior the wrapper exists to simulate.
     *
     * @param int $offset
     * @param int $whence
     * @return bool Always false.
     */
    public function stream_seek($offset, $whence)
    {
        return false;
    }

    /**
     * Reports success for every metadata change request.
     *
     * @param string $path
     * @param int $option
     * @param mixed $var
     * @return bool Always true.
     */
    public function stream_metadata($path, $option, $var)
    {
        return true;
    }

    /**
     * Provides no stat information.
     *
     * @return array Always an empty array.
     */
    public function stream_stat()
    {
        return [];
    }
}
Loading
Loading