From e9ddd399d8c434c21246531ca72c818218a5ede9 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Fri, 17 Jan 2025 17:50:19 +0100 Subject: [PATCH] Introduce the TabularData interface --- CHANGELOG.md | 5 + composer.json | 18 ++- docs/9.0/reader/index.md | 1 + docs/9.0/reader/resultset.md | 34 +++++ src/RdbmsResult.php | 243 +++++++++++++++++++++++++++++++++++ src/RdbmsResultTest.php | 122 ++++++++++++++++++ src/Reader.php | 30 ++--- src/ResultSet.php | 36 ++++-- src/TabularData.php | 50 +++++++ src/TabularDataReader.php | 32 +---- test_files/users.sqlite | 0 11 files changed, 509 insertions(+), 62 deletions(-) create mode 100644 src/RdbmsResult.php create mode 100644 src/RdbmsResultTest.php create mode 100644 src/TabularData.php create mode 100644 test_files/users.sqlite diff --git a/CHANGELOG.md b/CHANGELOG.md index 78683cd3..72742828 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,16 +9,21 @@ All Notable changes to `Csv` will be documented in this file - `Writer::necessaryEnclosure` - `TabularDataReader::selectAllExcept` - `Statement::selectAllExcept` +- `ResultSet::createFromTabularData` +- `RdbmsResult` +- `TabularData` ### Deprecated - `Writer::relaxEnclosure` use `Writer::necessaryEnclosure` +- `ResultSet::createFromTabularDataReader` use `ResultSet::createFromTabularData` ### Fixed - `Comparison::CONTAINS` must check the value is a string before calling `str_compare` [#548](https://github.com/thephpleague/csv/pull/548) by [cage-is](https://github.com/cage-is) - Fix testing to improve Debian integration [#549](https://github.com/thephpleague/csv/pull/549) by [David Prévot and tenzap](https://github.com/tenzap) - `Bom::tryFromSequence` and `Bom::fromSequence` supports the `Reader` and `Writer` classes. +- `ResultSet::createFromRecords` now automatically set the header for any `TabularDataReader` or `PDOStatement` instance. 
### Removed diff --git a/composer.json b/composer.json index 75a29979..510222b4 100644 --- a/composer.json +++ b/composer.json @@ -32,14 +32,14 @@ "require-dev": { "ext-dom": "*", "ext-xdebug": "*", - "friendsofphp/php-cs-fixer": "^3.64.0", + "friendsofphp/php-cs-fixer": "^3.68.1", "phpbench/phpbench": "^1.3.1", - "phpstan/phpstan": "^1.12.11", + "phpstan/phpstan": "^1.12.15", "phpstan/phpstan-deprecation-rules": "^1.2.1", - "phpstan/phpstan-phpunit": "^1.4.1", + "phpstan/phpstan-phpunit": "^1.4.2", "phpstan/phpstan-strict-rules": "^1.6.1", - "phpunit/phpunit": "^10.5.16 || ^11.4.3", - "symfony/var-dumper": "^6.4.8 || ^7.1.8" + "phpunit/phpunit": "^10.5.16 || ^11.5.3", + "symfony/var-dumper": "^6.4.8 || ^7.2.0" }, "autoload": { "psr-4": { @@ -68,9 +68,13 @@ "test": "Runs full test suite" }, "suggest": { - "ext-iconv" : "Needed to ease transcoding CSV using iconv stream filters", "ext-dom" : "Required to use the XMLConverter and the HTMLConverter classes", - "ext-mbstring": "Needed to ease transcoding CSV using mb stream filters" + "ext-iconv" : "Needed to ease transcoding CSV using iconv stream filters", + "ext-mbstring": "Needed to ease transcoding CSV using mb stream filters", + "ext-pdo": "Required to use the package with the PDO extension", + "ext-sqlite3": "Required to use the package with the SQLite3 extension", + "ext-mysqli": "Required to use the package with the MySQLi extension", + "ext-pgsql": "Required to use the package with the PgSQL extension" }, "extra": { "branch-alias": { diff --git a/docs/9.0/reader/index.md b/docs/9.0/reader/index.md index cc7de0e9..14ee3698 100644 --- a/docs/9.0/reader/index.md +++ b/docs/9.0/reader/index.md @@ -11,6 +11,7 @@ the `League\Csv\TabularDataReader` interface.

Starting with version 9.1.0, createFromPath has its default open_mode parameter set to r.

Prior to 9.1.0, the open mode was r+ which looks for write permissions on the file and throws an Exception if the file cannot be opened with the permission set. For sake of clarity, it is strongly suggested to set r mode on the file to ensure it can be opened.

+

Starting with version 9.22.0, the class implements the League\Csv\TabularData interface.

The `Reader` provides a convenient and straight forward API to access and handle CSV. While most of its capabilities are explained in the [Tabular Data Reader documentation page](/9.0/reader/tabular-data-reader), diff --git a/docs/9.0/reader/resultset.md b/docs/9.0/reader/resultset.md index bcf0974b..9438a753 100644 --- a/docs/9.0/reader/resultset.md +++ b/docs/9.0/reader/resultset.md @@ -9,6 +9,40 @@ A `League\Csv\ResultSet` object represents the associated result set of processi This object is returned from [Statement::process](/9.0/reader/statement/#apply-the-constraints-to-a-csv-document) execution.

Starting with version 9.6.0, the class implements the League\Csv\TabularDataReader interface.

+

Starting with version 9.22.0, the class implements the League\Csv\TabularData interface.

+ +## Instantiation + +

Starting with version 9.22.0

+ +The `ResultSet` object can be instantiated from objects other than `Statement`. + +You can instantiate it directly from any object that implements the `League\Csv\TabularData` interface, like the `Reader` class: + +```php +$resultSet = ResultSet::createFromTabularData(Reader::createFromPath('path/to/file.csv')); +``` + +But you can also instantiate it from RDBMS results using the `ResultSet::createFromRdbms` method: + +```php +$db = new SQLite3( '/path/to/my/db.sqlite'); +$stmt = $db->query("SELECT * FROM users"); +$stmt instanceof SQLite3Result || throw new RuntimeException('SQLite3 results not available'); + +$user24 = ResultSet::createFromRdbms($stmt)->nth(23); +``` + +The `createFromRdbms` method can be used with the following database extensions: + +- SQLite3 (`SQLite3Result` object) +- MySQL Improved Extension (`mysqli_result` object) +- PostgreSQL (`PgSql\Result` object returned by `pg_get_result`) +- PDO (`PDOStatement` object) + +

Beware when using a PDOStatement: the class does not support rewinding the object. +As such, using the instance on huge results will trigger high memory usage as all the data will be stored in an +ArrayIterator instance used as a cache to allow rewinding and inspecting the tabular data.

## Selecting records diff --git a/src/RdbmsResult.php b/src/RdbmsResult.php new file mode 100644 index 00000000..f39644ec --- /dev/null +++ b/src/RdbmsResult.php @@ -0,0 +1,243 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv; + +use ArrayIterator; +use Iterator; +use mysqli_result; +use PDO; +use PDOStatement; +use PgSql\Result; +use RuntimeException; +use SQLite3Result; +use Throwable; +use ValueError; + +use function array_column; +use function array_map; +use function pg_fetch_assoc; +use function pg_field_name; +use function pg_num_fields; +use function pg_result_seek; +use function range; + +use const SQLITE3_ASSOC; + +final class RdbmsResult implements TabularData +{ + private function __construct(private readonly Iterator $records, private readonly array $headers) + { + } + + public function getHeader(): array + { + return $this->headers; + } + + public function getIterator(): Iterator + { + return $this->records; + } + + public static function tryFrom(object $result): ?self + { + try { + return self::from($result); + } catch (Throwable) { + return null; + } + } + + /** + * @throws RuntimeException If the DB result is unknown or unsupported + */ + public static function from(object $result): self + { + return new self(self::records($result), self::columnNames($result)); + } + + /** + * @throws RuntimeException If the DB result is unknown or unsupported or no column names information is found. 
+ * + * @return array + */ + public static function columnNames(object $result): array + { + return match (true) { + $result instanceof PDOStatement => array_map( + function (int $i) use ($result): string { + $metadata = $result->getColumnMeta($i); + false !== $metadata || throw new RuntimeException('Unable to get metadata for column '.$i); + + return $metadata['name']; + }, + range(0, $result->columnCount() - 1) + ), + $result instanceof Result => array_map(fn (int $index) => pg_field_name($result, $index), range(0, pg_num_fields($result) - 1)), + $result instanceof mysqli_result => array_column($result->fetch_fields(), 'name'), + $result instanceof SQLite3Result => array_map($result->columnName(...), range(0, $result->numColumns() - 1)), + default => throw new ValueError('Unknown or unsupported RDBMS result object '.$result::class), + }; + } + + public static function records(object $result): Iterator + { + return match (true) { + $result instanceof SQLite3Result => new class ($result) implements Iterator { + private array|false $current = false; /* false until rewind() fetches the first row, so valid() is safe to call first */ + private int $key = 0; + + public function __construct(private SQLite3Result $result) + { + } + + public function rewind(): void + { + $this->result->reset(); + $this->current = $this->result->fetchArray(SQLITE3_ASSOC); + $this->key = 0; + } + + public function current(): array|false + { + return $this->current; + } + + public function key(): string|int|null + { + return $this->key; + } + + public function next(): void + { + $this->current = $this->result->fetchArray(SQLITE3_ASSOC); + $this->key++; + } + + public function valid(): bool + { + return false !== $this->current; + } + }, + $result instanceof mysqli_result => new class ($result) implements Iterator { + private array|false|null $current = null; /* null until rewind() fetches the first row, so valid() is safe to call first */ + private int $key = 0; + + public function __construct(private mysqli_result $result) + { + } + + public function rewind(): void + { + $this->result->data_seek(0); + $this->current = $this->result->fetch_assoc(); + $this->key = 0; + 
} + + public function current(): array|false|null + { + return $this->current; + } + + public function key(): string|int|null + { + return $this->key; + } + + public function next(): void + { + $this->current = $this->result->fetch_assoc(); + $this->key++; + } + + public function valid(): bool + { + return false !== $this->current + && null !== $this->current; + } + }, + $result instanceof Result => new class ($result) implements Iterator { + private array|false|null $current = null; /* null until rewind() fetches the first row, so valid() is safe to call first */ + private int $key = 0; + + public function __construct(private Result $result) + { + } + + public function rewind(): void + { + pg_result_seek($this->result, 0); + $this->current = pg_fetch_assoc($this->result); + $this->key = 0; + } + + public function current(): array|false|null + { + return $this->current; + } + + public function key(): string|int|null + { + return $this->key; + } + + public function next(): void + { + $this->current = pg_fetch_assoc($this->result); + $this->key++; + } + + public function valid(): bool + { + return false !== $this->current + && null !== $this->current; + } + }, + $result instanceof PDOStatement => new class ($result) implements Iterator { + private ?ArrayIterator $cacheIterator = null; /* stays null until rewind() caches the fetched rows; the null-safe calls below rely on it */ + + public function __construct(private PDOStatement $result) + { + } + + public function rewind(): void + { + $this->cacheIterator ??= new ArrayIterator($this->result->fetchAll(PDO::FETCH_ASSOC)); + $this->cacheIterator->rewind(); + } + + public function current(): mixed + { + return $this->cacheIterator?->current() ?? false; + } + + public function key(): string|int|null + { + return $this->cacheIterator?->key() ?? null; + } + + public function next(): void + { + $this->cacheIterator?->next(); + } + + public function valid(): bool + { + return $this->cacheIterator?->valid() ?? 
false; + } + }, + default => throw new ValueError('Unknown or unsupported RDBMS result object '.$result::class), + }; + } +} diff --git a/src/RdbmsResultTest.php b/src/RdbmsResultTest.php new file mode 100644 index 00000000..ff4493fc --- /dev/null +++ b/src/RdbmsResultTest.php @@ -0,0 +1,122 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv; + +use PDO; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\TestCase; +use SQLite3; +use SQLite3Exception; +use SQLite3Result; +use SQLite3Stmt; +use stdClass; + +use function iterator_to_array; + +final class RdbmsResultTest extends TestCase +{ + #[Test] + public function it_can_be_used_with_sqlite3(): void + { + $db = new SQLite3(''); + $tableCreateQuery = <<exec($tableCreateQuery); + $seedData = [ + ['name' => 'Ronnie', 'email' => 'ronnie@example.com'], + ['name' => 'Bobby', 'email' => 'bobby@example.com'], + ['name' => 'Ricky', 'email' => 'ricky@example.com'], + ['name' => 'Mike', 'email' => 'mike@example.com'], + ['name' => 'Ralph', 'email' => 'ralph@example.com'], + ['name' => 'Johnny', 'email' => 'johnny@example.com'], + ]; + + $stmt = $db->prepare('INSERT INTO users (name, email) VALUES (:name, :email)'); + if (!$stmt instanceof SQLite3Stmt) { + throw new SQLite3Exception('Unable to prepare statement'); + } + + foreach ($seedData as $data) { + $stmt->bindValue(':name', $data['name']); + $stmt->bindValue(':email', $data['email']); + $stmt->execute(); + } + + /** @var SQLite3Stmt $stmt */ + $stmt = $db->prepare('SELECT * FROM users'); + /** @var SQLite3Result $result */ + $result = $stmt->execute(); + /** @var RdbmsResult $tabularData */ + $tabularData = RdbmsResult::tryFrom($result); + + self::assertSame(['id', 'name', 'email'], $tabularData->getHeader()); + self::assertCount(6, iterator_to_array($tabularData)); + self::assertSame( + ['id' => 1, 'name' => 
'Ronnie', 'email' => 'ronnie@example.com'], + ResultSet::createFromTabularData($tabularData)->first() + ); + } + + #[Test] + public function it_can_be_used_with_pdo(): void + { + $connection = new PDO('sqlite::memory:'); + $tableCreateQuery = <<exec($tableCreateQuery); + $seedData = [ + ['name' => 'Ronnie', 'email' => 'ronnie@example.com'], + ['name' => 'Bobby', 'email' => 'bobby@example.com'], + ['name' => 'Ricky', 'email' => 'ricky@example.com'], + ['name' => 'Mike', 'email' => 'mike@example.com'], + ['name' => 'Ralph', 'email' => 'ralph@example.com'], + ['name' => 'Johnny', 'email' => 'johnny@example.com'], + ]; + + $stmt = $connection->prepare('INSERT INTO users (name, email) VALUES (:name, :email)'); + foreach ($seedData as $data) { + $stmt->bindValue(':name', $data['name']); + $stmt->bindValue(':email', $data['email']); + $stmt->execute(); + } + + $stmt = $connection->prepare('SELECT * FROM users'); + $stmt->execute(); + /** @var RdbmsResult $tabularData */ + $tabularData = RdbmsResult::tryFrom($stmt); + + self::assertSame(['id', 'name', 'email'], $tabularData->getHeader()); + self::assertCount(6, iterator_to_array($tabularData)); + self::assertSame( + ['id' => 1, 'name' => 'Ronnie', 'email' => 'ronnie@example.com'], + ResultSet::createFromTabularData($tabularData)->first() + ); + } + + #[Test] + public function it_will_fail_with_an_unknown_object(): void + { + self::assertNull(RdbmsResult::tryFrom(new stdClass())); + } +} diff --git a/src/Reader.php b/src/Reader.php index ce82536c..c303300e 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -236,7 +236,7 @@ protected function removeBOM(array $record, int $bom_length, string $enclosure): public function fetchColumn(string|int $index = 0): Iterator { - return ResultSet::createFromTabularDataReader($this)->fetchColumn($index); + return ResultSet::createFromTabularData($this)->fetchColumn($index); } /** @@ -244,7 +244,7 @@ public function fetchColumn(string|int $index = 0): Iterator */ public function 
fetchColumnByName(string $name): Iterator { - return ResultSet::createFromTabularDataReader($this)->fetchColumnByName($name); + return ResultSet::createFromTabularData($this)->fetchColumnByName($name); } /** @@ -252,12 +252,12 @@ public function fetchColumnByName(string $name): Iterator */ public function fetchColumnByOffset(int $offset = 0): Iterator { - return ResultSet::createFromTabularDataReader($this)->fetchColumnByOffset($offset); + return ResultSet::createFromTabularData($this)->fetchColumnByOffset($offset); } public function value(int|string $column = 0): mixed { - return ResultSet::createFromTabularDataReader($this)->value($column); + return ResultSet::createFromTabularData($this)->value($column); } /** @@ -265,7 +265,7 @@ public function value(int|string $column = 0): mixed */ public function first(): array { - return ResultSet::createFromTabularDataReader($this)->first(); + return ResultSet::createFromTabularData($this)->first(); } /** @@ -273,7 +273,7 @@ public function first(): array */ public function nth(int $nth_record): array { - return ResultSet::createFromTabularDataReader($this)->nth($nth_record); + return ResultSet::createFromTabularData($this)->nth($nth_record); } /** @@ -283,7 +283,7 @@ public function nth(int $nth_record): array */ public function nthAsObject(int $nth, string $className, array $header = []): ?object { - return ResultSet::createFromTabularDataReader($this)->nthAsObject($nth, $className, $header); + return ResultSet::createFromTabularData($this)->nthAsObject($nth, $className, $header); } /** @@ -293,12 +293,12 @@ public function nthAsObject(int $nth, string $className, array $header = []): ?o */ public function firstAsObject(string $className, array $header = []): ?object { - return ResultSet::createFromTabularDataReader($this)->firstAsObject($className, $header); + return ResultSet::createFromTabularData($this)->firstAsObject($className, $header); } public function fetchPairs($offset_index = 0, $value_index = 1): Iterator { 
- return ResultSet::createFromTabularDataReader($this)->fetchPairs($offset_index, $value_index); + return ResultSet::createFromTabularData($this)->fetchPairs($offset_index, $value_index); } /** @@ -334,7 +334,7 @@ public function jsonSerialize(): array */ public function each(callable $callback): bool { - return ResultSet::createFromTabularDataReader($this)->each($callback); + return ResultSet::createFromTabularData($this)->each($callback); } /** @@ -342,7 +342,7 @@ public function each(callable $callback): bool */ public function exists(callable $callback): bool { - return ResultSet::createFromTabularDataReader($this)->exists($callback); + return ResultSet::createFromTabularData($this)->exists($callback); } /** @@ -355,7 +355,7 @@ public function exists(callable $callback): bool */ public function reduce(callable $callback, mixed $initial = null): mixed { - return ResultSet::createFromTabularDataReader($this)->reduce($callback, $initial); + return ResultSet::createFromTabularData($this)->reduce($callback, $initial); } /** @@ -381,7 +381,7 @@ public function map(callable $callback): Iterator */ public function chunkBy(int $recordsCount): iterable { - return ResultSet::createFromTabularDataReader($this)->chunkBy($recordsCount); + return ResultSet::createFromTabularData($this)->chunkBy($recordsCount); } /** @@ -472,12 +472,12 @@ public function matchingFirstOrFail(string $expression): TabularDataReader public function select(string|int ...$columns): TabularDataReader { - return ResultSet::createFromTabularDataReader($this)->select(...$columns); + return ResultSet::createFromTabularData($this)->select(...$columns); } public function selectAllExcept(string|int ...$columns): TabularDataReader { - return ResultSet::createFromTabularDataReader($this)->selectAllExcept(...$columns); + return ResultSet::createFromTabularData($this)->selectAllExcept(...$columns); } /** diff --git a/src/ResultSet.php b/src/ResultSet.php index cc031c65..82e69e4b 100644 --- a/src/ResultSet.php 
+++ b/src/ResultSet.php @@ -24,6 +24,7 @@ use League\Csv\Serializer\MappingFailed; use League\Csv\Serializer\TypeCastingFailed; use LimitIterator; +use RuntimeException; use function array_filter; use function array_flip; @@ -77,31 +78,37 @@ protected function validateHeader(array $header): array }; } - public function __destruct() + /** + * Returns a new instance from a object representing the result of querying a database. + * + * @throws RuntimeException If the result object is not supported + */ + public static function createFromRdbms(object $result): self { - unset($this->records); + return self::createFromTabularData(RdbmsResult::from($result)); } /** - * Returns a new instance from an object implementing the TabularDataReader interface. - * - * @throws SyntaxError + * Returns a new instance from a tabular data implementing object. */ - public static function createFromTabularDataReader(TabularDataReader $reader): self + public static function createFromTabularData(TabularData $records): self { - return new self($reader->getRecords(), $reader->getHeader()); + return new self($records->getIterator(), $records->getHeader()); } /** * Returns a new instance from a collection without header. - * - * @throws SyntaxError */ public static function createFromRecords(iterable $records = []): self { return new self(MapIterator::toIterator($records)); } + public function __destruct() + { + unset($this->records); + } + /** * Returns the header associated with the result set. * @@ -655,4 +662,15 @@ public function getObjects(string $className, array $header = []): Iterator { return $this->getRecordsAsObject($className, $header); } + + /** + * Returns a new instance from an object implementing the TabularDataReader interface. 
+ * + * @throws SyntaxError + */ + #[Deprecated(message:'use League\Csv\ResultSet::createFromTabularData() instead', since:'league/csv:9.22.0')] + public static function createFromTabularDataReader(TabularDataReader $reader): self + { + return self::createFromRecords($reader); + } } diff --git a/src/TabularData.php b/src/TabularData.php new file mode 100644 index 00000000..f1cbb589 --- /dev/null +++ b/src/TabularData.php @@ -0,0 +1,50 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv; + +use Iterator; +use IteratorAggregate; + +/** + * @template TValue of array + * @template-extends IteratorAggregate + */ +interface TabularData extends IteratorAggregate +{ + /** + * Returns the header associated with the tabular data. + * + * The header must contain unique string or to be an empty array + * if no header was specified. + * + * @return array + */ + public function getHeader(): array; + + /** + * Returns the tabular data records as an iterator object containing flat array. + * + * Each record is represented as a simple array containing strings or null values. + * + * If the CSV document has a header record then each record is combined + * to the header record and the header record is removed from the iterator. + * + * If the CSV document is inconsistent. Missing record fields are + * filled with null values while extra record fields are strip from + * the returned object. + * + * @return Iterator> + */ + public function getIterator(): Iterator; +} diff --git a/src/TabularDataReader.php b/src/TabularDataReader.php index 056be888..b296e305 100644 --- a/src/TabularDataReader.php +++ b/src/TabularDataReader.php @@ -17,7 +17,6 @@ use Countable; use Deprecated; use Iterator; -use IteratorAggregate; /** * Represents a Tabular data. 
@@ -45,11 +44,8 @@ * @method iterable matching(string $expression) extract all found fragment identifiers for the tabular data * @method iterable chunkBy(int $recordsCount) Chunk the TabulaDataReader into smaller TabularDataReader instances of the given size or less. * @method TabularDataReader mapHeader(array $headers) Returns a new TabulaDataReader with a new set of headers. - * - * @template TValue of array - * @template-extends IteratorAggregate */ -interface TabularDataReader extends Countable, IteratorAggregate +interface TabularDataReader extends Countable, TabularData { /** * Returns the number of records contained in the tabular data structure @@ -57,32 +53,6 @@ interface TabularDataReader extends Countable, IteratorAggregate */ public function count(): int; - /** - * Returns the tabular data records as an iterator object containing flat array. - * - * Each record is represented as a simple array containing strings or null values. - * - * If the CSV document has a header record then each record is combined - * to the header record and the header record is removed from the iterator. - * - * If the CSV document is inconsistent. Missing record fields are - * filled with null values while extra record fields are strip from - * the returned object. - * - * @return Iterator> - */ - public function getIterator(): Iterator; - - /** - * Returns the header associated with the tabular data. - * - * The header must contain unique string or to be an empty array - * if no header was specified. - * - * @return array - */ - public function getHeader(): array; - /** * Returns the tabular data records as an iterator object. * diff --git a/test_files/users.sqlite b/test_files/users.sqlite new file mode 100644 index 00000000..e69de29b