Skip to content

Commit

Permalink
Improve codebase
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Sep 25, 2023
1 parent de74ab7 commit 9261f4f
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 41 deletions.
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"ext-simplexml": "*",
"ext-json": "*",
"ext-mbstring": "*",
"league/csv": "^9.9.0"
"league/csv": "^9.6.0"
},
"require-dev": {
"ext-xdebug": "*",
Expand Down
40 changes: 15 additions & 25 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -355,23 +355,28 @@ private function streamToString($stream): string
*/
public function parseHTML(DOMDocument|DOMElement|SimpleXMLElement|Stringable|string $source): TabularDataReader
{
$xpath = new DOMXPath($this->sourceToDomDocument($source));
/** @var DOMNodeList<DOMElement> $query */
$query = $xpath->query($this->expression);
$query = (new DOMXPath($this->sourceToDomDocument($source)))->query($this->expression);
// Pick the node found at the configured table offset among the nodes matched by the expression.
$table = $query->item($this->tableOffset);
// Anything other than an element at that offset is reported as a missing table.
if (!$table instanceof DOMElement) {
throw new ParserError('The HTML table could not be found in the submitted html.');
}

return match (true) {
$table instanceof DOMElement => $this->convert(new DOMXPath($this->sourceToDomDocument($table))),
default => throw new ParserError('The HTML table could not be found in the submitted html.'),
$xpath = new DOMXPath($this->sourceToDomDocument($table));
// Header precedence: a non-empty configured header first, then the ignore flag
// (empty header), and only otherwise a header extracted from the table itself.
$header = match (true) {
[] !== $this->tableHeader => $this->tableHeader,
$this->ignoreTableHeader => [],
default => $this->extractTableHeader($xpath),
};

// The result set is built from the extracted table contents and the resolved header.
return new ResultSet($this->extractTableContents($xpath, $header), $header);
}

/**
* @throws ParserError
*/
private function sourceToDomDocument(
DOMDocument|SimpleXMLElement|DOMElement|Stringable|string $document,
): DOMDocument {
private function sourceToDomDocument(DOMDocument|SimpleXMLElement|DOMElement|Stringable|string $document): DOMDocument
{
if ($document instanceof DOMDocument) {
return $document;
}
Expand Down Expand Up @@ -400,28 +405,13 @@ private function sourceToDomDocument(
};
}

/**
 * Turns the table reachable through the given XPath handle into a tabular result.
 *
 * The header is resolved in this order: a non-empty configured header is used
 * as-is; otherwise, when the header must be ignored, an empty header is used;
 * otherwise the header is extracted using the configured header section expression.
 *
 * @throws ParserError
 * @throws SyntaxError
 */
private function convert(DOMXPath $xpath): TabularDataReader
{
    if ([] !== $this->tableHeader) {
        $resolvedHeader = $this->tableHeader;
    } elseif (true === $this->ignoreTableHeader) {
        // Strict comparison mirrors what a `match (true)` arm would accept.
        $resolvedHeader = [];
    } else {
        $headerExpression = $this->tableHeaderSection->xpath();
        $resolvedHeader = $this->extractTableHeader($xpath, $headerExpression);
    }

    $records = $this->extractTableContents($xpath, $resolvedHeader);

    return new ResultSet($records, $resolvedHeader);
}

/**
* @return array<string>
*/
private function extractTableHeader(DOMXPath $xpath, string $expression): array
private function extractTableHeader(DOMXPath $xpath): array
{
/** @var DOMNodeList<DOMElement> $query */
$query = $xpath->query($expression);
$query = $xpath->query($this->tableHeaderSection->xpath());
/** @var DOMElement|null $tr */
$tr = $query->item($this->tableHeaderOffset);

Expand Down
45 changes: 30 additions & 15 deletions src/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

use DOMDocument;
use DOMElement;
use League\Csv\TabularDataReader;
use LimitIterator;
use PHPUnit\Framework\Attributes\Test;
use PHPUnit\Framework\TestCase;
use SimpleXMLElement;
Expand Down Expand Up @@ -102,7 +104,7 @@ public function it_can_load_the_first_html_table_found_by_default(): void

self::assertSame(['prenoms', 'nombre', 'sexe', 'annee'], $table->getHeader());
self::assertCount(4, $table);
self::assertSame($table->first(), [
self::assertSame($this->getNthRecord($table), [
'prenoms' => 'Abdoulaye',
'nombre' => '15',
'sexe' => 'M',
Expand All @@ -117,7 +119,7 @@ public function it_can_load_the_first_html_table_found_by_default_without_the_he

self::assertSame([], $table->getHeader());
self::assertCount(4, $table);
self::assertSame($table->first(), [
self::assertSame($this->getNthRecord($table), [
'Abdoulaye',
'15',
'M',
Expand Down Expand Up @@ -164,7 +166,7 @@ public function it_uses_the_table_first_tr_to_search_for_the_header(): void
'nombre' => '15',
'sexe' => 'M',
'annee' => '2004',
], $table->first());
], $this->getNthRecord($table));

fclose($stream);
}
Expand Down Expand Up @@ -204,7 +206,7 @@ public function it_uses_the_table_first_tr_in_the_first_tbody_to_search_for_the_
'nombre' => '15',
'sexe' => 'M',
'annee' => '2004',
], $table->first());
], $this->getNthRecord($table));
}

#[Test]
Expand Down Expand Up @@ -232,7 +234,7 @@ public function it_will_use_the_submitted_headers(): void
$table = $parser->parseHTML(self::HTML);

self::assertSame(['firstname', 'count', 'gender', 'year'], $table->getHeader());
self::assertSame($table->first(), [
self::assertSame($this->getNthRecord($table), [
'firstname' => 'Abdoulaye',
'count' => '15',
'gender' => 'M',
Expand All @@ -256,8 +258,8 @@ public function it_will_duplicate_colspan_data(): void

$table = Parser::new()->parseHTML($html);

self::assertSame($table->nth(1), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']);
self::assertSame($table->nth(0), ['prenoms', 'nombre', 'sexe', 'annee']);
self::assertSame($this->getNthRecord($table, 1), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']);
self::assertSame($this->getNthRecord($table), ['prenoms', 'nombre', 'sexe', 'annee']);
}

#[Test]
Expand All @@ -280,8 +282,8 @@ public function it_will_ignore_the_malformed_header_by_deault(): void
$table = Parser::new()->parseHTML($dom);

self::assertSame([], $table->getHeader());
self::assertSame($table->nth(0), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']);
self::assertSame($table->nth(1), ['Abel', '14', 'M', '2004']);
self::assertSame($this->getNthRecord($table), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']);
self::assertSame($this->getNthRecord($table, 1), ['Abel', '14', 'M', '2004']);
}

#[Test]
Expand Down Expand Up @@ -366,7 +368,7 @@ public function it_will_use_the_table_footer(): void
->parseHTML($html);

self::assertSame([], $table->getHeader());
self::assertSame([], $table->first());
self::assertSame([], $this->getNthRecord($table));
}

#[Test]
Expand All @@ -393,13 +395,13 @@ public function it_uses_the_parser_formatter(): void
'nombre' => 15,
'sexe' => 'M',
'annee' => 2004,
], $table->first());
], $this->getNthRecord($table));

fclose($stream);
}

#[Test]
public function it_can_handle_rowspan(): void
public function it_can_handle_rowspan_and_colspan(): void
{
$table = <<<TABLE
<table>
Expand Down Expand Up @@ -445,10 +447,23 @@ public function it_can_handle_rowspan(): void
</table>
TABLE;

$reducer = fn (TabularDataReader $reader, string $value): int => array_reduce([...$reader], fn (int $carry, array $record): int => $carry + (array_count_values($record)[$value] ?? 0), 0);
$table = Parser::new()->parseHTML($table);

self::assertSame(2, $table->reduce(fn (int $carry, array $record): int => $carry + (array_count_values($record)['colspan'] ?? 0), 0));
self::assertSame(2, $table->reduce(fn (int $carry, array $record): int => $carry + (array_count_values($record)['rowspan'] ?? 0), 0));
self::assertSame(6, $table->reduce(fn (int $carry, array $record): int => $carry + (array_count_values($record)['colspan+rowspan'] ?? 0), 0));
self::assertSame(2, $reducer($table, 'colspan'));
self::assertSame(2, $reducer($table, 'rowspan'));
self::assertSame(6, $reducer($table, 'colspan+rowspan'));
}

/**
 * Fetches the record found at the given offset of the reader's records.
 *
 * An empty array is returned when no record exists at that offset.
 *
 * @return array<string>
 */
private function getNthRecord(TabularDataReader $reader, int $offset = 0): array
{
    // Restrict the record iterator to a one-element window starting at $offset.
    $window = new LimitIterator($reader->getRecords(), $offset, 1);
    $window->rewind();

    if (!$window->valid()) {
        return [];
    }

    /** @var array<string>|null $record */
    $record = $window->current();

    return $record ?? [];
}
}

0 comments on commit 9261f4f

Please sign in to comment.