Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
bytestream committed May 21, 2020
1 parent a3edfe5 commit dbce3e8
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 10 deletions.
25 changes: 15 additions & 10 deletions src/HTML5/Parser/DOMTreeBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,12 @@ public function startTag($name, $attributes = array(), $selfClosing = false)
break;
}

// Case when no <body> exists, note section on 'Anything else' below.
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
if ($this->insertMode === static::IM_AFTER_HEAD && 'head' !== $name && 'body' !== $name) {
$this->startTag('body');
}

// Special case handling for SVG.
if ($this->insertMode === static::IM_IN_SVG) {
$lname = Elements::normalizeSvgElement($lname);
Expand Down Expand Up @@ -548,21 +554,20 @@ public function comment($cdata)

public function text($data)
{
// XXX: Hmmm.... should we really be this strict?
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
if ($this->insertMode < static::IM_IN_HEAD) {
// Per '8.2.5.4.3 The "before head" insertion mode' the characters
// " \t\n\r\f" should be ignored but no mention of a parse error. This is
// practical as most documents contain these characters. Other text is not
// expected here so recording a parse error is necessary.
// " \t\n\r\f" should be ignored.
$dataTmp = trim($data, " \t\n\r\f");
if (!empty($dataTmp)) {
// fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
$this->parseError('Unexpected text. Ignoring: ' . $dataTmp);
if (! empty($dataTmp)) {
$this->startTag('head');
$this->endTag('head');
$this->startTag('body');
} else {
return;
}

return;
}
// fprintf(STDOUT, "Appending text %s.", $data);

$node = $this->doc->createTextNode($data);
$this->current->appendChild($node);
}
Expand Down
46 changes: 46 additions & 0 deletions test/HTML5/Html5Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -480,4 +480,50 @@ public function testCDATA()
$res = $this->cycleFragment('a<![CDATA[ This <is> a test. ]]>b');
$this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
}

/**
 * Regression test for issue #166 (tag omission).
 *
 * Runs $input through loadHTML() and then saveHTML(), and checks that the
 * resulting markup contains $expected as a literal substring.
 *
 * @param string $input    Markup handed to loadHTML().
 * @param string $expected Text that must appear verbatim in the saveHTML() output.
 *
 * @dataProvider tagOmissionProvider
 */
public function testTagOmission($input, $expected)
{
    // preg_quote() escapes every regex metacharacter as well as the "|"
    // delimiter, so this unanchored pattern acts as a plain substring match.
    $pattern = "|" . preg_quote($expected, "|") . "|";

    $serialized = $this->html5->saveHTML($this->html5->loadHTML($input));

    $this->assertRegExp($pattern, $serialized);
}

/**
 * Data sets for testTagOmission().
 *
 * Each entry is a pair: the markup to parse, followed by the fragment that
 * must appear verbatim in the serialized output.
 *
 * @return string[][]
 */
public function tagOmissionProvider()
{
    return array(
        // Text placed directly inside <html>; the expected output wraps it
        // in <body> and adds an empty <head>.
        array(
            '<html>Hello, This is a test.<br />Does it work this time?</html>',
            '<html><head></head><body>Hello, This is a test.<br>Does it work this time?</body></html>',
        ),
        // test whitespace (\n) between tags of a document that already
        // spells out <head> and <body>.
        array(
            '<!DOCTYPE html>
<html>
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>
<body>
<br>
</body>
</html>',
            '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>
<body>
<br>
</body>',
        ),
    );
}
}

0 comments on commit dbce3e8

Please sign in to comment.