From 3b9aefb06cae1f34ed19b08caf3b3ed32df04b73 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 16 Apr 2024 12:06:46 +0000 Subject: [PATCH] HTML API: Validate HTML Processor against external test suite from html5lib. In this patch, the test suite from html5lib validates the tree-construction steps in the HTML Processor to ensure that they are behaving according to the HTML specification. This suite of tests is also used by the servo project to test its html5ever package. A new test module in the HTML API transforms HTML Processor output to match the expected tree shape from the external tests. For cases where there are tests validating behaviors of unsupported HTML tags and constructs, the tests are marked as skipped. As the HTML API continues to expand its own support, the number of skipped tests will automatically shrink down towards zero. Additional tests are skipped through the `SKIP_TEST` array in the test runner. Fixes #60227. See #58517. Props azaozz, costdev, dmsnell, hellofromtonya, jonsurrell, jorbin, swisspidy. 
git-svn-id: https://develop.svn.wordpress.org/trunk@58010 602fd350-edb4-49c9-b593-d223f7449a82 --- phpunit.xml.dist | 1 + .../data/html5lib-tests/.gitattributes | 1 + tests/phpunit/data/html5lib-tests/AUTHORS.rst | 34 + tests/phpunit/data/html5lib-tests/LICENSE | 21 + tests/phpunit/data/html5lib-tests/README.md | 25 + .../tree-construction/README.md | 108 + .../tree-construction/adoption01.dat | 354 +++ .../tree-construction/adoption02.dat | 39 + .../tree-construction/blocks.dat | 695 +++++ .../tree-construction/comments01.dat | 217 ++ .../tree-construction/doctype01.dat | 474 +++ .../tree-construction/domjs-unsafe.dat | Bin 0 -> 10356 bytes .../tree-construction/entities01.dat | 943 ++++++ .../tree-construction/entities02.dat | 309 ++ .../tree-construction/foreign-fragment.dat | 645 ++++ .../tree-construction/html5test-com.dat | 301 ++ .../tree-construction/inbody01.dat | 54 + .../tree-construction/isindex.dat | 49 + .../tree-construction/main-element.dat | 46 + .../html5lib-tests/tree-construction/math.dat | 104 + .../tree-construction/menuitem-element.dat | 240 ++ .../namespace-sensitivity.dat | 22 + .../tree-construction/noscript01.dat | 237 ++ ...pending-spec-changes-plain-text-unsafe.dat | Bin 0 -> 927 bytes .../pending-spec-changes.dat | 46 + .../tree-construction/plain-text-unsafe.dat | Bin 0 -> 9486 bytes .../tree-construction/quirks01.dat | 53 + .../html5lib-tests/tree-construction/ruby.dat | 302 ++ .../tree-construction/scriptdata01.dat | 372 +++ .../tree-construction/search-element.dat | 46 + .../html5lib-tests/tree-construction/svg.dat | 104 + .../tree-construction/tables01.dat | 322 ++ .../tree-construction/template.dat | 1673 +++++++++++ .../tree-construction/tests1.dat | 1956 +++++++++++++ .../tree-construction/tests10.dat | 849 ++++++ .../tree-construction/tests11.dat | 523 ++++ .../tree-construction/tests12.dat | 62 + .../tree-construction/tests14.dat | 75 + .../tree-construction/tests15.dat | 216 ++ .../tree-construction/tests16.dat | 2602 
+++++++++++++++++ .../tree-construction/tests17.dat | 179 ++ .../tree-construction/tests18.dat | 558 ++++ .../tree-construction/tests19.dat | 1398 +++++++++ .../tree-construction/tests2.dat | 831 ++++++ .../tree-construction/tests20.dat | 842 ++++++ .../tree-construction/tests21.dat | 306 ++ .../tree-construction/tests22.dat | 190 ++ .../tree-construction/tests23.dat | 168 ++ .../tree-construction/tests24.dat | 79 + .../tree-construction/tests25.dat | 288 ++ .../tree-construction/tests26.dat | 453 +++ .../tree-construction/tests3.dat | 305 ++ .../tree-construction/tests4.dat | 74 + .../tree-construction/tests5.dat | 210 ++ .../tree-construction/tests6.dat | 663 +++++ .../tree-construction/tests7.dat | 453 +++ .../tree-construction/tests8.dat | 165 ++ .../tree-construction/tests9.dat | 472 +++ .../tree-construction/tests_innerHTML_1.dat | 843 ++++++ .../tree-construction/tricky01.dat | 336 +++ .../tree-construction/webkit01.dat | 785 +++++ .../tree-construction/webkit02.dat | 554 ++++ tests/phpunit/multisite.xml | 1 + .../html-api/wpHtmlProcessorHtml5lib.php | 372 +++ 64 files changed, 24645 insertions(+) create mode 100644 tests/phpunit/data/html5lib-tests/.gitattributes create mode 100644 tests/phpunit/data/html5lib-tests/AUTHORS.rst create mode 100644 tests/phpunit/data/html5lib-tests/LICENSE create mode 100644 tests/phpunit/data/html5lib-tests/README.md create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/README.md create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/adoption01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/adoption02.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/blocks.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/comments01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/doctype01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/domjs-unsafe.dat create mode 100644 
tests/phpunit/data/html5lib-tests/tree-construction/entities01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/entities02.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/foreign-fragment.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/html5test-com.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/inbody01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/isindex.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/main-element.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/math.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/menuitem-element.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/namespace-sensitivity.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/noscript01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/pending-spec-changes.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/plain-text-unsafe.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/quirks01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/ruby.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/scriptdata01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/search-element.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/svg.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tables01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/template.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests1.dat create mode 100644 
tests/phpunit/data/html5lib-tests/tree-construction/tests10.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests11.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests12.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests14.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests15.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests16.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests17.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests18.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests19.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests2.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests20.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests21.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests22.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests23.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests24.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests25.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests26.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests3.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests4.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests5.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests6.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests7.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests8.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tests9.dat create mode 100644 
tests/phpunit/data/html5lib-tests/tree-construction/tests_innerHTML_1.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/tricky01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/webkit01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/webkit02.dat create mode 100644 tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php diff --git a/phpunit.xml.dist b/phpunit.xml.dist index f432b02e32755..ddbe4bd080ce1 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -28,6 +28,7 @@ ms-files ms-required external-http + html-api-html5lib-tests diff --git a/tests/phpunit/data/html5lib-tests/.gitattributes b/tests/phpunit/data/html5lib-tests/.gitattributes new file mode 100644 index 0000000000000..b23807b2c3335 --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/.gitattributes @@ -0,0 +1 @@ +*.dat -text diff diff --git a/tests/phpunit/data/html5lib-tests/AUTHORS.rst b/tests/phpunit/data/html5lib-tests/AUTHORS.rst new file mode 100644 index 0000000000000..4a7de17ad456c --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/AUTHORS.rst @@ -0,0 +1,34 @@ +Credits +======= + +The ``html5lib`` test data is maintained by: + +- James Graham +- Geoffrey Sneddon + + +Contributors +------------ + +- Adam Barth +- Andi Sidwell +- Anne van Kesteren +- David Flanagan +- Edward Z. 
Yang +- Geoffrey Sneddon +- Henri Sivonen +- Ian Hickson +- Jacques Distler +- James Graham +- Lachlan Hunt +- lantis63 +- Mark Pilgrim +- Mats Palmgren +- Ms2ger +- Nolan Waite +- Philip Taylor +- Rafael Weinstein +- Ryan King +- Sam Ruby +- Simon Pieters +- Thomas Broyer diff --git a/tests/phpunit/data/html5lib-tests/LICENSE b/tests/phpunit/data/html5lib-tests/LICENSE new file mode 100644 index 0000000000000..8812371b41cfc --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and +other contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/tests/phpunit/data/html5lib-tests/README.md b/tests/phpunit/data/html5lib-tests/README.md new file mode 100644 index 0000000000000..be775c8b497b5 --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/README.md @@ -0,0 +1,25 @@ +# html5lib-tests + +This directory contains a third-party test suite used for testing the WordPress HTML API. 
+ +`html5lib-tests` can be found on GitHub at [html5lib/html5lib-tests](https://github.com/html5lib/html5lib-tests). + +The necessary files have been copied to this directory: + +- `AUTHORS.rst` +- `LICENSE` +- `README.md` +- `tree-construction/README.md` +- `tree-construction/*.dat` + +The version of these files was taken from the git commit with +SHA [`a9f44960a9fedf265093d22b2aa3c7ca123727b9`](https://github.com/html5lib/html5lib-tests/commit/a9f44960a9fedf265093d22b2aa3c7ca123727b9). + +## Updating + +If there have been changes to the html5lib-tests repository, this test suite can be updated. In +order to update: + +1. Check out the latest version of the git repository mentioned above. +1. Copy the files listed above into this directory. +1. Update the SHA mentioned in this README file with the new html5lib-tests SHA. diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/README.md b/tests/phpunit/data/html5lib-tests/tree-construction/README.md new file mode 100644 index 0000000000000..4737a3a867e86 --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/tree-construction/README.md @@ -0,0 +1,108 @@ +Tree Construction Tests +======================= + +Each file containing tree construction tests consists of any number of +tests separated by two newlines (LF) and a single newline before the end +of the file. For instance: + + [TEST]LF + LF + [TEST]LF + LF + [TEST]LF + +Where [TEST] is the following format: + +Each test must begin with a string "\#data" followed by a newline (LF). +All subsequent lines until a line that says "\#errors" are the test data +and must be passed to the system being tested unchanged, except with the +final newline (on the last line) removed. + +Then there must be a line that says "\#errors". It must be followed by +one line per parse error that a conformant checker would return. 
It +doesn't matter what those lines are, although they can't be +"\#new-errors", "\#document-fragment", "\#document", "\#script-off", +"\#script-on", or empty, the only thing that matters is that there be +the right number of parse errors. + +Then there \*may\* be a line that says "\#new-errors", which works like +the "\#errors" section adding more errors to the expected number of +errors. + +Then there \*may\* be a line that says "\#document-fragment", which must +be followed by a newline (LF), followed by a string of characters that +indicates the context element, followed by a newline (LF). If the string +of characters starts with "svg ", the context element is in the SVG +namespace and the substring after "svg " is the local name. If the +string of characters starts with "math ", the context element is in the +MathML namespace and the substring after "math " is the local name. +Otherwise, the context element is in the HTML namespace and the string +is the local name. If this line is present the "\#data" must be parsed +using the HTML fragment parsing algorithm with the context element as +context. + +Then there \*may\* be a line that says "\#script-off" or +"\#script-on". If a line that says "\#script-off" is present, the +parser must set the scripting flag to disabled. If a line that says +"\#script-on" is present, it must set it to enabled. Otherwise, the +test should be run in both modes. + +Then there must be a line that says "\#document", which must be followed +by a dump of the tree of the parsed DOM. Each node must be represented +by a single line. Each line must start with "| ", followed by two spaces +per parent node that the node has before the root document node. + +- Element nodes must be represented by a "`<`" then the *tag name + string* "`>`", and all the attributes must be given, sorted + lexicographically by UTF-16 code unit according to their *attribute + name string*, on subsequent lines, as if they were children of the + element node. 
+- Attribute nodes must have the *attribute name string*, then an "=" + sign, then the attribute value in double quotes ("). +- Text nodes must be the string, in double quotes. Newlines aren't + escaped. +- Comments must be "`<`" then "`!-- `" then the data then "` -->`". +- DOCTYPEs must be "`<!DOCTYPE `" then the name then if either of the + system id or public id is non-empty a space, public id in + double-quotes, another space and the system id in double-quotes, and + then in any case "`>`". +- Processing instructions must be "`<?`", then the target, then a + space, then the data and then "`>`". (The HTML parser cannot emit + processing instructions, but scripts can, and the WebVTT to DOM + rules can emit them.) +- Template contents are represented by the string "content" with the + children below it. + +The *tag name string* is the local name prefixed by a namespace +designator. For the HTML namespace, the namespace designator is the +empty string, i.e. there's no prefix. For the SVG namespace, the +namespace designator is "svg ". For the MathML namespace, the namespace +designator is "math ". + +The *attribute name string* is the local name prefixed by a namespace +designator. For no namespace, the namespace designator is the empty +string, i.e. there's no prefix. For the XLink namespace, the namespace +designator is "xlink ". For the XML namespace, the namespace designator +is "xml ". For the XMLNS namespace, the namespace designator is "xmlns +". Note the difference between "xlink:href" which is an attribute in no +namespace with the local name "xlink:href" and "xlink href" which is an +attribute in the xlink namespace with the local name "href". + +If there is also a "\#document-fragment" the bit following "\#document" +must be a representation of the HTML fragment serialization for the +context element given by "\#document-fragment". + +For example: + + #data + <p>One<p>Two + #errors + 3: Missing document type declaration + #document + | <html> + |   <head> + |   <body> + |     <p> + |       "One" + |     <p>
+ |       "Two" diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/adoption01.dat b/tests/phpunit/data/html5lib-tests/tree-construction/adoption01.dat new file mode 100644 index 0000000000000..38f98efded0ae --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/tree-construction/adoption01.dat @@ -0,0 +1,354 @@ +#data +<a><p></a></p> +#errors +(1,3): expected-doctype-but-got-start-tag +(1,10): adoption-agency-1.3 +#document +| <html> +|   <head> +|   <body> +|     <a> +|     <p> +|       <a> + +#data +<a>1<p>2</a>3</p> +#errors +(1,3): expected-doctype-but-got-start-tag +(1,12): adoption-agency-1.3 +#document +| <html> +|   <head> +|   <body> +|     <a> +|       "1" +|     <p> +|       <a> +|         "2" +|       "3" + +#data +<a>1<button>2</a>3</button> +#errors +(1,3): expected-doctype-but-got-start-tag +(1,17): adoption-agency-1.3 +#document +| <html> +|   <head> +|   <body> +|     <a> +|       "1" +|