Skip to content

Commit

Permalink
Tag Processor: throw when supplied unacceptable attribute names. (#44431
Browse files Browse the repository at this point in the history
)

The `WP_HTML_Tag_Processor` allows setting new HTML attributes with
a given name and value. Previously this has allowed any string input
for the attribute name, but we have to be careful not to print output
that might break the HTML we're modifying.

In this patch we're adding a check against the given attribute name
and rejecting invalid or unacceptable names. WordPress here is more
restrictive than HTML5.

In order to avoid crashing real sites this only throws an exception when
`WP_DEBUG` is set and active; in production environments it ignores
the attribute update, skipping the invalid name.

Co-authored-by: Adam Zieliński <[email protected]>
  • Loading branch information
dmsnell and adamziel authored Sep 30, 2022
1 parent 0b30a4c commit 1ab25e5
Show file tree
Hide file tree
Showing 3 changed files with 1,377 additions and 0 deletions.
45 changes: 45 additions & 0 deletions lib/experimental/html/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1102,12 +1102,57 @@ public function get_tag() {
*
* @param string $name The attribute name to target.
* @param string|boolean $value The new attribute value.
* @throws Exception When WP_DEBUG is true and the attribute name is invalid.
*/
public function set_attribute( $name, $value ) {
if ( null === $this->tag_name_starts_at ) {
return;
}

/*
* Verify that the attribute name is allowable. In WP_DEBUG
* environments we want to crash quickly to alert developers
* of typos and issues; but in production we don't want to
* interrupt a normal page view, so we'll silently avoid
* updating the attribute in those cases.
*
* Of note, we're disallowing more characters than are strictly
* forbidden in HTML5. This is to prevent additional security
* risks deeper in the WordPress and plugin stack. Specifically
* we additionally reject the less-than (<) and ampersand (&);
* the greater-than (>) is already forbidden by HTML5 itself.
*
* The use of a PCRE match allows us to look for specific Unicode
* code points without writing a UTF-8 decoder. Whereas scanning
* for one-byte characters is trivial (with `strcspn`), scanning
* for the longer byte sequences would be more complicated, and
* this shouldn't be in the hot path for execution so we can
* compromise on the efficiency at this point.
*
* @see https://html.spec.whatwg.org/#attributes-2
*/
if ( preg_match(
'~[' .
// Syntax-like characters.
'"\'>&</ =' .
// Control characters.
'\x{00}-\x{1F}' .
// HTML noncharacters.
'\x{FDD0}-\x{FDEF}' .
'\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}' .
'\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}' .
'\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}' .
'\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}' .
'\x{10FFFE}\x{10FFFF}' .
']~Ssu',
$name
) ) {
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
throw new Exception( 'Invalid attribute name' );
}

return;
}

/*
* > The values "true" and "false" are not allowed on boolean attributes.
* > To represent a false value, the attribute has to be omitted altogether.
Expand Down
145 changes: 145 additions & 0 deletions phpunit/html/WP_HTML_Tag_Processor_Isolated_Test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
<?php
/**
* Unit tests covering WP_HTML_Tag_Processor functionality.
*
* @package WordPress
* @subpackage HTML
*/

if ( ! function_exists( 'esc_attr' ) ) {
	/**
	 * Minimal fallback used only when no `esc_attr()` has been defined yet.
	 *
	 * It replaces every double-quote with `&quot;` and performs no other
	 * escaping.
	 *
	 * @param string $s Text to escape.
	 * @return string The text with double-quotes replaced by `&quot;`.
	 */
	function esc_attr( $s ) {
		$double_quote = '"';
		$quote_entity = '&quot;';

		return str_replace( $double_quote, $quote_entity, $s );
	}
}

if ( false === class_exists( 'WP_UnitTestCase' ) ) {
	/**
	 * Empty stand-in declared only when no `WP_UnitTestCase` class exists,
	 * so the test class in this file can still extend it on top of the
	 * plain PHPUnit test case.
	 */
	abstract class WP_UnitTestCase extends \PHPUnit\Framework\TestCase {
	}
}

require_once __DIR__ . '/../../lib/experimental/html/index.php';

/**
* Runs tests in isolated PHP process for verifying behaviors
* that depend on the `WP_DEBUG` constant value, if set.
*
* @group html
*
* @coversDefaultClass WP_HTML_Tag_Processor
*/
class WP_HTML_Tag_Processor_Isolated_Test extends WP_UnitTestCase {
	// phpcs:disable WordPress.NamingConventions.ValidVariableName.PropertyNotSnakeCase
	/**
	 * Asks PHPUnit to run every test of this class in its own PHP process.
	 *
	 * The tests define the `WP_DEBUG` constant, and a constant cannot be
	 * redefined or removed once set, so each test needs a fresh process.
	 *
	 * @var bool
	 */
	protected $runTestInSeparateProcess = true;

	/**
	 * Turns on `WP_DEBUG` for the current process when it is not defined yet.
	 *
	 * Guarding the `define()` avoids an "already defined" warning when the
	 * surrounding environment has set the constant before the test runs.
	 *
	 * @return bool Whether `WP_DEBUG` is truthy after the call.
	 */
	private function enable_debug_mode() {
		if ( ! defined( 'WP_DEBUG' ) ) {
			define( 'WP_DEBUG', true );
		}

		return (bool) WP_DEBUG;
	}

	/**
	 * Attribute names with invalid characters should be rejected.
	 *
	 * When WP_DEBUG is set we want to throw an error to alert a
	 * developer that they are sending invalid attribute names.
	 *
	 * The exception is caught by hand instead of using `expectException()`
	 * so that the "markup is left untouched" assertion still runs after
	 * the throw.
	 *
	 * @dataProvider data_invalid_attribute_names
	 * @covers ::set_attribute
	 *
	 * @param string $attribute_name Invalid attribute name under test.
	 */
	public function test_set_attribute_throw_when_given_invalid_attribute_names_in_debug_mode( $attribute_name ) {
		if ( ! $this->enable_debug_mode() ) {
			$this->markTestSkipped( 'WP_DEBUG is already defined as false; debug-mode behavior cannot be exercised.' );
		}

		$p = new WP_HTML_Tag_Processor( '<span></span>' );
		$p->next_tag();

		$thrown = null;
		try {
			$p->set_attribute( $attribute_name, 'test' );
		} catch ( Exception $e ) {
			$thrown = $e;
		}

		$this->assertInstanceOf( Exception::class, $thrown, 'An invalid attribute name should throw when WP_DEBUG is on.' );
		$this->assertSame( '<span></span>', (string) $p, 'The markup should not change when the attribute name is rejected.' );
	}

	/**
	 * Attribute names with invalid characters should be rejected.
	 *
	 * When WP_DEBUG isn't set we want to quietly fail to set the
	 * invalid attribute to avoid breaking the HTML and to do so
	 * without breaking the entire page.
	 *
	 * @dataProvider data_invalid_attribute_names
	 * @covers ::set_attribute
	 *
	 * @param string $attribute_name Invalid attribute name under test.
	 */
	public function test_set_attribute_silently_fails_when_given_invalid_attribute_names_outside_of_debug_mode( $attribute_name ) {
		// This test is only meaningful when debug mode is off.
		if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
			$this->markTestSkipped( 'WP_DEBUG is already enabled; non-debug behavior cannot be exercised.' );
		}

		$p = new WP_HTML_Tag_Processor( '<span></span>' );

		$p->next_tag();
		$p->set_attribute( $attribute_name, 'test' );

		$this->assertSame( '<span></span>', (string) $p );
	}

	/**
	 * Data provider with invalid HTML attribute names.
	 *
	 * @return array {
	 *     @type string $attribute_name Text considered invalid for HTML attribute names.
	 * }
	 */
	public static function data_invalid_attribute_names() {
		return array(
			'controls_null'    => array( "i\x00d" ),
			'controls_newline' => array( "\nbroken-expectations" ),
			'space'            => array( 'aria label' ),
			'double-quote'     => array( '"id"' ),
			'single-quote'     => array( "'id'" ),
			'greater-than'     => array( 'sneaky>script' ),
			'solidus'          => array( 'data/test-id' ),
			'equals'           => array( 'checked=checked' ),
			'noncharacters_1'  => array( html_entity_decode( 'anything&#xFDD0;' ) ),
			'noncharacters_2'  => array( html_entity_decode( 'te&#xFFFF;st' ) ),
			'noncharacters_3'  => array( html_entity_decode( 'te&#x2FFFE;st' ) ),
			'noncharacters_4'  => array( html_entity_decode( 'te&#xDFFFF;st' ) ),
			'noncharacters_5'  => array( html_entity_decode( '&#x10FFFE;' ) ),
			// Characters WordPress rejects in addition to the HTML rules.
			'wp_no_lt'         => array( 'id<script' ),
			'wp_no_amp'        => array( 'class&lt;script' ),
		);
	}

	/**
	 * Attribute names with only valid characters should not be rejected.
	 *
	 * > Attributes have a name and a value. Attribute names must
	 * > consist of one or more characters other than controls,
	 * > U+0020 SPACE, U+0022 ("), U+0027 ('), U+003E (>),
	 * > U+002F (/), U+003D (=), and noncharacters.
	 *
	 * @see https://html.spec.whatwg.org/#attributes-2
	 *
	 * @dataProvider data_valid_attribute_names
	 * @covers ::set_attribute
	 *
	 * @param string $attribute_name Valid attribute name under test.
	 */
	public function test_set_attribute_does_not_reject_valid_attribute_names( $attribute_name ) {
		$this->enable_debug_mode();

		$p = new WP_HTML_Tag_Processor( '<span></span>' );

		$p->next_tag();
		$p->set_attribute( $attribute_name, 'test' );

		$this->assertSame( "<span $attribute_name=\"test\"></span>", (string) $p );
	}

	/**
	 * Data provider with valid HTML attribute names.
	 *
	 * @return array {
	 *     @type string $attribute_name Text considered valid for HTML attribute names.
	 * }
	 */
	public static function data_valid_attribute_names() {
		return array(
			'ascii_letters'         => array( 'abcdefghijklmnopqrstuwxyzABCDEFGHIJKLMNOPQRSTUWXYZ' ),
			'ascii_numbers'         => array( '0123456789' ),
			'symbols'               => array( '!@#$%^*()[]{};:\\||,.?`~£§±' ),
			// U+274C CROSS MARK, written as UTF-8 byte escapes so the dataset
			// cannot silently collapse into an empty string if the source
			// file loses the literal character.
			'emoji'                 => array( "\xE2\x9D\x8C" ),
			'utf8_diacritics'       => array( 'ÁÄÂÀÃÅČÇĆĎÉĚËÈÊẼĔȆĞÍÌÎÏİŇÑÓÖÒÔÕØŘŔŠŞŤÚŮÜÙÛÝŸŽáäâàãåčçćďéěëèêẽĕȇğíìîïıňñóöòôõøðřŕšşťúůüùûýÿžþÞĐđßÆa' ),
			'hebrew_accents'        => array( html_entity_decode( '&#x059D;a' ) ),
			// See https://arxiv.org/abs/2111.00169.
			'rtl_magic'             => array( html_entity_decode( '&#x2067;&#x2066;abc&#x2069;&#x2066;def&#x2069;&#x2069;' ) ),
			// Only a single unicode "noncharacter" should be rejected. Specific byte segments used in the "noncharacter" sequence are valid.
			'noncharacter_segments' => array( "\xFF\xFE" ),
		);
	}

}
Loading

0 comments on commit 1ab25e5

Please sign in to comment.