Skip to content

Commit

Permalink
Fix phpGH-17122: memory leak in regex
Browse files Browse the repository at this point in the history
Because the subpattern names are persistent, and the fact that the
symbol table destruction is skipped when using fast_shutdown,
this means the refcounts will not be updated for the destruction of
the arrays that hold the subpattern name keys.
To solve this, detect this situation and duplicate the strings.
  • Loading branch information
nielsdos committed Dec 12, 2024
1 parent b86308c commit 9b2ccac
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 4 deletions.
50 changes: 46 additions & 4 deletions ext/pcre/php_pcre.c
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,22 @@ static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce
}
/* }}} */

/* Because the subpattern names are persistent, and the fact that the
* symbol table destruction is skipped when using fast_shutdown,
* this means the refcounts will not be updated for the destruction of
* the arrays that hold the subpattern name keys.
* To solve this, detect this situation and duplicate the strings. */
static zend_always_inline bool has_symbol_table_hazard(void)
{
zend_execute_data *ex = EG(current_execute_data);
if (UNEXPECTED(!ex || !ex->prev_execute_data)) {
return false;
}
/* Note: we may also narrow this to only check if the symbol table equals
* &EG(symbol_table), but this is a cheaper check. */
return ZEND_CALL_INFO(ex->prev_execute_data) & ZEND_CALL_HAS_SYMBOL_TABLE;
}

/* {{{ static calculate_unit_length */
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
Expand Down Expand Up @@ -971,12 +987,26 @@ static zend_always_inline void populate_match_value(

static inline void add_named(
HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
ZEND_ASSERT(GC_FLAGS(name) & IS_STR_PERMANENT);

bool symbol_table_hazard = has_symbol_table_hazard();

/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
* In this case we want to preserve the one that actually has a value. */
if (!unmatched) {
zend_hash_update(subpats, name, val);
if (EXPECTED(!symbol_table_hazard)) {
zend_hash_update(subpats, name, val);
} else {
zend_hash_str_update(subpats, ZSTR_VAL(name), ZSTR_LEN(name), val);
}
} else {
if (!zend_hash_add(subpats, name, val)) {
zval *zv;
if (EXPECTED(!symbol_table_hazard)) {
zv = zend_hash_add(subpats, name, val);
} else {
zv = zend_hash_str_add(subpats, ZSTR_VAL(name), ZSTR_LEN(name), val);
}
if (!zv) {
return;
}
}
Expand Down Expand Up @@ -1061,10 +1091,16 @@ static void populate_subpat_array(
zend_hash_next_index_insert_new(subpats_ht, &val);
}
if (unmatched_as_null) {
bool symbol_table_hazard = has_symbol_table_hazard();

for (i = count; i < num_subpats; i++) {
ZVAL_NULL(&val);
if (subpat_names[i]) {
zend_hash_add(subpats_ht, subpat_names[i], &val);
if (EXPECTED(!symbol_table_hazard)) {
zend_hash_add(subpats_ht, subpat_names[i], &val);
} else {
zend_hash_str_add(subpats_ht, ZSTR_VAL(subpat_names[i]), ZSTR_LEN(subpat_names[i]), &val);
}
}
zend_hash_next_index_insert_new(subpats_ht, &val);
}
Expand Down Expand Up @@ -1429,11 +1465,17 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
/* Add the match sets to the output array and clean up */
if (match_sets) {
if (subpat_names) {
bool symbol_table_hazard = has_symbol_table_hazard();

for (i = 0; i < num_subpats; i++) {
zval wrapper;
ZVAL_ARR(&wrapper, match_sets[i]);
if (subpat_names[i]) {
zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
if (EXPECTED(!symbol_table_hazard)) {
zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
} else {
zend_hash_str_update(Z_ARRVAL_P(subpats), ZSTR_VAL(subpat_names[i]), ZSTR_LEN(subpat_names[i]), &wrapper);
}
GC_ADDREF(match_sets[i]);
}
zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
Expand Down
30 changes: 30 additions & 0 deletions ext/pcre/tests/gh17122.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
--TEST--
GH-17122 (memory leak in regex)
--FILE--
<?php
preg_match('|(?P<name>)(\d+)|', 0xffffffff, $m1);
var_dump($m1);
\preg_match('|(?P<name2>)(\d+)|', 0, $m2);
var_dump($m2);
?>
--EXPECT--
array(4) {
[0]=>
string(10) "4294967295"
["name"]=>
string(0) ""
[1]=>
string(0) ""
[2]=>
string(10) "4294967295"
}
array(4) {
[0]=>
string(1) "0"
["name2"]=>
string(0) ""
[1]=>
string(0) ""
[2]=>
string(1) "0"
}

0 comments on commit 9b2ccac

Please sign in to comment.