From 85e66061e29317b2167235c64c9a1adc435260ad Mon Sep 17 00:00:00 2001 From: Hermanni Piirainen Date: Mon, 4 Nov 2024 16:34:16 +0200 Subject: [PATCH] Fix FT.AGGREGATE incompatibilities with Redisearch version >= 2.8.4 (#146) * Fix FT.AGGREGATE incompatibilities with Redisearch version >= 2.8.4 Fixes #132 * Add UNF support for numeric fields --- CHANGELOG.md | 7 +++++ src/Entity/NumericField.php | 12 +++++++ src/Index/PostIndex.php | 62 +++++++++++++++++++------------------ src/PostQuery.php | 40 ++++++++++++------------ 4 files changed, 71 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c508fbf..435814f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Fixed +- Remove INFIELDS from FT.AGGREGATE queries, INFIELDS has no effect there. + - See https://redis.io/docs/latest/commands/ft.aggregate/ +- Fix incompatibility with Redisearch versions >= 2.8.4. + - Remove properties present in GROUPBY from the reducers, they are implicitly included and produce a duplicate property name error when included. +- Set UNF to true for core numeric fields so that they are recognised by RediPress as being present in the schema. + ## [2.0.12] - 2024-10-21 ### Fixed diff --git a/src/Entity/NumericField.php b/src/Entity/NumericField.php index 795e482..f78d155 100644 --- a/src/Entity/NumericField.php +++ b/src/Entity/NumericField.php @@ -26,6 +26,13 @@ class NumericField extends SchemaField { */ public $sortable = false; + /** + * Whether the field is un-normalized. + * + * @var boolean + */ + public $unf = false; + /** * Field constructor * @@ -44,6 +51,7 @@ public function __construct( array $args ) { } $this->sortable = $args['sortable'] ?? false; + $this->unf = $args['unf'] ?? $this->unf; } /** @@ -58,6 +66,10 @@ public function export() : array { $export[] = 'SORTABLE'; } + if ( $this->unf ) { + $export[] = 'UNF'; + } + return $export; } } diff --git a/src/Index/PostIndex.php b/src/Index/PostIndex.php index 5ac8902..2f5b131 100644 --- a/src/Index/PostIndex.php +++ b/src/Index/PostIndex.php @@ -106,7 +106,7 @@ public function __construct( Client $client ) { * * @return int */ - public static function index_total() : int { + public static function index_total(): int { global $wpdb; $ids = intval( $wpdb->get_row( "SELECT count(*) as count FROM $wpdb->posts" )->count ); // phpcs:ignore return $ids; @@ -117,7 +117,7 @@ public static function index_total() : int { * * @return array */ - public function define_core_fields() : array { + public function define_core_fields(): array { // Define the WordPress core fields $core_schema_fields = [ new TextField([ @@ -143,7 +143,7 @@ public function define_core_fields() : array { 'name' => 'post_author', ]), new TextField([ - 'name' => 'post_author_id', + 'name' => 'post_author_id', 'sortable' => true, ]), new TextField([ @@ -153,6 +153,7 @@ public function define_core_fields() : array { new NumericField([ 'name' => 'menu_order', 'sortable' => true, + 'unf' => true, ]), new TextField([ 'name' => 'post_status', @@ -165,6 +166,7 @@ public function define_core_fields() : array { new NumericField([ 'name' => 'post_date', 'sortable' => true, + 'unf' => true, ]), new TextField([ 'name' => 'post_parent', @@ -261,7 +263,7 @@ public function schedule_partial_index( $args = null ) { * @param array|null $args Array containing Limit & offset details or null if not doing a partial index. * @return int Amount of items indexed. */ - public function index_all( array $args = null, array $query_args = [] ) : int { + public function index_all( array $args = null, array $query_args = [] ): int { global $wpdb; define( 'WP_IMPORTING', true ); @@ -311,13 +313,13 @@ public function index_all( array $args = null, array $query_args = [] ) : int { $progress = null; } - $posts = array_map( function( $row ) { + $posts = array_map( function ( $row ) { return get_post( $row->ID ); }, $ids ); $posts = apply_filters( 'redipress/index/posts/custom', $posts ); - $result = array_map( function( $post ) use ( $progress ) { + $result = array_map( function ( $post ) use ( $progress ) { self::$indexing = $post->ID; $language = apply_filters( 'redipress/index/posts/language', $post->lang ?? null, $post ); @@ -362,7 +364,7 @@ public function index_all( array $args = null, array $query_args = [] ) : int { * @param mixed $post_id The current doc id set for the post. * @return string */ - public static function get_document_id( \WP_Post $post, $post_id = null ) : string { + public static function get_document_id( \WP_Post $post, $post_id = null ): string { if ( $post_id && (string) $post->ID !== (string) $post_id ) { $id = $post_id; } @@ -386,7 +388,7 @@ public static function get_document_id( \WP_Post $post, $post_id = null ) : stri * @param \WP_REST_Request|null $request Rest request details or null if not rest api request. * @return int Amount of items indexed. */ - public function index_missing( \WP_REST_Request $request = null, array $query_args = [] ) : int { + public function index_missing( \WP_REST_Request $request = null, array $query_args = [] ): int { global $wpdb; \do_action( 'redipress/before_index_all', $request ); @@ -421,7 +423,7 @@ public function index_missing( \WP_REST_Request $request = null, array $query_ar $progress = \WP_CLI\Utils\make_progress_bar( __( 'Checking existing posts', 'redipress' ), $count ); - $posts = array_filter( $ids, function( $row ) use ( $progress ) { + $posts = array_filter( $ids, function ( $row ) use ( $progress ) { $present = \Geniem\RediPress\get_post( $row->ID ); $progress->tick(); @@ -431,7 +433,7 @@ public function index_missing( \WP_REST_Request $request = null, array $query_ar $progress->finish(); - $posts = array_map( function( $id ) { + $posts = array_map( function ( $id ) { return \get_post( $id ); }, $posts ); @@ -441,7 +443,7 @@ public function index_missing( \WP_REST_Request $request = null, array $query_ar $count += count( $custom_posts ); - $custom_posts = array_filter( $custom_posts, function( $row ) { + $custom_posts = array_filter( $custom_posts, function ( $row ) { $present = \Geniem\RediPress\get_post( $row->ID ); return empty( $present ); @@ -460,7 +462,7 @@ public function index_missing( \WP_REST_Request $request = null, array $query_ar $progress = null; } - $result = array_map( function( $post ) use ( $progress ) { + $result = array_map( function ( $post ) use ( $progress ) { self::$indexing = $post->ID; $language = apply_filters( 'redipress/post_language', $post->lang ?? null, $post ); @@ -586,7 +588,7 @@ public function delete( $post_id ) { * @param \WP_Post $post The post object to convert. * @return array */ - public function convert_post( \WP_Post $post ) : array { + public function convert_post( \WP_Post $post ): array { $settings = new Settings(); \do_action( 'redipress/before_index_post', $post ); @@ -628,7 +630,7 @@ public function convert_post( \WP_Post $post ) : array { $additional_fields = array_diff( $fields, $core_field_names ); - $additional_values = array_map( function( $field ) use ( $post ) { + $additional_values = array_map( function ( $field ) use ( $post ) { $value = self::get( $post->ID, $field ); $value = apply_filters( 'redipress/additional_field/' . $post->ID . '/' . $field, $value, $post ); @@ -655,7 +657,7 @@ public function convert_post( \WP_Post $post ) : array { $additions = array_combine( $additional_fields, $additional_values ); - $additions = array_filter( $additions, function( $item ) { + $additions = array_filter( $additions, function ( $item ) { return ! is_null( $item ); }); @@ -782,7 +784,7 @@ public function convert_post( \WP_Post $post ) : array { * @param string $string Unescaped string. * @return string Escaped $string. */ - public function escape_string( ?string $string = '' ) : string { + public function escape_string( ?string $string = '' ): string { return Utility::escape_string( $string ); } @@ -792,7 +794,7 @@ public function escape_string( ?string $string = '' ) : string { * @param \WP_Post $post Post object. * @return string Post content. */ - public function get_post_content( \WP_Post $post ) : string { + public function get_post_content( \WP_Post $post ): string { $post_content = $post->post_content; switch ( $post->post_type ) { // Handle post content by post type @@ -818,7 +820,7 @@ public function get_post_content( \WP_Post $post ) : string { $pdf = $parser->parseContent( $file_content ); $post_content = $pdf->getText(); } - catch( \Exception $e ) { + catch ( \Exception $e ) { error_log( 'RediPress PDF indexing error: ' . $e->getMessage() ); } } @@ -843,10 +845,10 @@ public function get_post_content( \WP_Post $post ) : string { $post_content = $this->io_factory_get_text( $phpword ); } - catch( \Exception $e ) { + catch ( \Exception $e ) { error_log( 'RediPress Office indexing error: ' . $e->getMessage() ); } - catch( \ValueError $e ) { + catch ( \ValueError $e ) { error_log( 'RediPress Office file not found: ' . $post->post_title ); } break; @@ -875,7 +877,7 @@ public function get_post_content( \WP_Post $post ) : string { $post_content = str_replace( '\r', ' ', $post_content ); // Replace multiple whitespaces with a single space - $post_content = preg_replace('/\s+/S', ' ', $post_content); + $post_content = preg_replace( '/\s+/S', ' ', $post_content ); return $post_content; } @@ -886,7 +888,7 @@ public function get_post_content( \WP_Post $post ) : string { * @param null|string $content The content to strip. * @return string */ - private function strip_tags_except_comments( ?string $content ) : string { + private function strip_tags_except_comments( ?string $content ): string { if ( ! $content ) { return ''; } @@ -908,7 +910,7 @@ private function strip_tags_except_comments( ?string $content ) : string { * @param mixed $current Current item to check for text. * @return string Text content. */ - public function io_factory_get_text( $current ) : string { + public function io_factory_get_text( $current ): string { $post_content = ''; if ( \method_exists( $current, 'getText' ) ) { $post_content .= $current->getText() . "\n"; @@ -933,7 +935,7 @@ public function io_factory_get_text( $current ) : string { * @param \WP_Post $post Attachment post object. * @return string|null File content or null if couldn't retrieve. */ - public function get_uploaded_media_content( \WP_Post $post ) : ?string { + public function get_uploaded_media_content( \WP_Post $post ): ?string { $content = null; $file_path = \get_attached_file( $post->ID ); @@ -955,8 +957,8 @@ public function get_uploaded_media_content( \WP_Post $post ) : ?string { /** * Add a post to the database * - * @param array $converted_post The post in array form. - * @param string|int $id The document ID for RediSearch. + * @param array $converted_post The post in array form. + * @param string|int $id The document ID for RediSearch. * @return mixed */ public function add_post( array $converted_post, $id ) { @@ -977,11 +979,11 @@ public function delete_post( $id ) { * Delete document items by field name and value. * * @param string $field_name The RediPress index field name. - * @param $value The value to look for by the name. + * @param mixed $value The value to look for by the name. * * @return int The number of items deleted. */ - public function delete_by_field( string $field_name, $value ) : int { + public function delete_by_field( string $field_name, $value ): int { // TODO: handle numeric fields. $return = $this->client->raw_command( 'FT.SEARCH', [ $this->index, '@' . $field_name . ':(' . $value . ')' ] ); @@ -1005,7 +1007,7 @@ public function delete_by_field( string $field_name, $value ) : int { * @param string $language The language name. * @return boolean */ - protected function is_language_supported( string $language ) : bool { + protected function is_language_supported( string $language ): bool { return in_array( $language, [ 'arabic', 'danish', @@ -1032,7 +1034,7 @@ protected function is_language_supported( string $language ) : bool { * * @return integer|null */ - public static function indexing() : ?int { + public static function indexing(): ?int { return self::$indexing; } diff --git a/src/PostQuery.php b/src/PostQuery.php index b3d1e34..2ef8a2c 100644 --- a/src/PostQuery.php +++ b/src/PostQuery.php @@ -78,7 +78,7 @@ public function __construct( Client $client, array $index_info ) { add_filter( 'posts_pre_query', [ $this, 'posts_pre_query' ], 10, 2 ); // Reverse filter for getting the Search instance. - add_filter( 'redipress/search_instance', function( $value ) { + add_filter( 'redipress/search_instance', function ( $value ) { return $this; }, 1, 1 ); } @@ -88,7 +88,7 @@ public function __construct( Client $client, array $index_info ) { * * @return Client */ - public function get_client() : Client { + public function get_client(): Client { return $this->client; } @@ -168,8 +168,11 @@ public function search( \WP_Query $query ) { $groupby = [ 'post_id' ]; } + // Remove GROUPBY values from the reducers, they are implicitly included. + $return = array_diff( $return, $groupby ); + // Form the return field clause - $return_fields = array_map( function( string $field ) use ( $reduce_functions ) : array { + $return_fields = array_map( function ( string $field ) use ( $reduce_functions ): array { $return = [ 'REDUCE', $reduce_functions[ $field ] ?? 'FIRST_VALUE', @@ -184,13 +187,12 @@ public function search( \WP_Query $query ) { // Form the final query $command = array_merge( - [ $this->index, $search_query_string, 'INFIELDS', count( $infields ) ], + [ $this->index, $search_query_string ], $geofilter, - $infields, - array_merge( [ 'LOAD', count( $load ) ], array_map( function( $l ) { + array_merge( [ 'LOAD', count( $load ) ], array_map( function ( $l ) { return '@' . $l; }, $load ) ), - array_merge( [ 'GROUPBY', count( $groupby ) ], array_map( function( $g ) { + array_merge( [ 'GROUPBY', count( $groupby ) ], array_map( function ( $g ) { return '@' . $g; }, $groupby ) ), array_reduce( $return_fields, 'array_merge', [] ), @@ -217,12 +219,11 @@ public function search( \WP_Query $query ) { $filter_keys = $matches[1]; $count_command = array_merge( - [ $this->index, $search_query_string, 'INFIELDS', count( $infields ) ], + [ $this->index, $search_query_string ], $geofilter, - $infields, array_merge( $applies ), array_merge( $filters ), - array_merge( [ 'GROUPBY', count( $filter_keys ) ], array_map( function( $f ) { + array_merge( [ 'GROUPBY', count( $filter_keys ) ], array_map( function ( $f ) { return '@' . $f; }, $filter_keys ) ), [ 'REDUCE', 'COUNT', 0, 'AS', 'count' ], @@ -241,9 +242,9 @@ public function search( \WP_Query $query ) { } // Clean the aggregate output to match usual key-value pairs - $results = array_map( function( $result ) { + $results = array_map( function ( $result ) { if ( is_array( $result ) ) { - return array_map( function( $item ) { + return array_map( function ( $item ) { // If we are dealing with an array, just turn it into a string if ( is_array( $item ) ) { return implode( ' ', $item ); @@ -259,7 +260,7 @@ public function search( \WP_Query $query ) { }, $results ); // Store the search query string so that in can be debugged easily via WP_Query. - $query->redisearch_query = 'FT.AGGREGATE ' . implode( ' ', array_map( function( $comm ) { + $query->redisearch_query = 'FT.AGGREGATE ' . implode( ' ', array_map( function ( $comm ) { if ( \strpos( $comm, ' ' ) !== false ) { return '"' . $comm . '"'; } @@ -300,7 +301,7 @@ public function search( \WP_Query $query ) { ); // Store the search query string so that in can be debugged easily via WP_Query. - $query->redisearch_query = 'FT.SEARCH ' . implode( ' ', array_map( function( $comm ) { + $query->redisearch_query = 'FT.SEARCH ' . implode( ' ', array_map( function ( $comm ) { if ( \strpos( $comm, ' ' ) !== false ) { return '"' . $comm . '"'; } @@ -314,8 +315,7 @@ public function search( \WP_Query $query ) { $counts = $this->client->raw_command( 'FT.AGGREGATE', array_merge( - [ $this->index, $count_search_query_string, 'INFIELDS', count( $infields ) ], - $infields, + [ $this->index, $count_search_query_string ], [ 'GROUPBY', 1, '@post_type', 'REDUCE', 'COUNT', '0', 'AS', 'amount' ] ) ); @@ -336,7 +336,7 @@ public function search( \WP_Query $query ) { * @param \WP_Query $query The WP_Query object. * @return array Results or null if no results. */ - public function posts_pre_query( ?array $posts, \WP_Query $query ) : ?array { + public function posts_pre_query( ?array $posts, \WP_Query $query ): ?array { global $wpdb; // If the query is empty, we are probably dealing with the front page and we want to skip RediSearch with that. @@ -450,7 +450,7 @@ public function posts_pre_query( ?array $posts, \WP_Query $query ) : ?array { * @param \WP_Query $query The WP_Query instance. * @return array */ - public function posts_results_single( array $posts, \WP_Query $query ) : array { + public function posts_results_single( array $posts, \WP_Query $query ): array { remove_filter( 'posts_results', [ $this, 'posts_results_single' ], 10 ); return array_map( '\\Geniem\\RediPress\\get_post', array_column( $posts, 'ID' ) ); @@ -462,10 +462,10 @@ public function posts_results_single( array $posts, \WP_Query $query ) : array { * @param array $results Original array to format. * @return array */ - public function format_results( array $results ) : array { + public function format_results( array $results ): array { $results = Utility::format( $results ); - return array_map( function( array $result ) : ?\WP_Post { + return array_map( function ( array $result ): ?\WP_Post { if ( empty( $result['post_object'] ) ) { $formatted = Utility::format( $result ); $post_obj = $formatted['post_object'] ?? null;