From 717e1cdfdd9b9849c813ab60848bd2c85ce7957f Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Mon, 3 Oct 2022 16:21:16 -0300 Subject: [PATCH 1/5] Refactor format_args --- includes/classes/Indexable/Post/Post.php | 2064 ++++++++++++---------- 1 file changed, 1153 insertions(+), 911 deletions(-) diff --git a/includes/classes/Indexable/Post/Post.php b/includes/classes/Indexable/Post/Post.php index 0f0c81099c..ffa779b1f4 100644 --- a/includes/classes/Indexable/Post/Post.php +++ b/includes/classes/Indexable/Post/Post.php @@ -947,1178 +947,1420 @@ public function prepare_meta( $post ) { * @return array */ public function format_args( $args, $wp_query ) { - if ( ! empty( $args['posts_per_page'] ) ) { - $posts_per_page = (int) $args['posts_per_page']; + $args = $this->sanitize_wp_query_args( $args ); - // ES have a maximum size allowed so we have to convert "-1" to a maximum size. - if ( -1 === $posts_per_page ) { - /** - * Set the maximum results window size. - * - * The request will return a HTTP 500 Internal Error if the size of the - * request is larger than the [index.max_result_window] parameter in ES. - * See the scroll api for a more efficient way to request large data sets. - * - * @return int The max results window size. - * - * @since 2.3.0 - */ + $formatted_args = [ + 'from' => $this->parse_from( $args ), + 'size' => $this->parse_size( $args ), + ]; - /** - * Filter max result size if set to -1 - * - * @hook ep_max_results_window - * @param {int} Max result window - * @return {int} New window - */ - $posts_per_page = apply_filters( 'ep_max_results_window', 10000 ); - } - } else { - $posts_per_page = (int) get_option( 'posts_per_page' ); + $filters = $this->parse_filters( $args, $wp_query ); + + if ( ! 
empty( $filters ) ) { + $formatted_args['post_filter'] = $filters; } - $formatted_args = array( - 'from' => 0, - 'size' => $posts_per_page, - ); + $formatted_args = $this->maybe_set_search_fields( $formatted_args, $args ); + $formatted_args = $this->maybe_set_fields( $formatted_args, $args ); + $formatted_args = $this->maybe_orderby( $formatted_args, $args ); + $formatted_args = $this->maybe_add_sticky_posts( $formatted_args, $args ); + $formatted_args = $this->maybe_set_aggs( $formatted_args, $args, $filters ); /** - * Order and Orderby arguments + * Filter formatted Elasticsearch [ost ]query (entire query) * - * Used for how Elasticsearch will sort results + * @hook ep_formatted_args + * @param {array} $formatted_args Formatted Elasticsearch query + * @param {array} $query_vars Query variables + * @param {array} $query Query part + * @return {array} New query + */ + $formatted_args = apply_filters( 'ep_formatted_args', $formatted_args, $args, $wp_query ); + + /** + * Filter formatted Elasticsearch [ost ]query (entire query) * - * @since 1.1 + * @hook ep_post_formatted_args + * @param {array} $formatted_args Formatted Elasticsearch query + * @param {array} $query_vars Query variables + * @param {array} $query Query part + * @return {array} New query */ + $formatted_args = apply_filters( 'ep_post_formatted_args', $formatted_args, $args, $wp_query ); - // Set sort order, default is 'desc'. - if ( ! empty( $args['order'] ) ) { - $order = $this->parse_order( $args['order'] ); - } else { - $order = 'desc'; - } + return $formatted_args; + } - // Default sort for non-searches to date. - if ( empty( $args['orderby'] ) && ( ! 
isset( $args['s'] ) || '' === $args['s'] ) ) { - /** - * Filter default post query order by - * - * @hook ep_set_default_sort - * @param {string} $sort Default sort - * @param {string $order Order direction - * @return {string} New default - */ - $args['orderby'] = apply_filters( 'ep_set_default_sort', 'date', $order ); + /** + * Adjust the fuzziness parameter if needed. + * + * If using fields with type `long`, queries should not have a fuzziness parameter. + * + * @param array $query Current query + * @param array $query_vars Query variables + * @param string $search_text Search text + * @param array $search_fields Search fields + * @return array New query + */ + public function adjust_query_fuzziness( $query, $query_vars, $search_text, $search_fields ) { + if ( empty( array_intersect( $search_fields, [ 'ID', 'post_id', 'post_parent' ] ) ) ) { + return $query; } - // Set sort type. - if ( ! empty( $args['orderby'] ) ) { - $formatted_args['sort'] = $this->parse_orderby( $args['orderby'], $order, $args ); - } else { - // Default sort is to use the score (based on relevance). - $default_sort = array( - array( - '_score' => array( - 'order' => $order, - ), - ), - ); - - /** - * Filter the ES query order (`sort` clause) - * - * This filter is used in searches if `orderby` is not set in the WP_Query args. - * The default value is: - * - * $default_sort = array( - * array( - * '_score' => array( - * 'order' => $order, - * ), - * ), - * ); - * - * @hook ep_set_sort - * @since 3.6.3 - * @param {array} $sort Default sort. - * @param {string} $order Order direction - * @return {array} New default - */ - $default_sort = apply_filters( 'ep_set_sort', $default_sort, $order ); - - $formatted_args['sort'] = $default_sort; + if ( ! isset( $query['bool'] ) || ! isset( $query['bool']['should'] ) ) { + return $query; } - $filter = array( - 'bool' => array( - 'must' => [], - ), - ); - $use_filters = false; - - // Sanitize array query args. 
Elasticsearch will error if a terms query contains empty items like an - // empty string. - $keys_to_sanitize = [ - 'author__in', - 'author__not_in', - 'category__and', - 'category__in', - 'category__not_in', - 'tag__and', - 'tag__in', - 'tag__not_in', - 'tag_slug__and', - 'tag_slug__in', - 'post_parent__in', - 'post_parent__not_in', - 'post__in', - 'post__not_in', - 'post_name__in', - ]; - foreach ( $keys_to_sanitize as $key ) { - if ( ! isset( $args[ $key ] ) ) { + foreach ( $query['bool']['should'] as &$clause ) { + if ( ! isset( $clause['multi_match'] ) ) { continue; } - $args[ $key ] = array_filter( (array) $args[ $key ] ); - } - /** - * Tax Query support - * - * Support for the tax_query argument of WP_Query. Currently only provides support for the 'AND' relation - * between taxonomies. Field only supports slug, term_id, and name defaulting to term_id. - * - * @use field = slug - * terms array - * @since 0.9.1 - */ - if ( ! empty( $wp_query->tax_query ) && ! empty( $wp_query->tax_query->queries ) ) { - $args['tax_query'] = $wp_query->tax_query->queries; + if ( isset( $clause['multi_match']['fuzziness'] ) ) { + unset( $clause['multi_match']['fuzziness'] ); + } } - if ( ! empty( $args['tax_query'] ) ) { - // Main tax_query array for ES. - $es_tax_query = []; + return $query; + } - $tax_queries = $this->parse_tax_query( $args['tax_query'] ); + /** + * Parse and build out our tax query. + * + * @access protected + * + * @param array $query Tax query + * @return array + */ + protected function parse_tax_query( $query ) { + $tax_query = [ + 'tax_filter' => [], + 'tax_must_not_filter' => [], + ]; + $relation = ''; - if ( ! empty( $tax_queries['tax_filter'] ) ) { - $relation = 'must'; + foreach ( $query as $tax_queries ) { + // If we have a nested tax query, recurse through that + if ( is_array( $tax_queries ) && empty( $tax_queries['taxonomy'] ) ) { + $result = $this->parse_tax_query( $tax_queries ); + $relation = ( ! empty( $tax_queries['relation'] ) ) ? 
strtolower( $tax_queries['relation'] ) : 'and'; + $filter_type = 'and' === $relation ? 'must' : 'should'; - if ( ! empty( $args['tax_query']['relation'] ) && 'or' === strtolower( $args['tax_query']['relation'] ) ) { - $relation = 'should'; + // Set the proper filter type and must_not filter, as needed + if ( ! empty( $result['tax_must_not_filter'] ) ) { + $tax_query['tax_filter'][] = [ + 'bool' => [ + $filter_type => $result['tax_filter'], + 'must_not' => $result['tax_must_not_filter'], + ], + ]; + } else { + $tax_query['tax_filter'][] = [ + 'bool' => [ + $filter_type => $result['tax_filter'], + ], + ]; } - - $es_tax_query[ $relation ] = $tax_queries['tax_filter']; } - if ( ! empty( $tax_queries['tax_must_not_filter'] ) ) { - $es_tax_query['must_not'] = $tax_queries['tax_must_not_filter']; - } + // Parse each individual tax query part + $single_tax_query = $tax_queries; + if ( ! empty( $single_tax_query['taxonomy'] ) ) { + $terms = isset( $single_tax_query['terms'] ) ? (array) $single_tax_query['terms'] : array(); + $field = ( ! empty( $single_tax_query['field'] ) ) ? $single_tax_query['field'] : 'term_id'; - if ( ! empty( $es_tax_query ) ) { - $filter['bool']['must'][]['bool'] = $es_tax_query; - } + if ( 'name' === $field ) { + $field = 'name.raw'; + } - $use_filters = true; - } + if ( 'slug' === $field ) { + $terms = array_map( 'sanitize_title', $terms ); + } - /** - * 'post_parent' arg support. - * - * @since 2.0 - */ - if ( isset( $args['post_parent'] ) && '' !== $args['post_parent'] && 'any' !== strtolower( $args['post_parent'] ) ) { - $filter['bool']['must'][]['bool']['must'] = array( - 'term' => array( - 'post_parent' => $args['post_parent'], - ), - ); + // Set up our terms object + $terms_obj = array( + 'terms.' . $single_tax_query['taxonomy'] . '.' . $field => array_values( array_filter( $terms ) ), + ); - $use_filters = true; - } + $operator = ( ! empty( $single_tax_query['operator'] ) ) ? 
strtolower( $single_tax_query['operator'] ) : 'in'; - /** - * 'post__in' arg support. - * - * @since x.x - */ - if ( ! empty( $args['post__in'] ) ) { - $filter['bool']['must'][]['bool']['must'] = array( - 'terms' => array( - 'post_id' => array_values( (array) $args['post__in'] ), - ), - ); + switch ( $operator ) { + case 'exists': + /** + * add support for "EXISTS" operator + * + * @since 2.5 + */ + $tax_query['tax_filter'][]['bool'] = array( + 'must' => array( + array( + 'exists' => array( + 'field' => key( $terms_obj ), + ), + ), + ), + ); - $use_filters = true; - } + break; + case 'not exists': + /** + * add support for "NOT EXISTS" operator + * + * @since 2.5 + */ + $tax_query['tax_filter'][]['bool'] = array( + 'must_not' => array( + array( + 'exists' => array( + 'field' => key( $terms_obj ), + ), + ), + ), + ); - /** - * 'post_name__in' arg support. - * - * @since 3.6.0 - */ - if ( ! empty( $args['post_name__in'] ) ) { - $filter['bool']['must'][]['bool']['must'] = array( - 'terms' => array( - 'post_name.raw' => array_values( (array) $args['post_name__in'] ), - ), - ); + break; + case 'not in': + /** + * add support for "NOT IN" operator + * + * @since 2.1 + */ + // If "NOT IN" than it should filter as must_not + $tax_query['tax_must_not_filter'][]['terms'] = $terms_obj; - $use_filters = true; - } + break; + case 'and': + /** + * add support for "and" operator + * + * @since 2.4 + */ + $and_nest = array( + 'bool' => array( + 'must' => array(), + ), + ); - /** - * 'post__not_in' arg support. - * - * @since x.x - */ - if ( ! empty( $args['post__not_in'] ) ) { - $filter['bool']['must'][]['bool']['must_not'] = array( - 'terms' => array( - 'post_id' => (array) $args['post__not_in'], - ), - ); + foreach ( $terms as $term ) { + $and_nest['bool']['must'][] = array( + 'terms' => array( + 'terms.' . $single_tax_query['taxonomy'] . '.' . 
$field => (array) $term, + ), + ); + } - $use_filters = true; - } + $tax_query['tax_filter'][] = $and_nest; - /** - * 'category__not_in' arg support. - * - * @since 3.6.0 - */ - if ( ! empty( $args['category__not_in'] ) ) { - $filter['bool']['must'][]['bool']['must_not'] = array( - 'terms' => array( - 'terms.category.term_id' => array_values( (array) $args['category__not_in'] ), - ), - ); + break; + case 'in': + default: + /** + * Default to IN operator + */ + // Add the tax query filter + $tax_query['tax_filter'][]['terms'] = $terms_obj; - $use_filters = true; + break; + } + } } - /** - * 'tag__not_in' arg support. - * - * @since 3.6.0 - */ - if ( ! empty( $args['tag__not_in'] ) ) { - $filter['bool']['must'][]['bool']['must_not'] = array( - 'terms' => array( - 'terms.post_tag.term_id' => array_values( (array) $args['tag__not_in'] ), - ), - ); + return $tax_query; + } - $use_filters = true; + /** + * Parse an 'order' query variable and cast it to ASC or DESC as necessary. + * + * @since 1.1 + * @access protected + * + * @param string $order The 'order' query variable. + * @return string The sanitized 'order' query variable. + */ + protected function parse_order( $order ) { + // Core will always set sort order to DESC for any invalid value, + // so we can't do any automated testing of this function. + // @codeCoverageIgnoreStart + if ( ! is_string( $order ) || empty( $order ) ) { + return 'desc'; } + // @codeCoverageIgnoreEnd - /** - * Author query support - * - * @since 1.0 - */ - if ( ! empty( $args['author'] ) ) { - $filter['bool']['must'][] = array( - 'term' => array( - 'post_author.id' => $args['author'], - ), - ); - - $use_filters = true; - } elseif ( ! empty( $args['author_name'] ) ) { - // Since this was set to use the display name initially, there might be some code that used this feature. - // Let's ensure that any query vars coming in using author_name are in fact slugs. 
- // This was changed back in ticket #1622 to use the display name, so we removed the sanitize_user() call. - $filter['bool']['must'][] = array( - 'term' => array( - 'post_author.display_name' => $args['author_name'], - ), - ); - - $use_filters = true; - } elseif ( ! empty( $args['author__in'] ) ) { - $filter['bool']['must'][]['bool']['must'] = array( - 'terms' => array( - 'post_author.id' => array_values( (array) $args['author__in'] ), - ), - ); + if ( 'ASC' === strtoupper( $order ) ) { + return 'asc'; + } else { + return 'desc'; + } + } - $use_filters = true; - } elseif ( ! empty( $args['author__not_in'] ) ) { - $filter['bool']['must'][]['bool']['must_not'] = array( - 'terms' => array( - 'post_author.id' => array_values( (array) $args['author__not_in'] ), - ), - ); + /** + * Convert the alias to a properly-prefixed sort value. + * + * @since 1.1 + * @access protected + * + * @param string $orderbys Alias or path for the field to order by. + * @param string $default_order Default order direction + * @param array $args Query args + * @return array + */ + protected function parse_orderby( $orderbys, $default_order, $args ) { + $orderbys = $this->get_orderby_array( $orderbys ); - $use_filters = true; - } + $from_to = [ + 'relevance' => '_score', + 'date' => 'post_date', + 'type' => 'post_type.raw', + 'modified' => 'post_modified', + 'name' => 'post_name.raw', + 'title' => 'post_title.sortable', + ]; - /** - * Add support for post_mime_type - * - * If we have array, it will be fool text search filter. - * If we have string(like filter images in media screen), we will have mime type "image" so need to check it as - * regexp filter. - * - * @since 2.3 - */ - if ( ! 
empty( $args['post_mime_type'] ) ) { - if ( is_array( $args['post_mime_type'] ) ) { + $sort = []; - $args_post_mime_type = []; + foreach ( $orderbys as $key => $value ) { + if ( is_string( $key ) ) { + $orderby_clause = $key; + $order = $value; + } else { + $orderby_clause = $value; + $order = $default_order; + } - foreach ( $args['post_mime_type'] as $mime_type ) { - /** - * check if matches the MIME type pattern: type/subtype and - * leave an empty string as posts, pages and CPTs don't have a MIME type - */ - if ( preg_match( '/^[-._a-z0-9]+\/[-._a-z0-9]+$/i', $mime_type ) || empty( $mime_type ) ) { - $args_post_mime_type[] = $mime_type; - } else { - $filtered_mime_type_by_type = wp_match_mime_types( $mime_type, wp_get_mime_types() ); + if ( empty( $orderby_clause ) || 'rand' === $orderby_clause ) { + continue; + } - $args_post_mime_type = array_merge( $args_post_mime_type, $filtered_mime_type_by_type[ $mime_type ] ); - } + if ( in_array( $orderby_clause, [ 'meta_value', 'meta_value_num' ], true ) ) { + if ( empty( $args['meta_key'] ) ) { + continue; + } else { + $from_to['meta_value'] = 'meta.' . $args['meta_key'] . '.raw'; + $from_to['meta_value_num'] = 'meta.' . $args['meta_key'] . '.long'; } + } - $filter['bool']['must'][] = array( - 'terms' => array( - 'post_mime_type' => $args_post_mime_type, - ), - ); + $orderby_clause = $from_to[ $orderby_clause ] ?? $orderby_clause; - $use_filters = true; - } elseif ( is_string( $args['post_mime_type'] ) ) { - $filter['bool']['must'][] = array( - 'regexp' => array( - 'post_mime_type' => $args['post_mime_type'] . '.*', - ), - ); + $sort[] = array( + $orderby_clause => array( + 'order' => $order, + ), + ); + } - $use_filters = true; - } + return $sort; + } + + /** + * Get Order by args Array + * + * @param string|array $orderbys Order by string or array + * @since 2.1 + * @return array + */ + protected function get_orderby_array( $orderbys ) { + if ( ! 
is_array( $orderbys ) ) { + $orderbys = explode( ' ', $orderbys ); } - /** - * Simple date params support - * - * @since 1.3 - */ - $date_filter = DateQuery::simple_es_date_filter( $args ); + return $orderbys; + } - if ( ! empty( $date_filter ) ) { - $filter['bool']['must'][] = $date_filter; - $use_filters = true; + /** + * Given a mapping content, try to determine the version used. + * + * @since 3.6.3 + * + * @param array $mapping Mapping content. + * @param string $index Index name + * @return string Version of the mapping being used. + */ + protected function determine_mapping_version_based_on_existing( $mapping, $index ) { + if ( isset( $mapping[ $index ]['mappings']['post']['_meta']['mapping_version'] ) ) { + return $mapping[ $index ]['mappings']['post']['_meta']['mapping_version']; + } + if ( isset( $mapping[ $index ]['mappings']['_meta']['mapping_version'] ) ) { + return $mapping[ $index ]['mappings']['_meta']['mapping_version']; } /** - * 'date_query' arg support. + * Check for 7-0 mapping. + * If mapping has a `post` type, it can't be ES 7, as mapping types were removed in that release. + * + * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html */ - if ( ! empty( $args['date_query'] ) ) { - - $date_query = new DateQuery( $args['date_query'] ); - - $date_filter = $date_query->get_es_filter(); - - if ( array_key_exists( 'and', $date_filter ) ) { - $filter['bool']['must'][] = $date_filter['and']; - $use_filters = true; - } + if ( ! isset( $mapping[ $index ]['mappings']['post'] ) ) { + return '7-0.php'; } - $meta_queries = []; + $post_mapping = $mapping[ $index ]['mappings']['post']; /** - * Support `meta_key`, `meta_value`, `meta_value_num`, and `meta_compare` query args + * Starting at this point, our tests rely on the post_title.fields.sortable field. + * As this field is present in all our mappings, if this field is not present in + * the mapping, this is a custom mapping. 
+ * + * To have this code working with custom mappings, use the `ep_post_mapping_version_determined` filter. */ - if ( ! empty( $args['meta_key'] ) ) { - $meta_query_array = [ - 'key' => $args['meta_key'], - ]; - - if ( isset( $args['meta_value'] ) && '' !== $args['meta_value'] ) { - $meta_query_array['value'] = $args['meta_value']; - } elseif ( isset( $args['meta_value_num'] ) && '' !== $args['meta_value_num'] ) { - $meta_query_array['value'] = $args['meta_value_num']; - } - - if ( isset( $args['meta_compare'] ) ) { - $meta_query_array['compare'] = $args['meta_compare']; - } - - $meta_queries[] = $meta_query_array; + if ( ! isset( $post_mapping['properties']['post_title']['fields']['sortable'] ) ) { + return 'unknown'; } - /** - * Todo: Support meta_type - */ + $post_title_sortable = $post_mapping['properties']['post_title']['fields']['sortable']; /** - * 'meta_query' arg support. - * - * Relation supports 'AND' and 'OR'. 'AND' is the default. For each individual query, the - * following 'compare' values are supported: =, !=, EXISTS, NOT EXISTS. '=' is the default. + * Check for 5-2 mapping. + * Normalizers on keyword fields were only made available in ES 5.2 * - * @since 1.3 + * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.2/release-notes-5.2.0.html */ - if ( ! empty( $args['meta_query'] ) ) { - $meta_queries = array_merge( $meta_queries, $args['meta_query'] ); + if ( isset( $post_title_sortable['normalizer'] ) ) { + return '5-2.php'; } - if ( ! empty( $meta_queries ) ) { - - $relation = 'must'; - if ( ! empty( $args['meta_query'] ) && ! empty( $args['meta_query']['relation'] ) && 'or' === strtolower( $args['meta_query']['relation'] ) ) { - $relation = 'should'; - } - - // get meta query filter - $meta_filter = $this->build_meta_query( $meta_queries ); - - if ( ! empty( $meta_filter ) ) { - $filter['bool']['must'][] = $meta_filter; - - $use_filters = true; - } + /** + * Check for 5-0 mapping. 
+ * `keyword` fields were only made available in ES 5.0 + * + * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.0/release-notes-5.0.0.html + */ + if ( 'keyword' === $post_title_sortable['type'] ) { + return '5-0.php'; } /** - * Allow for search field specification + * Check for pre-5-0 mapping. + * `string` fields were deprecated in ES 5.0 in favor of text/keyword * - * @since 1.0 + * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.0/release-notes-5.0.0.html */ - if ( ! empty( $args['search_fields'] ) ) { - $search_field_args = $args['search_fields']; - $search_fields = []; - - if ( ! empty( $search_field_args['taxonomies'] ) ) { - $taxes = (array) $search_field_args['taxonomies']; - - foreach ( $taxes as $tax ) { - $search_fields[] = 'terms.' . $tax . '.name'; - } - - unset( $search_field_args['taxonomies'] ); - } - - if ( ! empty( $search_field_args['meta'] ) ) { - $metas = (array) $search_field_args['meta']; + if ( 'string' === $post_title_sortable['type'] ) { + return 'pre-5-0.php'; + } - foreach ( $metas as $meta ) { - $search_fields[] = 'meta.' . $meta . '.value'; - } + return 'unknown'; + } - unset( $search_field_args['meta'] ); - } + /** + * Given ES args, add aggregations to it. + * + * @since 4.1.0 + * @param array $formatted_args Formatted Elasticsearch query + * @param array $agg Aggregation data. + * @param boolean $use_filters Whether filters should be used or not. + * @param array $filter Filters defined so far. + * @return array Formatted Elasticsearch query with the aggregation added. + */ + protected function apply_aggregations( $formatted_args, $agg, $use_filters, $filter ) { + if ( empty( $agg['aggs'] ) ) { + return $formatted_args; + } - if ( in_array( 'author_name', $search_field_args, true ) ) { - $search_fields[] = 'post_author.login'; + // Add a name to the aggregation if it was passed through + $agg_name = ( ! empty( $agg['name'] ) ) ? 
$agg['name'] : 'aggregation_name'; - $author_name_index = array_search( 'author_name', $search_field_args, true ); - unset( $search_field_args[ $author_name_index ] ); - } + // Add/use the filter if warranted + if ( isset( $agg['use-filter'] ) && false !== $agg['use-filter'] && $use_filters ) { - $search_fields = array_merge( $search_field_args, $search_fields ); + // If a filter is being used, use it on the aggregation as well to receive relevant information to the query + $formatted_args['aggs'][ $agg_name ]['filter'] = $filter; + $formatted_args['aggs'][ $agg_name ]['aggs'] = $agg['aggs']; } else { - $search_fields = array( - 'post_title', - 'post_excerpt', - 'post_content', - ); + $formatted_args['aggs'][ $agg_name ] = $agg['aggs']; } - /** - * Filter default post search fields - * - * If you are using the weighting engine, this filter should not be used. - * Instead, you should use the ep_weighting_configuration_for_search filter. - * - * @hook ep_search_fields - * @param {array} $search_fields Default search fields - * @param {array} $args WP Query arguments - * @return {array} New defaults - */ - $search_fields = apply_filters( 'ep_search_fields', $search_fields, $args ); - - $search_text = ( ! empty( $args['s'] ) ) ? $args['s'] : ''; - - /** - * We are using ep_integrate instead of ep_match_all. ep_match_all will be - * supported for legacy code but may be deprecated and removed eventually. - * - * @since 1.3 - */ - - if ( ! empty( $search_text ) ) { - add_filter( 'ep_post_formatted_args_query', [ $this, 'adjust_query_fuzziness' ], 100, 4 ); + return $formatted_args; + } - $search_algorithm = $this->get_search_algorithm( $search_text, $search_fields, $args ); - $formatted_args['query'] = $search_algorithm->get_query( 'post', $search_text, $search_fields, $args ); - } elseif ( ! empty( $args['ep_match_all'] ) || ! 
empty( $args['ep_integrate'] ) ) { - $formatted_args['query']['match_all'] = array( - 'boost' => 1, - ); - } + /** + * Get the search algorithm that should be used. + * + * @since 4.3.0 + * @param string $search_text Search term(s) + * @param array $search_fields Search fields + * @param array $query_vars Query vars + * @return SearchAlgorithm Instance of search algorithm to be used + */ + public function get_search_algorithm( string $search_text, array $search_fields, array $query_vars ) : \ElasticPress\SearchAlgorithm { + $search_algorithm_version_option = \ElasticPress\Utils\get_option( 'ep_search_algorithm_version', '4.0' ); /** - * Order by 'rand' support + * Filter the algorithm version to be used. * - * Ref: https://github.com/elastic/elasticsearch/issues/1170 - */ - if ( ! empty( $args['orderby'] ) ) { - $orderbys = $this->get_orderby_array( $args['orderby'] ); - if ( in_array( 'rand', $orderbys, true ) ) { - $formatted_args_query = $formatted_args['query']; - $formatted_args['query'] = []; - $formatted_args['query']['function_score']['query'] = $formatted_args_query; - $formatted_args['query']['function_score']['random_score'] = (object) []; - } - } + * @since 3.5 + * @hook ep_search_algorithm_version + * @param {string} $search_algorithm_version Algorithm version. 
+ * @return {string} New algorithm version + */ + $search_algorithm = apply_filters( 'ep_search_algorithm_version', $search_algorithm_version_option ); /** - * Sticky posts support + * Filter the search algorithm to be used + * + * @hook ep_{$indexable_slug}_search_algorithm + * @since 4.3.0 + * @param {string} $search_algorithm Slug of the search algorithm used as fallback + * @param {string} $search_term Search term + * @param {array} $search_fields Fields to be searched + * @param {array} $query_vars Query variables + * @return {string} New search algorithm slug */ + $search_algorithm = apply_filters( "ep_{$this->slug}_search_algorithm", $search_algorithm, $search_text, $search_fields, $query_vars ); - // Check first if there's sticky posts and show them only in the front page - $sticky_posts = get_option( 'sticky_posts' ); - $sticky_posts = ( is_array( $sticky_posts ) && empty( $sticky_posts ) ) ? false : $sticky_posts; + return \ElasticPress\SearchAlgorithms::factory()->get( $search_algorithm ); + } + /** + * Based on WP_Query arguments, parses the various filters that could be applied into the ES query. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @param WP_Query $query WP_Query object + * @return array + */ + protected function parse_filters( $args, $query ) { /** - * Filter whether to enable sticky posts for this request - * - * @hook ep_enable_sticky_posts - * - * @param {bool} $allow Allow sticky posts for this request - * @param {array} $args Query variables - * @param {array} $formatted_args EP formatted args - * - * @return {bool} $allow + * A note about the order of this array indices: + * As previously there was no way to access each part, some snippets might be accessing + * these filters by its usual numeric indices (see the array_values() call below.) 
*/ - $enable_sticky_posts = apply_filters( 'ep_enable_sticky_posts', is_home(), $args, $formatted_args ); + $filters = [ + 'tax_query' => $this->parse_tax_queries( $args, $query ), + 'post_parent' => $this->parse_post_parent( $args ), + 'post__in' => $this->parse_post__in( $args ), + 'post_name__in' => $this->parse_post_name__in( $args ), + 'post__not_in' => $this->parse_post__not_in( $args ), + 'category__not_in' => $this->parse_category__not_in( $args ), + 'tag__not_in' => $this->parse_tag__not_in( $args ), + 'author' => $this->parse_author( $args ), + 'post_mime_type' => $this->parse_post_mime_type( $args ), + 'date' => $this->parse_date( $args ), + 'meta_query' => $this->parse_meta_queries( $args ), + 'post_type' => $this->parse_post_type( $args ), + 'post_status' => $this->parse_post_status( $args ), + ]; - if ( false !== $sticky_posts - && $enable_sticky_posts - && empty( $args['s'] ) - && in_array( $args['ignore_sticky_posts'], array( 'false', 0, false ), true ) ) { - $new_sort = [ - [ - '_score' => [ - 'order' => 'desc', - ], + $filters = array_values( array_filter( $filters ) ); + $use_filters = ! empty( $filters ); + + if ( $use_filters ) { + $filters = [ + 'bool' => [ + 'must' => $filters, ], ]; + } - $formatted_args['sort'] = array_merge( $new_sort, $formatted_args['sort'] ); + return $filters; + } - $formatted_args_query = $formatted_args['query']; - $formatted_args['query'] = array(); - $formatted_args['query']['function_score']['query'] = $formatted_args_query; - $formatted_args['query']['function_score']['functions'] = array( - // add extra weight to sticky posts to show them on top - (object) array( - 'filter' => array( - 'terms' => array( '_id' => $sticky_posts ), - ), - 'weight' => 20, - ), - ); + /** + * Sanitize WP_Query arguments to be used to create the ES query. + * + * Elasticsearch will error if a terms query contains empty items like an empty string. 
+ * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array + */ + protected function sanitize_wp_query_args( $args ) { + $keys_to_sanitize = [ + 'author__in', + 'author__not_in', + 'category__and', + 'category__in', + 'category__not_in', + 'tag__and', + 'tag__in', + 'tag__not_in', + 'tag_slug__and', + 'tag_slug__in', + 'post_parent__in', + 'post_parent__not_in', + 'post__in', + 'post__not_in', + 'post_name__in', + ]; + foreach ( $keys_to_sanitize as $key ) { + if ( ! isset( $args[ $key ] ) ) { + continue; + } + $args[ $key ] = array_filter( (array) $args[ $key ] ); } - /** - * If not set default to post. If search and not set, default to "any". - */ - if ( ! empty( $args['post_type'] ) ) { - // should NEVER be "any" but just in case - if ( 'any' !== $args['post_type'] ) { - $post_types = (array) $args['post_type']; - $terms_map_name = 'terms'; + return $args; + } - $filter['bool']['must'][] = array( - $terms_map_name => array( - 'post_type.raw' => array_values( $post_types ), - ), - ); + /** + * Parse the `from` clause of the ES Query. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return int + */ + protected function parse_from( $args ) { + $from = 0; - $use_filters = true; - } - } elseif ( empty( $args['s'] ) ) { - $filter['bool']['must'][] = array( - 'term' => array( - 'post_type.raw' => 'post', - ), - ); + if ( isset( $args['offset'] ) ) { + $from = (int) $args['offset']; + } - $use_filters = true; + if ( isset( $args['paged'] ) && $args['paged'] > 1 ) { + $from = $args['posts_per_page'] * ( $args['paged'] - 1 ); } /** - * Like WP_Query in search context, if no post_status is specified we default to "any". To - * be safe you should ALWAYS specify the post_status parameter UNLIKE with WP_Query. + * Fix negative offset. This happens, for example, on hierarchical post types. * - * @since 2.1 + * Ref: https://github.com/10up/ElasticPress/issues/2480 */ - if ( ! 
empty( $args['post_status'] ) ) { - // should NEVER be "any" but just in case - if ( 'any' !== $args['post_status'] ) { - $post_status = (array) ( is_string( $args['post_status'] ) ? explode( ',', $args['post_status'] ) : $args['post_status'] ); - $post_status = array_map( 'trim', $post_status ); - $terms_map_name = 'terms'; - if ( count( $post_status ) < 2 ) { - $terms_map_name = 'term'; - $post_status = $post_status[0]; - } + if ( $from < 0 ) { + $from = 0; + } - $filter['bool']['must'][] = array( - $terms_map_name => array( - 'post_status' => $post_status, - ), - ); + return $from; + } - $use_filters = true; - } - } else { - $statuses = get_post_stati( array( 'public' => true ) ); + /** + * Parse the `size` clause of the ES Query. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return int + */ + protected function parse_size( $args ) { + if ( ! empty( $args['posts_per_page'] ) ) { + $posts_per_page = (int) $args['posts_per_page']; - if ( is_admin() ) { + // ES have a maximum size allowed so we have to convert "-1" to a maximum size. + if ( -1 === $posts_per_page ) { /** - * In the admin we will add protected and private post statuses to the default query - * per WP default behavior. + * Set the maximum results window size. + * + * The request will return a HTTP 500 Internal Error if the size of the + * request is larger than the [index.max_result_window] parameter in ES. + * See the scroll api for a more efficient way to request large data sets. + * + * @return int The max results window size. 
+ * + * @since 2.3.0 */ - $statuses = array_merge( - $statuses, - get_post_stati( - array( - 'protected' => true, - 'show_in_admin_all_list' => true, - ) - ) - ); - if ( is_user_logged_in() ) { - $statuses = array_merge( $statuses, get_post_stati( array( 'private' => true ) ) ); - } + /** + * Filter max result size if set to -1 + * + * @hook ep_max_results_window + * @param {int} Max result window + * @return {int} New window + */ + $posts_per_page = apply_filters( 'ep_max_results_window', 10000 ); } + } else { + $posts_per_page = (int) get_option( 'posts_per_page' ); + } - $statuses = array_values( $statuses ); + return $posts_per_page; + } - $post_status_filter_type = 'terms'; + /** + * Parse the order of results in the ES query. It could simply be a `sort` clause or a function score query if using RAND. + * + * @since 4.4.0 + * @param array $formatted_args Formatted Elasticsearch query + * @param array $args WP_Query arguments + * @return array + */ + protected function maybe_orderby( $formatted_args, $args ) { + /** + * Order and Orderby arguments + * + * Used for how Elasticsearch will sort results + * + * @since 1.1 + */ - $filter['bool']['must'][] = array( - $post_status_filter_type => array( - 'post_status' => $statuses, + // Set sort order, default is 'desc'. + if ( ! empty( $args['order'] ) ) { + $order = $this->parse_order( $args['order'] ); + } else { + $order = 'desc'; + } + + // Default sort for non-searches to date. + if ( empty( $args['orderby'] ) && ( ! isset( $args['s'] ) || '' === $args['s'] ) ) { + /** + * Filter default post query order by + * + * @hook ep_set_default_sort + * @param {string} $sort Default sort + * @param {string} $order Order direction + * @return {string} New default + */ + $args['orderby'] = apply_filters( 'ep_set_default_sort', 'date', $order ); + } + + // Set sort type. + if ( !
empty( $args['orderby'] ) ) { + $formatted_args['sort'] = $this->parse_orderby( $args['orderby'], $order, $args ); + } else { + // Default sort is to use the score (based on relevance). + $default_sort = array( + array( + '_score' => array( + 'order' => $order, + ), ), ); - $use_filters = true; + /** + * Filter the ES query order (`sort` clause) + * + * This filter is used in searches if `orderby` is not set in the WP_Query args. + * The default value is: + * + * $default_sort = array( + * array( + * '_score' => array( + * 'order' => $order, + * ), + * ), + * ); + * + * @hook ep_set_sort + * @since 3.6.3 + * @param {array} $sort Default sort. + * @param {string} $order Order direction + * @return {array} New default + */ + $default_sort = apply_filters( 'ep_set_sort', $default_sort, $order ); + + $formatted_args['sort'] = $default_sort; } - if ( isset( $args['offset'] ) ) { - $formatted_args['from'] = (int) $args['offset']; + /** + * Order by 'rand' support + * + * Ref: https://github.com/elastic/elasticsearch/issues/1170 + */ + if ( ! empty( $args['orderby'] ) ) { + $orderbys = $this->get_orderby_array( $args['orderby'] ); + if ( in_array( 'rand', $orderbys, true ) ) { + $formatted_args_query = $formatted_args['query']; + $formatted_args['query'] = []; + $formatted_args['query']['function_score']['query'] = $formatted_args_query; + $formatted_args['query']['function_score']['random_score'] = (object) []; + } } - if ( isset( $args['paged'] ) && $args['paged'] > 1 ) { - $formatted_args['from'] = $args['posts_per_page'] * ( $args['paged'] - 1 ); - } + return $formatted_args; + } + /** + * Parse all taxonomy queries. + * + * Although the name may be misleading, it handles the `tax_query` argument. There is a `parse_tax_query` that handles each "small" query. 
+ * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @param WP_Query $query WP_Query object + * @return array + */ + protected function parse_tax_queries( $args, $query ) { /** - * Fix negative offset. This happens, for example, on hierarchical post types. + * Tax Query support * - * Ref: https://github.com/10up/ElasticPress/issues/2480 + * Support for the tax_query argument of WP_Query. Currently only provides support for the 'AND' relation + * between taxonomies. Field only supports slug, term_id, and name defaulting to term_id. + * + * @use field = slug + * terms array + * @since 0.9.1 */ - if ( $formatted_args['from'] < 0 ) { - $formatted_args['from'] = 0; + if ( ! empty( $query->tax_query ) && ! empty( $query->tax_query->queries ) ) { + $args['tax_query'] = $query->tax_query->queries; } - if ( $use_filters ) { - $formatted_args['post_filter'] = $filter; + if ( empty( $args['tax_query'] ) ) { + return []; } - /** - * Support fields. - */ - if ( isset( $args['fields'] ) ) { - switch ( $args['fields'] ) { - case 'ids': - $formatted_args['_source'] = array( - 'includes' => array( - 'post_id', - ), - ); - break; + // Main tax_query array for ES. + $es_tax_query = []; - case 'id=>parent': - $formatted_args['_source'] = array( - 'includes' => array( - 'post_id', - 'post_parent', - ), - ); - break; - } - } + $tax_queries = $this->parse_tax_query( $args['tax_query'] ); - /** - * Aggregations - */ - if ( ! empty( $args['aggs'] ) && is_array( $args['aggs'] ) ) { - // Check if the array indexes are all numeric. - $agg_keys = array_keys( $args['aggs'] ); - $agg_num_keys = array_filter( $agg_keys, 'is_int' ); - $has_only_num_keys = count( $agg_num_keys ) === count( $args['aggs'] ); + if ( ! empty( $tax_queries['tax_filter'] ) ) { + $relation = 'must'; - if ( $has_only_num_keys ) { - foreach ( $args['aggs'] as $agg ) { - $formatted_args = $this->apply_aggregations( $formatted_args, $agg, $use_filters, $filter ); - } - } else { - // Single aggregation. 
- $formatted_args = $this->apply_aggregations( $formatted_args, $args['aggs'], $use_filters, $filter ); + if ( ! empty( $args['tax_query']['relation'] ) && 'or' === strtolower( $args['tax_query']['relation'] ) ) { + $relation = 'should'; } + + $es_tax_query[ $relation ] = $tax_queries['tax_filter']; } - /** - * Filter formatted Elasticsearch [ost ]query (entire query) - * - * @hook ep_formatted_args - * @param {array} $formatted_args Formatted Elasticsearch query - * @param {array} $query_vars Query variables - * @param {array} $query Query part - * @return {array} New query - */ - $formatted_args = apply_filters( 'ep_formatted_args', $formatted_args, $args, $wp_query ); + if ( ! empty( $tax_queries['tax_must_not_filter'] ) ) { + $es_tax_query['must_not'] = $tax_queries['tax_must_not_filter']; + } - /** - * Filter formatted Elasticsearch [ost ]query (entire query) - * - * @hook ep_post_formatted_args - * @param {array} $formatted_args Formatted Elasticsearch query - * @param {array} $query_vars Query variables - * @param {array} $query Query part - * @return {array} New query - */ - $formatted_args = apply_filters( 'ep_post_formatted_args', $formatted_args, $args, $wp_query ); + if ( ! empty( $es_tax_query ) ) { + return [ 'bool' => $es_tax_query ]; + } - return $formatted_args; + return []; } /** - * Adjust the fuzziness parameter if needed. - * - * If using fields with type `long`, queries should not have a fuzziness parameter. + * Parse the `post_parent` WP Query arg and transform it into an ES query clause. 
* - * @param array $query Current query - * @param array $query_vars Query variables - * @param string $search_text Search text - * @param array $search_fields Search fields - * @return array New query + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array */ - public function adjust_query_fuzziness( $query, $query_vars, $search_text, $search_fields ) { - if ( empty( array_intersect( $search_fields, [ 'ID', 'post_id', 'post_parent' ] ) ) ) { - return $query; + protected function parse_post_parent( $args ) { + if ( isset( $args['post_parent'] ) && '' !== $args['post_parent'] && 'any' !== strtolower( $args['post_parent'] ) ) { + return [ + 'bool' => [ + 'must' => [ + 'term' => [ + 'post_parent' => $args['post_parent'], + ], + ], + ], + ]; } - if ( ! isset( $query['bool'] ) || ! isset( $query['bool']['should'] ) ) { - return $query; + return []; + } + + /** + * Parse the `post__in` WP Query arg and transform it into an ES query clause. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array + */ + protected function parse_post__in( $args ) { + if ( ! empty( $args['post__in'] ) ) { + return [ + 'bool' => [ + 'must' => [ + 'terms' => [ + 'post_id' => array_values( (array) $args['post__in'] ), + ], + ], + ], + ]; } - foreach ( $query['bool']['should'] as &$clause ) { - if ( ! isset( $clause['multi_match'] ) ) { - continue; - } + return []; + } - if ( isset( $clause['multi_match']['fuzziness'] ) ) { - unset( $clause['multi_match']['fuzziness'] ); - } + /** + * Parse the `post_name__in` WP Query arg and transform it into an ES query clause. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array + */ + protected function parse_post_name__in( $args ) { + if ( ! 
empty( $args['post_name__in'] ) ) { + return [ + 'bool' => [ + 'must' => [ + 'terms' => [ + 'post_name.raw' => array_values( (array) $args['post_name__in'] ), + ], + ], + ], + ]; } - return $query; + return []; } /** - * Parse and build out our tax query. - * - * @access protected + * Parse the `post__not_in` WP Query arg and transform it into an ES query clause. * - * @param array $query Tax query + * @since 4.4.0 + * @param array $args WP_Query arguments * @return array */ - protected function parse_tax_query( $query ) { - $tax_query = [ - 'tax_filter' => [], - 'tax_must_not_filter' => [], - ]; - $relation = ''; + protected function parse_post__not_in( $args ) { + if ( ! empty( $args['post__not_in'] ) ) { + return [ + 'bool' => [ + 'must_not' => [ + 'terms' => [ + 'post_id' => (array) $args['post__not_in'], + ], + ], + ], + ]; + } - foreach ( $query as $tax_queries ) { - // If we have a nested tax query, recurse through that - if ( is_array( $tax_queries ) && empty( $tax_queries['taxonomy'] ) ) { - $result = $this->parse_tax_query( $tax_queries ); - $relation = ( ! empty( $tax_queries['relation'] ) ) ? strtolower( $tax_queries['relation'] ) : 'and'; - $filter_type = 'and' === $relation ? 'must' : 'should'; + return []; + } - // Set the proper filter type and must_not filter, as needed - if ( ! empty( $result['tax_must_not_filter'] ) ) { - $tax_query['tax_filter'][] = [ - 'bool' => [ - $filter_type => $result['tax_filter'], - 'must_not' => $result['tax_must_not_filter'], - ], - ]; - } else { - $tax_query['tax_filter'][] = [ - 'bool' => [ - $filter_type => $result['tax_filter'], + /** + * Parse the `category__not_in` WP Query arg and transform it into an ES query clause. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array + */ + protected function parse_category__not_in( $args ) { + if ( ! 
empty( $args['category__not_in'] ) ) { + return [ + 'bool' => [ + 'must_not' => [ + 'terms' => [ + 'terms.category.term_id' => array_values( (array) $args['category__not_in'] ), ], - ]; - } - } - - // Parse each individual tax query part - $single_tax_query = $tax_queries; - if ( ! empty( $single_tax_query['taxonomy'] ) ) { - $terms = isset( $single_tax_query['terms'] ) ? (array) $single_tax_query['terms'] : array(); - $field = ( ! empty( $single_tax_query['field'] ) ) ? $single_tax_query['field'] : 'term_id'; + ], + ], + ]; + } - if ( 'name' === $field ) { - $field = 'name.raw'; - } + return []; + } - if ( 'slug' === $field ) { - $terms = array_map( 'sanitize_title', $terms ); - } + /** + * Parse the `tag__not_in` WP Query arg and transform it into an ES query clause. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array + */ + protected function parse_tag__not_in( $args ) { + if ( ! empty( $args['tag__not_in'] ) ) { + return [ + 'bool' => [ + 'must_not' => [ + 'terms' => [ + 'terms.post_tag.term_id' => array_values( (array) $args['tag__not_in'] ), + ], + ], + ], + ]; + } - // Set up our terms object - $terms_obj = array( - 'terms.' . $single_tax_query['taxonomy'] . '.' . $field => array_values( array_filter( $terms ) ), - ); + return []; + } - $operator = ( ! empty( $single_tax_query['operator'] ) ) ? strtolower( $single_tax_query['operator'] ) : 'in'; + /** + * Parse the various author-related WP Query args and transform them into ES query clauses. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array + */ + protected function parse_author( $args ) { + if ( ! 
empty( $args['author'] ) ) { + return [ + 'term' => [ + 'post_author.id' => $args['author'], + ], + ]; + } - switch ( $operator ) { - case 'exists': - /** - * add support for "EXISTS" operator - * - * @since 2.5 - */ - $tax_query['tax_filter'][]['bool'] = array( - 'must' => array( - array( - 'exists' => array( - 'field' => key( $terms_obj ), - ), - ), - ), - ); + if ( ! empty( $args['author_name'] ) ) { + // Since this was set to use the display name initially, there might be some code that used this feature. + // Let's ensure that any query vars coming in using author_name are in fact slugs. + // This was changed back in ticket #1622 to use the display name, so we removed the sanitize_user() call. + return [ + 'term' => [ + 'post_author.display_name' => $args['author_name'], + ], + ]; + } - break; - case 'not exists': - /** - * add support for "NOT EXISTS" operator - * - * @since 2.5 - */ - $tax_query['tax_filter'][]['bool'] = array( - 'must_not' => array( - array( - 'exists' => array( - 'field' => key( $terms_obj ), - ), - ), - ), - ); + if ( ! empty( $args['author__in'] ) ) { + return [ + 'bool' => [ + 'must' => [ + 'terms' => [ + 'post_author.id' => array_values( (array) $args['author__in'] ), + ], + ], + ], + ]; + } - break; - case 'not in': - /** - * add support for "NOT IN" operator - * - * @since 2.1 - */ - // If "NOT IN" than it should filter as must_not - $tax_query['tax_must_not_filter'][]['terms'] = $terms_obj; + if ( ! empty( $args['author__not_in'] ) ) { + return [ + 'bool' => [ + 'must_not' => [ + 'terms' => [ + 'post_author.id' => array_values( (array) $args['author__not_in'] ), + ], + ], + ], + ]; + } - break; - case 'and': - /** - * add support for "and" operator - * - * @since 2.4 - */ - $and_nest = array( - 'bool' => array( - 'must' => array(), - ), - ); + return []; + } - foreach ( $terms as $term ) { - $and_nest['bool']['must'][] = array( - 'terms' => array( - 'terms.' . $single_tax_query['taxonomy'] . '.' . 
$field => (array) $term, - ), - ); - } + /** + * Parse the `post_mime_type` WP Query arg and transform it into an ES query clause. + * + * If we have array, it will be full text search filter. + * If we have string (like filter images in media screen), we will have mime type "image" so need to check it as + * regexp filter. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array + */ + protected function parse_post_mime_type( $args ) { + if ( ! empty( $args['post_mime_type'] ) ) { + if ( is_array( $args['post_mime_type'] ) ) { - $tax_query['tax_filter'][] = $and_nest; + $args_post_mime_type = []; - break; - case 'in': - default: - /** - * Default to IN operator - */ - // Add the tax query filter - $tax_query['tax_filter'][]['terms'] = $terms_obj; + foreach ( $args['post_mime_type'] as $mime_type ) { + /** + * check if matches the MIME type pattern: type/subtype and + * leave an empty string as posts, pages and CPTs don't have a MIME type + */ + if ( preg_match( '/^[-._a-z0-9]+\/[-._a-z0-9]+$/i', $mime_type ) || empty( $mime_type ) ) { + $args_post_mime_type[] = $mime_type; + } else { + $filtered_mime_type_by_type = wp_match_mime_types( $mime_type, wp_get_mime_types() ); - break; + $args_post_mime_type = array_merge( $args_post_mime_type, $filtered_mime_type_by_type[ $mime_type ] ); + } } + + return [ + 'terms' => [ + 'post_mime_type' => $args_post_mime_type, + ], + ]; + } elseif ( is_string( $args['post_mime_type'] ) ) { + return [ + 'regexp' => array( + 'post_mime_type' => $args['post_mime_type'] . '.*', + ), + ]; } } - - return $tax_query; + return []; } /** - * Parse an 'order' query variable and cast it to ASC or DESC as necessary. - * - * @since 1.1 - * @access protected + * Parse the various date-related WP Query args and transform them into ES query clauses. * - * @param string $order The 'order' query variable. - * @return string The sanitized 'order' query variable.
+ * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array */ - protected function parse_order( $order ) { - // Core will always set sort order to DESC for any invalid value, - // so we can't do any automated testing of this function. - // @codeCoverageIgnoreStart - if ( ! is_string( $order ) || empty( $order ) ) { - return 'desc'; + protected function parse_date( $args ) { + $date_filter = DateQuery::simple_es_date_filter( $args ); + + if ( ! empty( $date_filter ) ) { + return $date_filter; } - // @codeCoverageIgnoreEnd - if ( 'ASC' === strtoupper( $order ) ) { - return 'asc'; - } else { - return 'desc'; + if ( ! empty( $args['date_query'] ) ) { + + $date_query = new DateQuery( $args['date_query'] ); + + $date_filter = $date_query->get_es_filter(); + + if ( array_key_exists( 'and', $date_filter ) ) { + return $date_filter['and']; + } } } /** - * Convert the alias to a properly-prefixed sort value. + * Parse all meta queries. * - * @since 1.1 - * @access protected + * Although the name may be misleading, it handles the `meta_query` argument. There is a `build_meta_query` that handles each "small" query. * - * @param string $orderbys Alias or path for the field to order by. - * @param string $default_order Default order direction - * @param array $args Query args + * @since 4.4.0 + * @param array $args WP_Query arguments * @return array */ - protected function parse_orderby( $orderbys, $default_order, $args ) { - $orderbys = $this->get_orderby_array( $orderbys ); + protected function parse_meta_queries( $args ) { + $meta_queries = []; - $from_to = [ - 'relevance' => '_score', - 'date' => 'post_date', - 'type' => 'post_type.raw', - 'modified' => 'post_modified', - 'name' => 'post_name.raw', - 'title' => 'post_title.sortable', - ]; + /** + * Support `meta_key`, `meta_value`, `meta_value_num`, and `meta_compare` query args + */ + if ( ! 
empty( $args['meta_key'] ) ) { + $meta_query_array = [ + 'key' => $args['meta_key'], + ]; - $sort = []; + if ( isset( $args['meta_value'] ) && '' !== $args['meta_value'] ) { + $meta_query_array['value'] = $args['meta_value']; + } elseif ( isset( $args['meta_value_num'] ) && '' !== $args['meta_value_num'] ) { + $meta_query_array['value'] = $args['meta_value_num']; + } - foreach ( $orderbys as $key => $value ) { - if ( is_string( $key ) ) { - $orderby_clause = $key; - $order = $value; - } else { - $orderby_clause = $value; - $order = $default_order; + if ( isset( $args['meta_compare'] ) ) { + $meta_query_array['compare'] = $args['meta_compare']; } - if ( empty( $orderby_clause ) || 'rand' === $orderby_clause ) { - continue; + $meta_queries[] = $meta_query_array; + } + + /** + * Todo: Support meta_type + */ + + /** + * 'meta_query' arg support. + * + * Relation supports 'AND' and 'OR'. 'AND' is the default. For each individual query, the + * following 'compare' values are supported: =, !=, EXISTS, NOT EXISTS. '=' is the default. + * + * @since 1.3 + */ + if ( ! empty( $args['meta_query'] ) ) { + $meta_queries = array_merge( $meta_queries, $args['meta_query'] ); + } + + if ( ! empty( $meta_queries ) ) { + + $relation = 'must'; + if ( ! empty( $args['meta_query'] ) && ! empty( $args['meta_query']['relation'] ) && 'or' === strtolower( $args['meta_query']['relation'] ) ) { + $relation = 'should'; } - if ( in_array( $orderby_clause, [ 'meta_value', 'meta_value_num' ], true ) ) { - if ( empty( $args['meta_key'] ) ) { - continue; - } else { - $from_to['meta_value'] = 'meta.' . $args['meta_key'] . '.raw'; - $from_to['meta_value_num'] = 'meta.' . $args['meta_key'] . '.long'; - } + // get meta query filter + $meta_filter = $this->build_meta_query( $meta_queries ); + + if ( ! empty( $meta_filter ) ) { + return $meta_filter; } + } - $orderby_clause = $from_to[ $orderby_clause ] ?? 
$orderby_clause; + return []; + } - $sort[] = array( - $orderby_clause => array( - 'order' => $order, - ), - ); + /** + * Parse the `post_type` WP Query arg and transform it into an ES query clause. + * + * @since 4.4.0 + * @param array $args WP_Query arguments + * @return array + */ + protected function parse_post_type( $args ) { + /** + * If not set default to post. If search and not set, default to "any". + */ + if ( ! empty( $args['post_type'] ) ) { + // should NEVER be "any" but just in case + if ( 'any' !== $args['post_type'] ) { + $post_types = (array) $args['post_type']; + $terms_map_name = 'terms'; + + return [ + $terms_map_name => [ + 'post_type.raw' => array_values( $post_types ), + ], + ]; + } + } elseif ( empty( $args['s'] ) ) { + return [ + 'term' => [ + 'post_type.raw' => 'post', + ], + ]; } - return $sort; + return []; } /** - * Get Order by args Array + * Parse the `post_status` WP Query arg and transform it into an ES query clause. * - * @param string|array $orderbys Order by string or array - * @since 2.1 + * @since 4.4.0 + * @param array $args WP_Query arguments * @return array */ - protected function get_orderby_array( $orderbys ) { - if ( ! is_array( $orderbys ) ) { - $orderbys = explode( ' ', $orderbys ); + protected function parse_post_status( $args ) { + /** + * Like WP_Query in search context, if no post_status is specified we default to "any". To + * be safe you should ALWAYS specify the post_status parameter UNLIKE with WP_Query. + * + * @since 2.1 + */ + if ( ! empty( $args['post_status'] ) ) { + // should NEVER be "any" but just in case + if ( 'any' !== $args['post_status'] ) { + $post_status = (array) ( is_string( $args['post_status'] ) ? 
explode( ',', $args['post_status'] ) : $args['post_status'] ); + $post_status = array_map( 'trim', $post_status ); + $terms_map_name = 'terms'; + if ( count( $post_status ) < 2 ) { + $terms_map_name = 'term'; + $post_status = $post_status[0]; + } + + return [ + $terms_map_name => [ + 'post_status' => $post_status, + ], + ]; + } + } else { + $statuses = get_post_stati( array( 'public' => true ) ); + + if ( is_admin() ) { + /** + * In the admin we will add protected and private post statuses to the default query + * per WP default behavior. + */ + $statuses = array_merge( + $statuses, + get_post_stati( + array( + 'protected' => true, + 'show_in_admin_all_list' => true, + ) + ) + ); + + if ( is_user_logged_in() ) { + $statuses = array_merge( $statuses, get_post_stati( array( 'private' => true ) ) ); + } + } + + $statuses = array_values( $statuses ); + + $post_status_filter_type = 'terms'; + + return [ + $post_status_filter_type => [ + 'post_status' => $statuses, + ], + ]; } - return $orderbys; + return []; } /** - * Given a mapping content, try to determine the version used. - * - * @since 3.6.3 + * If in a search context set search fields, otherwise query everything. * - * @param array $mapping Mapping content. - * @param string $index Index name - * @return string Version of the mapping being used. + * @since 4.4.0 + * @param array $formatted_args Formatted Elasticsearch query + * @param array $args WP_Query arguments + * @return array */ - protected function determine_mapping_version_based_on_existing( $mapping, $index ) { - if ( isset( $mapping[ $index ]['mappings']['post']['_meta']['mapping_version'] ) ) { - return $mapping[ $index ]['mappings']['post']['_meta']['mapping_version']; - } - if ( isset( $mapping[ $index ]['mappings']['_meta']['mapping_version'] ) ) { - return $mapping[ $index ]['mappings']['_meta']['mapping_version']; - } - + protected function maybe_set_search_fields( $formatted_args, $args ) { /** - * Check for 7-0 mapping. 
- * If mapping has a `post` type, it can't be ES 7, as mapping types were removed in that release. + * Allow for search field specification * - * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html + * @since 1.0 */ - if ( ! isset( $mapping[ $index ]['mappings']['post'] ) ) { - return '7-0.php'; - } + if ( ! empty( $args['search_fields'] ) ) { + $search_field_args = $args['search_fields']; + $search_fields = []; - $post_mapping = $mapping[ $index ]['mappings']['post']; + if ( ! empty( $search_field_args['taxonomies'] ) ) { + $taxes = (array) $search_field_args['taxonomies']; + + foreach ( $taxes as $tax ) { + $search_fields[] = 'terms.' . $tax . '.name'; + } + + unset( $search_field_args['taxonomies'] ); + } + + if ( ! empty( $search_field_args['meta'] ) ) { + $metas = (array) $search_field_args['meta']; + + foreach ( $metas as $meta ) { + $search_fields[] = 'meta.' . $meta . '.value'; + } + + unset( $search_field_args['meta'] ); + } + + if ( in_array( 'author_name', $search_field_args, true ) ) { + $search_fields[] = 'post_author.login'; + + $author_name_index = array_search( 'author_name', $search_field_args, true ); + unset( $search_field_args[ $author_name_index ] ); + } + + $search_fields = array_merge( $search_field_args, $search_fields ); + } else { + $search_fields = array( + 'post_title', + 'post_excerpt', + 'post_content', + ); + } /** - * Starting at this point, our tests rely on the post_title.fields.sortable field. - * As this field is present in all our mappings, if this field is not present in - * the mapping, this is a custom mapping. + * Filter default post search fields * - * To have this code working with custom mappings, use the `ep_post_mapping_version_determined` filter. + * If you are using the weighting engine, this filter should not be used. + * Instead, you should use the ep_weighting_configuration_for_search filter. 
+ * + * @hook ep_search_fields + * @param {array} $search_fields Default search fields + * @param {array} $args WP Query arguments + * @return {array} New defaults */ - if ( ! isset( $post_mapping['properties']['post_title']['fields']['sortable'] ) ) { - return 'unknown'; - } + $search_fields = apply_filters( 'ep_search_fields', $search_fields, $args ); - $post_title_sortable = $post_mapping['properties']['post_title']['fields']['sortable']; + $search_text = ( ! empty( $args['s'] ) ) ? $args['s'] : ''; /** - * Check for 5-2 mapping. - * Normalizers on keyword fields were only made available in ES 5.2 + * We are using ep_integrate instead of ep_match_all. ep_match_all will be + * supported for legacy code but may be deprecated and removed eventually. * - * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.2/release-notes-5.2.0.html + * @since 1.3 */ - if ( isset( $post_title_sortable['normalizer'] ) ) { - return '5-2.php'; + + if ( ! empty( $search_text ) ) { + add_filter( 'ep_post_formatted_args_query', [ $this, 'adjust_query_fuzziness' ], 100, 4 ); + + $search_algorithm = $this->get_search_algorithm( $search_text, $search_fields, $args ); + $formatted_args['query'] = $search_algorithm->get_query( 'post', $search_text, $search_fields, $args ); + } elseif ( ! empty( $args['ep_match_all'] ) || ! empty( $args['ep_integrate'] ) ) { + $formatted_args['query']['match_all'] = array( + 'boost' => 1, + ); } + return $formatted_args; + } + + /** + * If needed bring sticky posts and order them. + * + * @since 4.4.0 + * @param array $formatted_args Formatted Elasticsearch query + * @param array $args WP_Query arguments + * @return array + */ + protected function maybe_add_sticky_posts( $formatted_args, $args ) { /** - * Check for 5-0 mapping. 
- * `keyword` fields were only made available in ES 5.0 - * - * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.0/release-notes-5.0.0.html + * Sticky posts support */ - if ( 'keyword' === $post_title_sortable['type'] ) { - return '5-0.php'; - } + + // Check first if there's sticky posts and show them only in the front page + $sticky_posts = get_option( 'sticky_posts' ); + $sticky_posts = ( is_array( $sticky_posts ) && empty( $sticky_posts ) ) ? false : $sticky_posts; /** - * Check for pre-5-0 mapping. - * `string` fields were deprecated in ES 5.0 in favor of text/keyword + * Filter whether to enable sticky posts for this request * - * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.0/release-notes-5.0.0.html + * @hook ep_enable_sticky_posts + * + * @param {bool} $allow Allow sticky posts for this request + * @param {array} $args Query variables + * @param {array} $formatted_args EP formatted args + * + * @return {bool} $allow */ - if ( 'string' === $post_title_sortable['type'] ) { - return 'pre-5-0.php'; + $enable_sticky_posts = apply_filters( 'ep_enable_sticky_posts', is_home(), $args, $formatted_args ); + + if ( false !== $sticky_posts + && $enable_sticky_posts + && empty( $args['s'] ) + && in_array( $args['ignore_sticky_posts'], array( 'false', 0, false ), true ) ) { + $new_sort = [ + [ + '_score' => [ + 'order' => 'desc', + ], + ], + ]; + + $formatted_args['sort'] = array_merge( $new_sort, $formatted_args['sort'] ); + + $formatted_args_query = $formatted_args['query']; + $formatted_args['query'] = array(); + $formatted_args['query']['function_score']['query'] = $formatted_args_query; + $formatted_args['query']['function_score']['functions'] = array( + // add extra weight to sticky posts to show them on top + (object) array( + 'filter' => array( + 'terms' => array( '_id' => $sticky_posts ), + ), + 'weight' => 20, + ), + ); } - return 'unknown'; + return $formatted_args; } /** - * Given ES args, add aggregations to it. 
+ * If needed set the `fields` ES query clause. * - * @since 4.1.0 - * @param array $formatted_args Formatted Elasticsearch query. - * @param array $agg Aggregation data. - * @param boolean $use_filters Whether filters should be used or not. - * @param array $filter Filters defined so far. - * @return array Formatted Elasticsearch query with the aggregation added. + * @since 4.4.0 + * @param array $formatted_args Formatted Elasticsearch query + * @param array $args WP_Query arguments + * @return array */ - protected function apply_aggregations( $formatted_args, $agg, $use_filters, $filter ) { - if ( empty( $agg['aggs'] ) ) { - return $formatted_args; - } - - // Add a name to the aggregation if it was passed through - $agg_name = ( ! empty( $agg['name'] ) ) ? $agg['name'] : 'aggregation_name'; - - // Add/use the filter if warranted - if ( isset( $agg['use-filter'] ) && false !== $agg['use-filter'] && $use_filters ) { + protected function maybe_set_fields( $formatted_args, $args ) { + /** + * Support fields. + */ + if ( isset( $args['fields'] ) ) { + switch ( $args['fields'] ) { + case 'ids': + $formatted_args['_source'] = array( + 'includes' => array( + 'post_id', + ), + ); + break; - // If a filter is being used, use it on the aggregation as well to receive relevant information to the query - $formatted_args['aggs'][ $agg_name ]['filter'] = $filter; - $formatted_args['aggs'][ $agg_name ]['aggs'] = $agg['aggs']; - } else { - $formatted_args['aggs'][ $agg_name ] = $agg['aggs']; + case 'id=>parent': + $formatted_args['_source'] = array( + 'includes' => array( + 'post_id', + 'post_parent', + ), + ); + break; + } } return $formatted_args; } /** - * Get the search algorithm that should be used. + * If needed set the `aggs` ES query clause. 
* - * @since 4.3.0 - * @param string $search_text Search term(s) - * @param array $search_fields Search fields - * @param array $query_vars Query vars - * @return SearchAlgorithm Instance of search algorithm to be used + * @since 4.4.0 + * @param array $formatted_args Formatted Elasticsearch query. + * @param array $args WP_Query arguments + * @param array $filters Filters to be applied to the ES query + * @return array */ - public function get_search_algorithm( string $search_text, array $search_fields, array $query_vars ) : \ElasticPress\SearchAlgorithm { - $search_algorithm_version_option = \ElasticPress\Utils\get_option( 'ep_search_algorithm_version', '4.0' ); - + protected function maybe_set_aggs( $formatted_args, $args, $filters ) { /** - * Filter the algorithm version to be used. - * - * @since 3.5 - * @hook ep_search_algorithm_version - * @param {string} $search_algorithm_version Algorithm version. - * @return {string} New algorithm version + * Aggregations */ - $search_algorithm = apply_filters( 'ep_search_algorithm_version', $search_algorithm_version_option ); + if ( ! empty( $args['aggs'] ) && is_array( $args['aggs'] ) ) { + // Check if the array indexes are all numeric. 
+ $agg_keys = array_keys( $args['aggs'] ); + $agg_num_keys = array_filter( $agg_keys, 'is_int' ); + $has_only_num_keys = count( $agg_num_keys ) === count( $args['aggs'] ); - /** - * Filter the search algorithm to be used - * - * @hook ep_{$indexable_slug}_search_algorithm - * @since 4.3.0 - * @param {string} $search_algorithm Slug of the search algorithm used as fallback - * @param {string} $search_term Search term - * @param {array} $search_fields Fields to be searched - * @param {array} $query_vars Query variables - * @return {string} New search algorithm slug - */ - $search_algorithm = apply_filters( "ep_{$this->slug}_search_algorithm", $search_algorithm, $search_text, $search_fields, $query_vars ); + if ( $has_only_num_keys ) { + foreach ( $args['aggs'] as $agg ) { + $formatted_args = $this->apply_aggregations( $formatted_args, $agg, ! empty( $filters ), $filters ); + } + } else { + // Single aggregation. + $formatted_args = $this->apply_aggregations( $formatted_args, $args['aggs'], ! empty( $filters ), $filters ); + } + } - return \ElasticPress\SearchAlgorithms::factory()->get( $search_algorithm ); + return $formatted_args; } } From cf43ec7ec66c1f161f692c63c4a2b5876b56f437 Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Mon, 3 Oct 2022 16:34:52 -0300 Subject: [PATCH 2/5] Add the `ep_post_filters` filter --- includes/classes/Indexable/Post/Post.php | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/includes/classes/Indexable/Post/Post.php b/includes/classes/Indexable/Post/Post.php index ffa779b1f4..47b26b2492 100644 --- a/includes/classes/Indexable/Post/Post.php +++ b/includes/classes/Indexable/Post/Post.php @@ -1441,10 +1441,23 @@ protected function parse_filters( $args, $query ) { 'post_status' => $this->parse_post_status( $args ), ]; - $filters = array_values( array_filter( $filters ) ); - $use_filters = ! empty( $filters ); + /** + * Filter the ES filters that will be applied to the ES query. 
+ * + * Although each index of the `$filters` array contains the related WP Query argument, + * it will be removed before being applied to the ES query. + * + * @hook ep_post_filters + * @param {array} Current filters + * @param {array} WP Query args + * @param {WP_Query} WP Query object + * @return {array} New filters + */ + $filters = apply_filters( 'ep_post_filters', $filters, $args, $query ); + + $filters = array_values( array_filter( $filters ) ); - if ( $use_filters ) { + if ( ! empty( $filters ) ) { $filters = [ 'bool' => [ 'must' => $filters, From de655b9d75d78bd3d2dfa6d03563393c25a7b8fd Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Mon, 3 Oct 2022 16:57:21 -0300 Subject: [PATCH 3/5] More refactoring --- includes/classes/Indexable/Post/Post.php | 239 +++++++++++------------ 1 file changed, 118 insertions(+), 121 deletions(-) diff --git a/includes/classes/Indexable/Post/Post.php b/includes/classes/Indexable/Post/Post.php index 47b26b2492..fb5d7ae0f3 100644 --- a/includes/classes/Indexable/Post/Post.php +++ b/includes/classes/Indexable/Post/Post.php @@ -1543,34 +1543,26 @@ protected function parse_from( $args ) { * @return int */ protected function parse_size( $args ) { - if ( ! empty( $args['posts_per_page'] ) ) { - $posts_per_page = (int) $args['posts_per_page']; + if ( empty( $args['posts_per_page'] ) ) { + return (int) get_option( 'posts_per_page' ); + } - // ES have a maximum size allowed so we have to convert "-1" to a maximum size. - if ( -1 === $posts_per_page ) { - /** - * Set the maximum results window size. - * - * The request will return a HTTP 500 Internal Error if the size of the - * request is larger than the [index.max_result_window] parameter in ES. - * See the scroll api for a more efficient way to request large data sets. - * - * @return int The max results window size.
- * - * @since 2.3.0 - */ + $posts_per_page = (int) $args['posts_per_page']; - /** - * Filter max result size if set to -1 - * - * @hook ep_max_results_window - * @param {int} Max result window - * @return {int} New window - */ - $posts_per_page = apply_filters( 'ep_max_results_window', 10000 ); - } - } else { - $posts_per_page = (int) get_option( 'posts_per_page' ); + // ES has a maximum size allowed so we have to convert "-1" to a maximum size. + if ( -1 === $posts_per_page ) { + /** + * Filter max result size if set to -1 + * + * The request will return an HTTP 500 Internal Error if the size of the + * request is larger than the [index.max_result_window] parameter in ES. + * See the scroll API for a more efficient way to request large data sets. + * + * @hook ep_max_results_window + * @param {int} Max result window + * @return {int} New window + */ + $posts_per_page = apply_filters( 'ep_max_results_window', 10000 ); } return $posts_per_page; @@ -1732,19 +1724,19 @@ protected function parse_tax_queries( $args, $query ) { * @return array */ protected function parse_post_parent( $args ) { - if ( isset( $args['post_parent'] ) && '' !== $args['post_parent'] && 'any' !== strtolower( $args['post_parent'] ) ) { - return [ - 'bool' => [ - 'must' => [ - 'term' => [ - 'post_parent' => $args['post_parent'], - ], - ], - ], - ]; + if ( empty( $args['post_parent'] ) || 'any' === strtolower( $args['post_parent'] ) ) { + return []; } - return []; + return [ + 'bool' => [ + 'must' => [ + 'term' => [ + 'post_parent' => $args['post_parent'], + ], + ], + ], + ]; } /** @@ -1755,19 +1747,19 @@ protected function parse_post_parent( $args ) { * @return array */ protected function parse_post__in( $args ) { - if ( ! 
empty( $args['post__in'] ) ) { - return [ - 'bool' => [ - 'must' => [ - 'terms' => [ - 'post_id' => array_values( (array) $args['post__in'] ), - ], - ], - ], - ]; + if ( empty( $args['post__in'] ) ) { + return []; } - return []; + return [ + 'bool' => [ + 'must' => [ + 'terms' => [ + 'post_id' => array_values( (array) $args['post__in'] ), + ], + ], + ], + ]; } /** @@ -1778,19 +1770,19 @@ protected function parse_post__in( $args ) { * @return array */ protected function parse_post_name__in( $args ) { - if ( ! empty( $args['post_name__in'] ) ) { - return [ - 'bool' => [ - 'must' => [ - 'terms' => [ - 'post_name.raw' => array_values( (array) $args['post_name__in'] ), - ], + if ( empty( $args['post_name__in'] ) ){ + return []; + } + + return [ + 'bool' => [ + 'must' => [ + 'terms' => [ + 'post_name.raw' => array_values( (array) $args['post_name__in'] ), ], ], - ]; - } - - return []; + ], + ]; } /** @@ -1801,19 +1793,19 @@ protected function parse_post_name__in( $args ) { * @return array */ protected function parse_post__not_in( $args ) { - if ( ! empty( $args['post__not_in'] ) ) { - return [ - 'bool' => [ - 'must_not' => [ - 'terms' => [ - 'post_id' => (array) $args['post__not_in'], - ], + if ( empty( $args['post__not_in'] ) ) { + return []; + } + + return [ + 'bool' => [ + 'must_not' => [ + 'terms' => [ + 'post_id' => (array) $args['post__not_in'], ], ], - ]; - } - - return []; + ], + ]; } /** @@ -1824,19 +1816,19 @@ protected function parse_post__not_in( $args ) { * @return array */ protected function parse_category__not_in( $args ) { - if ( ! 
empty( $args['category__not_in'] ) ) { - return [ - 'bool' => [ - 'must_not' => [ - 'terms' => [ - 'terms.category.term_id' => array_values( (array) $args['category__not_in'] ), - ], - ], - ], - ]; + if ( empty( $args['category__not_in'] ) ) { + return []; } - return []; + return [ + 'bool' => [ + 'must_not' => [ + 'terms' => [ + 'terms.category.term_id' => array_values( (array) $args['category__not_in'] ), + ], + ], + ], + ]; } /** @@ -1847,19 +1839,19 @@ protected function parse_category__not_in( $args ) { * @return array */ protected function parse_tag__not_in( $args ) { - if ( ! empty( $args['tag__not_in'] ) ) { - return [ - 'bool' => [ - 'must_not' => [ - 'terms' => [ - 'terms.post_tag.term_id' => array_values( (array) $args['tag__not_in'] ), - ], - ], - ], - ]; + if ( empty( $args['tag__not_in'] ) ) { + return []; } - return []; + return [ + 'bool' => [ + 'must_not' => [ + 'terms' => [ + 'terms.post_tag.term_id' => array_values( (array) $args['tag__not_in'] ), + ], + ], + ], + ]; } /** @@ -1928,38 +1920,43 @@ protected function parse_author( $args ) { * @return array */ protected function parse_post_mime_type( $args ) { - if ( ! 
empty( $args['post_mime_type'] ) ) { - if ( is_array( $args['post_mime_type'] ) ) { - - $args_post_mime_type = []; - - foreach ( $args['post_mime_type'] as $mime_type ) { - /** - * check if matches the MIME type pattern: type/subtype and - * leave an empty string as posts, pages and CPTs don't have a MIME type - */ - if ( preg_match( '/^[-._a-z0-9]+\/[-._a-z0-9]+$/i', $mime_type ) || empty( $mime_type ) ) { - $args_post_mime_type[] = $mime_type; - } else { - $filtered_mime_type_by_type = wp_match_mime_types( $mime_type, wp_get_mime_types() ); - - $args_post_mime_type = array_merge( $args_post_mime_type, $filtered_mime_type_by_type[ $mime_type ] ); - } - } + if ( empty( $args['post_mime_type'] ) ) { + return []; + } - return [ - 'terms' => [ - 'post_mime_type' => $args_post_mime_type, - ], - ]; - } elseif ( is_string( $args['post_mime_type'] ) ) { - return [ - 'regexp' => array( - 'post_mime_type' => $args['post_mime_type'] . '.*', - ), - ]; + if ( is_array( $args['post_mime_type'] ) ) { + + $args_post_mime_type = []; + + foreach ( $args['post_mime_type'] as $mime_type ) { + /** + * check if matches the MIME type pattern: type/subtype and + * leave an empty string as posts, pages and CPTs don't have a MIME type + */ + if ( preg_match( '/^[-._a-z0-9]+\/[-._a-z0-9]+$/i', $mime_type ) || empty( $mime_type ) ) { + $args_post_mime_type[] = $mime_type; + } else { + $filtered_mime_type_by_type = wp_match_mime_types( $mime_type, wp_get_mime_types() ); + + $args_post_mime_type = array_merge( $args_post_mime_type, $filtered_mime_type_by_type[ $mime_type ] ); + } } + + return [ + 'terms' => [ + 'post_mime_type' => $args_post_mime_type, + ], + ]; + } + + if ( is_string( $args['post_mime_type'] ) ) { + return [ + 'regexp' => array( + 'post_mime_type' => $args['post_mime_type'] . 
'.*', + ), + ]; } + return []; } From 80e7f9174530986e3f6f5c1fd1a8a39ccdaf40bc Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Mon, 3 Oct 2022 16:59:09 -0300 Subject: [PATCH 4/5] code linting --- includes/classes/Indexable/Post/Post.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/includes/classes/Indexable/Post/Post.php b/includes/classes/Indexable/Post/Post.php index fb5d7ae0f3..13d8f9690c 100644 --- a/includes/classes/Indexable/Post/Post.php +++ b/includes/classes/Indexable/Post/Post.php @@ -1770,10 +1770,10 @@ protected function parse_post__in( $args ) { * @return array */ protected function parse_post_name__in( $args ) { - if ( empty( $args['post_name__in'] ) ){ + if ( empty( $args['post_name__in'] ) ) { return []; } - + return [ 'bool' => [ 'must' => [ @@ -1796,7 +1796,7 @@ protected function parse_post__not_in( $args ) { if ( empty( $args['post__not_in'] ) ) { return []; } - + return [ 'bool' => [ 'must_not' => [ @@ -1948,7 +1948,7 @@ protected function parse_post_mime_type( $args ) { ], ]; } - + if ( is_string( $args['post_mime_type'] ) ) { return [ 'regexp' => array( From 395ec3fd29caeec5bfbfb1eb12f0f0d362975451 Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Tue, 4 Oct 2022 13:55:48 -0300 Subject: [PATCH 5/5] Unit test for the ep_post_filters filter --- tests/php/indexables/TestPost.php | 37 +++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/php/indexables/TestPost.php b/tests/php/indexables/TestPost.php index 827b0ba679..b9734636ab 100644 --- a/tests/php/indexables/TestPost.php +++ b/tests/php/indexables/TestPost.php @@ -6013,6 +6013,43 @@ public function testFormatArgsAggs() { $this->assertSame( 'terms.post_type', $args['aggs']['aggregation_name']['terms']['field'] ); } + /** + * Tests the `ep_post_filters` filter + * + * @return void + * @group post + */ + public function testFormatArgsEpPostFilter() { + $post = new \ElasticPress\Indexable\Post\Post(); + + $test_args = []; + $test_query = 
new \WP_Query( $test_args ); + + $add_es_filter = function( $filters, $args, $query ) use ( $test_query, $test_args ) { + $filters['new_filter'] = [ + 'term' => [ + 'my_custom_field.raw' => 'my_custom_value', + ], + ]; + + // Simple check if the filter additional parameters work. + $this->assertSame( $test_query, $query ); + $this->assertSame( $test_args, $args ); + + return $filters; + }; + add_filter( 'ep_post_filters', $add_es_filter, 10, 3 ); + + $args = $post->format_args( $test_args, $test_query ); + + $this->assertNotEmpty( $args['post_filter']['bool']['must'] ); + + $last_filter = end( $args['post_filter']['bool']['must'] ); + $this->assertSame( [ 'my_custom_field.raw' => 'my_custom_value' ], $last_filter['term'] ); + + remove_filter( 'ep_post_filters', $add_es_filter ); + } + /** * Tests additional order by parameters in parse_orderby(). *