jetpack-wpes-query-builder.php 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. <?php
  2. /**
  3. * Provides an interface for easily building a complex search query that
  4. * combines multiple ranking signals.
  5. *
  6. *
  7. * $bldr = new Jetpack_WPES_Query_Builder();
  8. * $bldr->add_filter( ... );
  9. * $bldr->add_filter( ... );
  10. * $bldr->add_query( ... );
  11. * $es_query = $bldr->build_query();
  12. *
  13. *
  14. * All ES queries take a standard form: a main query (with some filters),
  15. * wrapped in a function_score when scoring functions, decays or scripts are registered.
  16. *
  17. * Bucketed queries use an aggregation to diversify results, e.g. several
  18. * separate named filters that each select a different set of results.
  19. *
  20. */
  class Jetpack_WPES_Query_Builder {

      // Filters registered through add_filter(); combined by build_filter().
      protected $es_filters = array();

      // Custom boosting with function_score.
      // $functions and $decays are keyed by function name; each entry is a list
      // of configs of the form array( field => params ).
      protected $functions = array();
      protected $decays = array();
      // Scripts emitted as script_score functions. No public adder exists in
      // this class; the property is only read by build_query().
      protected $scripts = array();
      protected $functions_max_boost = 2.0;
      protected $functions_score_mode = 'multiply';
      // Optional boost applied to the top level bool query (null = none).
      protected $query_bool_boost = null;

      // General aggregations for buckets and metrics.
      protected $aggs_query = false;
      protected $aggs = array();

      // The set of top level text queries to combine.
      protected $must_queries = array();
      protected $should_queries = array();
      protected $dis_max_queries = array();

      // Diversified ("bucketed") results: named filters plus their sub aggregations.
      protected $diverse_buckets_query = false;
      protected $bucket_filters = array();
      protected $bucket_sub_aggs = array();

      ////////////////////////////////////
      // Methods for building a query

      /**
       * Register a filter to be combined by build_filter().
       *
       * @param $filter array ES filter clause
       *
       * @return void
       */
      public function add_filter( $filter ) {
          $this->es_filters[] = $filter;
      }

      /**
       * Register a top level query.
       *
       * @param $query array ES query clause
       * @param $type string 'must', 'should' or 'dis_max'; any other value is treated as 'must'
       *
       * @return void
       */
      public function add_query( $query, $type = 'must' ) {
          switch ( $type ) {
              case 'dis_max':
                  $this->dis_max_queries[] = $query;
                  break;

              case 'should':
                  $this->should_queries[] = $query;
                  break;

              case 'must':
              default:
                  $this->must_queries[] = $query;
                  break;
          }
      }

      /**
       * Add a scoring function to the query
       *
       * NOTE: For decays (linear, exp, or gauss), use Jetpack_WPES_Query_Builder::add_decay() instead
       *
       * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
       *
       * @param $function string name of the function
       * @param $params array functions parameters, as array( field => params ); build_query()
       *                      copies the field name into the params under the 'field' key
       *
       * @return void
       */
      public function add_function( $function, $params ) {
          $this->functions[ $function ][] = $params;
      }

      /**
       * Add a decay function to score results
       *
       * This method should be used instead of Jetpack_WPES_Query_Builder::add_function() for decays, as the internal ES structure
       * is slightly different for them.
       *
       * @see https://www.elastic.co/guide/en/elasticsearch/guide/current/decay-functions.html
       *
       * @param $function string name of the decay function - linear, exp, or gauss
       * @param $params array The decay functions parameters, as array( field => params ), passed to ES directly
       *
       * @return void
       */
      public function add_decay( $function, $params ) {
          $this->decays[ $function ][] = $params;
      }

      /**
       * Add a scoring mode to the query
       *
       * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
       *
       * @param $mode string name of how to score
       *
       * @return void
       */
      public function add_score_mode_to_functions( $mode = 'multiply' ) {
          $this->functions_score_mode = $mode;
      }

      /**
       * Set the 'max_boost' emitted with the function_score wrapper.
       *
       * @param $boost int|float maximum boost
       *
       * @return void
       */
      public function add_max_boost_to_functions( $boost ) {
          $this->functions_max_boost = $boost;
      }

      /**
       * Set the boost applied to the top level bool query.
       *
       * build_query() only applies it when the built query has a 'bool' key.
       *
       * @param $boost int|float boost value
       *
       * @return void
       */
      public function add_boost_to_query_bool( $boost ) {
          $this->query_bool_boost = $boost;
      }

      /**
       * Register (or replace) a general aggregation.
       *
       * Once any general aggregation is registered, build_aggregation() returns
       * the general aggregations only.
       *
       * @param $aggs_name string name of the aggregation
       * @param $aggs array ES aggregation definition
       *
       * @return void
       */
      public function add_aggs( $aggs_name, $aggs ) {
          $this->aggs_query = true;
          $this->aggs[ $aggs_name ] = $aggs;
      }

      /**
       * Set (replacing any previous value) the sub aggregations of a general aggregation.
       *
       * @param $aggs_name string name of an aggregation, normally one registered with add_aggs()
       * @param $sub_aggs array ES sub aggregation definitions
       *
       * @return void
       */
      public function add_aggs_sub_aggs( $aggs_name, $sub_aggs ) {
          // An unknown name used to be handed straight to array_key_exists(),
          // which raises a notice/warning on PHP 7 and a TypeError on PHP 8.
          // Start from an empty definition instead; this keeps the resulting
          // state identical to what PHP 7 produced.
          if ( ! isset( $this->aggs[ $aggs_name ] ) ) {
              $this->aggs[ $aggs_name ] = array();
          }

          $this->aggs[ $aggs_name ]['aggs'] = $sub_aggs;
      }

      /**
       * Add a named bucket whose filter is an arbitrary query.
       *
       * The query is used both as the bucket filter and as a dis_max query.
       *
       * @param $name string bucket name
       * @param $query array ES query clause
       *
       * @return void
       */
      public function add_bucketed_query( $name, $query ) {
          $this->_add_bucket_filter( $name, $query );
          $this->add_query( $query, 'dis_max' );
      }

      /**
       * Add a named bucket matching one or more terms on a field.
       *
       * The terms clause is used as the bucket filter and, wrapped in a
       * constant_score carrying $boost, as a dis_max query.
       *
       * @param $name string bucket name
       * @param $field string field to match against
       * @param $terms mixed a single term or an array of terms
       * @param $boost int|float boost of the constant_score query
       *
       * @return void
       */
      public function add_bucketed_terms( $name, $field, $terms, $boost = 1 ) {
          if ( ! is_array( $terms ) ) {
              $terms = array( $terms );
          }

          $terms_clause = array(
              'terms' => array(
                  $field => $terms,
              ),
          );

          $this->_add_bucket_filter( $name, $terms_clause );

          $this->add_query( array(
              'constant_score' => array(
                  'filter' => $terms_clause,
                  'boost' => $boost,
              ),
          ), 'dis_max' );
      }

      /**
       * Merge sub aggregations into the set applied to the buckets.
       *
       * @param $agg array ES aggregation definitions keyed by name
       *
       * @return void
       */
      public function add_bucket_sub_aggs( $agg ) {
          $this->bucket_sub_aggs = array_merge( $this->bucket_sub_aggs, $agg );
      }

      /**
       * Record a named bucket filter and flag the query as a diverse buckets query.
       *
       * @param $name string bucket name; an existing bucket of that name is replaced
       * @param $filter array ES filter clause
       *
       * @return void
       */
      protected function _add_bucket_filter( $name, $filter ) {
          $this->diverse_buckets_query = true;
          $this->bucket_filters[ $name ] = $filter;
      }

      ////////////////////////////////////
      // Building Final Query

      /**
       * Combine all the queries, functions, decays, scripts, and max_boost into an ES query
       *
       * The builder state is left untouched, so this can be called repeatedly
       * (also after registering further queries) and always reflects exactly
       * what has been registered.
       *
       * @return array Array representation of the built ES query
       */
      public function build_query() {
          // Work on a copy: writing the synthesised dis_max / match_all clauses
          // back to $this->must_queries would duplicate them on the next call.
          $must_queries = $this->must_queries;

          // dis_max queries just become a single must query
          if ( ! empty( $this->dis_max_queries ) ) {
              $must_queries[] = array(
                  'dis_max' => array(
                      'queries' => $this->dis_max_queries,
                  ),
              );
          }

          // Nothing registered at all: match every document.
          if ( empty( $must_queries ) ) {
              $must_queries = array(
                  array(
                      'match_all' => array(),
                  ),
              );
          }

          if ( ! empty( $this->should_queries ) ) {
              $query = array(
                  'bool' => array(
                      'must' => $must_queries,
                      'should' => $this->should_queries,
                  ),
              );
          } elseif ( 1 === count( $must_queries ) ) {
              // A lone must query does not need a bool wrapper.
              $query = $must_queries[0];
          } else {
              $query = array(
                  'bool' => array(
                      'must' => $must_queries,
                  ),
              );
          }

          // Note this also applies when a lone must query is itself a bool query.
          if ( ! is_null( $this->query_bool_boost ) && isset( $query['bool'] ) ) {
              $query['bool']['boost'] = $this->query_bool_boost;
          }

          // If there are any function score adjustments, then combine those
          if ( $this->functions || $this->decays || $this->scripts ) {
              $query = array(
                  'function_score' => array(
                      'query' => $query,
                      'functions' => $this->collect_weighting_functions(),
                      'max_boost' => $this->functions_max_boost,
                      'score_mode' => $this->functions_score_mode,
                  ),
              );
          }

          return $query;
      }

      /**
       * Flatten the registered functions, decays and scripts (in that order)
       * into the 'functions' list of a function_score query.
       *
       * @return array List of function_score function definitions
       */
      private function collect_weighting_functions() {
          $weighting_functions = array();

          foreach ( $this->functions as $function_type => $configs ) {
              foreach ( $configs as $config ) {
                  foreach ( $config as $field => $params ) {
                      // Plain functions carry the field name inside their params.
                      $func_arr = $params;
                      $func_arr['field'] = $field;

                      $weighting_functions[] = array(
                          $function_type => $func_arr,
                      );
                  }
              }
          }

          foreach ( $this->decays as $decay_type => $configs ) {
              foreach ( $configs as $config ) {
                  foreach ( $config as $field => $params ) {
                      // Decays are keyed by the field name instead.
                      $weighting_functions[] = array(
                          $decay_type => array(
                              $field => $params,
                          ),
                      );
                  }
              }
          }

          foreach ( $this->scripts as $script ) {
              $weighting_functions[] = array(
                  'script_score' => array(
                      'script' => $script,
                  ),
              );
          }

          return $weighting_functions;
      }

      /**
       * Assemble the 'filter' portion of an ES query, from all registered filters
       *
       * NOTE(review): several filters are combined with the legacy 'and' filter,
       * which newer Elasticsearch releases replaced with bool filters - confirm
       * against the ES version this builder targets before changing it.
       *
       * @return array|null Combined ES filters, or null if none have been defined
       */
      public function build_filter() {
          if ( empty( $this->es_filters ) ) {
              return null;
          }

          if ( 1 === count( $this->es_filters ) ) {
              return $this->es_filters[0];
          }

          return array(
              'and' => $this->es_filters,
          );
      }

      /**
       * Assemble the 'aggregation' portion of an ES query, from all general aggregations.
       *
       * General aggregations (add_aggs()) take precedence: when any exist they
       * are returned as-is and bucket data is ignored. Otherwise the result is
       * empty when no bucket sub aggregations exist (even if bucket filters
       * were registered), the bare sub aggregations when no bucket filter was
       * registered, or a 'topics' filters aggregation carrying both.
       *
       * @return array An aggregation query as an array of topics, filters, and bucket names
       */
      public function build_aggregation() {
          if ( ! empty( $this->aggs_query ) ) {
              return $this->aggs;
          }

          if ( empty( $this->bucket_sub_aggs ) ) {
              return array();
          }

          if ( ! $this->diverse_buckets_query ) {
              return $this->bucket_sub_aggs;
          }

          return array(
              'topics' => array(
                  'filters' => array(
                      'filters' => $this->bucket_filters,
                  ),
                  'aggs' => $this->bucket_sub_aggs,
              ),
          );
      }
  }