jetpack-wpes-query-parser.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683
  1. <?php
  2. /**
  3. * Parse a pure text query into WordPress Elasticsearch query. This builds on
  4. * the Jetpack_WPES_Query_Builder() to provide search query parsing.
  5. *
  6. * The key part of this parser is taking a user's query string typed into a box
  7. * and converting it into an ES search query.
  8. *
  9. * This varies by application, but roughly it means extracting some parts of the query
  10. * (authors, tags, and phrases) that are treated as a filter. Then taking the
  11. * remaining words and building the correct query (possibly with prefix searching
  12. * if we are doing search as you type)
  13. *
  14. * This class only supports ES 2.x+
  15. *
  16. * This parser builds queries of the form:
  17. * bool:
  18. * must:
  19. * AND match of a single field (ideally an edgengram field)
  20. * filter:
  21. * filter clauses from context (eg @gibrown, #news, etc)
  22. * should:
  23. * boosting of results by various fields
  24. *
  25. * Features supported:
  26. * - search as you type
  27. * - phrases
  28. * - supports querying across multiple languages at once
  29. *
  30. * Example usage (from Search on Reader Manage):
  31. *
  32. * require_lib( 'jetpack-wpes-query-builder/jetpack-wpes-search-query-parser' );
  33. * $parser = new Jetpack_WPES_Search_Query_Parser( $args['q'], array( $lang ) );
  34. *
  35. * //author
  36. * $parser->author_field_filter( array(
  37. * 'prefixes' => array( '@' ),
  38. * 'wpcom_id_field' => 'author_id',
  39. * 'must_query_fields' => array( 'author.engram', 'author_login.engram' ),
  40. * 'boost_query_fields' => array( 'author^2', 'author_login^2', 'title.default.engram' ),
  41. * ) );
  42. *
  43. * //remainder of query
  44. * $match_content_fields = $parser->merge_ml_fields(
  45. * array(
  46. * 'all_content' => 0.1,
  47. * ),
  48. * array(
  49. * 'all_content.default.engram^0.1',
  50. * )
  51. * );
  52. * $boost_content_fields = $parser->merge_ml_fields(
  53. * array(
  54. * 'title' => 2,
  55. * 'description' => 1,
  56. * 'tags' => 1,
  57. * ),
  58. * array(
  59. * 'author_login^2',
  60. * 'author^2',
  61. * )
  62. * );
  63. *
  64. * $parser->phrase_filter( array(
  65. * 'must_query_fields' => $match_content_fields,
  66. * 'boost_query_fields' => $boost_content_fields,
  67. * ) );
  68. * $parser->remaining_query( array(
  69. * 'must_query_fields' => $match_content_fields,
  70. * 'boost_query_fields' => $boost_content_fields,
  71. * ) );
  72. *
  73. * //Boost on phrases
  74. * $parser->remaining_query( array(
  75. * 'boost_query_fields' => $boost_content_fields,
  76. * 'boost_query_type' => 'phrase',
  77. * ) );
  78. *
  79. * //boosting
  80. * $parser->add_max_boost_to_functions( 20 );
  81. * $parser->add_function( 'field_value_factor', array(
  82. * 'follower_count' => array(
  83. * 'modifier' => 'sqrt',
  84. * 'factor' => 1,
  85. * 'missing' => 0,
  86. * ) ) );
  87. *
  88. * //Filtering
  89. * $parser->add_filter( array(
  90. * 'exists' => array( 'field' => 'langs.' . $lang )
  91. * ) );
  92. *
  93. * //run the query
  94. * $es_query_args = array(
  95. * 'name' => 'feeds',
  96. * 'blog_id' => false,
  97. * 'security_strategy' => 'a8c',
  98. * 'type' => 'feed,blog',
  99. * 'fields' => array( 'blog_id', 'feed_id' ),
  100. * 'query' => $parser->build_query(),
  101. * 'filter' => $parser->build_filter(),
  102. * 'size' => $size,
  103. * 'from' => $from
  104. * );
  105. * $es_results = es_api_search_index( $es_query_args, 'api-feed-find' );
  106. *
  107. */
  108. jetpack_require_lib( 'jetpack-wpes-query-builder' );
  109. class Jetpack_WPES_Search_Query_Parser extends Jetpack_WPES_Query_Builder {
  110. protected $orig_query = '';
  111. protected $current_query = '';
  112. protected $langs;
  113. protected $avail_langs = array( 'ar', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'eu', 'fa', 'fi', 'fr', 'he', 'hi', 'hu', 'hy', 'id', 'it', 'ja', 'ko', 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' );
  114. public function __construct( $user_query, $langs ) {
  115. $this->orig_query = $user_query;
  116. $this->current_query = $this->orig_query;
  117. $this->langs = $this->norm_langs( $langs );
  118. }
  119. protected $extracted_phrases = array();
  120. ///////////////////////////////////////////////////////
  121. // Methods for Building arrays of multilingual fields
  122. /*
  123. * Normalize language codes
  124. */
  125. public function norm_langs( $langs ) {
  126. $lst = array();
  127. foreach( $langs as $l ) {
  128. $l = strtok( $l, '-_' );
  129. if ( in_array( $l, $this->avail_langs ) ) {
  130. $lst[$l] = true;
  131. } else {
  132. $lst['default'] = true;
  133. }
  134. }
  135. return array_keys( $lst );
  136. }
  137. /*
  138. * Take a list of field prefixes and expand them for multi-lingual
  139. * with the provided boostings.
  140. */
  141. public function merge_ml_fields( $fields2boosts, $additional_fields ) {
  142. $flds = array();
  143. foreach( $fields2boosts as $f => $b ) {
  144. foreach( $this->langs as $l ) {
  145. $flds[] = $f . '.' . $l . '^' . $b;
  146. }
  147. }
  148. foreach( $additional_fields as $f ) {
  149. $flds[] = $f;
  150. }
  151. return $flds;
  152. }
  153. ////////////////////////////////////
  154. // Extract Fields for Filtering on
  155. /*
  156. * Extract any @mentions from the user query
  157. * use them as a filter if we can find a wp.com id
  158. * otherwise use them as a
  159. *
  160. * args:
  161. * wpcom_id_field: wp.com id field
  162. * must_query_fields: array of fields to search for matching results (optional)
  163. * boost_query_fields: array of fields to search in for boosting results (optional)
  164. * prefixes: array of prefixes that the user can use to indicate an author
  165. *
  166. * returns true/false of whether any were found
  167. *
  168. * See also: https://github.com/twitter/twitter-text/blob/master/java/src/com/twitter/Regex.java
  169. */
  170. public function author_field_filter( $args ) {
  171. $defaults = array(
  172. 'wpcom_id_field' => 'author_id',
  173. 'must_query_fields' => null,
  174. 'boost_query_fields' => null,
  175. 'prefixes' => array( '@' ),
  176. );
  177. $args = wp_parse_args( $args, $defaults );
  178. $names = array();
  179. foreach( $args['prefixes'] as $p ) {
  180. $found = $this->get_fields( $p );
  181. if ( $found ) {
  182. foreach( $found as $f ) {
  183. $names[] = $f;
  184. }
  185. }
  186. }
  187. if ( empty( $names ) ) {
  188. return false;
  189. }
  190. foreach( $args['prefixes'] as $p ) {
  191. $this->remove_fields( $p );
  192. }
  193. $user_ids = array();
  194. $query_names = array();
  195. //loop through the matches and separate into filters and queries
  196. foreach( $names as $n ) {
  197. //check for exact match on login
  198. $userdata = get_user_by( 'login', strtolower( $n ) );
  199. $filtering = false;
  200. if ( $userdata ) {
  201. $user_ids[ $userdata->ID ] = true;
  202. $filtering = true;
  203. }
  204. $is_phrase = false;
  205. if ( preg_match( '/"/', $n ) ) {
  206. $is_phrase = true;
  207. $n = preg_replace( '/"/', '', $n );
  208. }
  209. if ( !empty( $args['must_query_fields'] ) && !$filtering ) {
  210. if ( $is_phrase ) {
  211. $this->add_query( array(
  212. 'multi_match' => array(
  213. 'fields' => $args['must_query_fields'],
  214. 'query' => $n,
  215. 'type' => 'phrase',
  216. ) ) );
  217. } else {
  218. $this->add_query( array(
  219. 'multi_match' => array(
  220. 'fields' => $args['must_query_fields'],
  221. 'query' => $n,
  222. ) ) );
  223. }
  224. }
  225. if ( !empty( $args['boost_query_fields'] ) ) {
  226. if ( $is_phrase ) {
  227. $this->add_query( array(
  228. 'multi_match' => array(
  229. 'fields' => $args['boost_query_fields'],
  230. 'query' => $n,
  231. 'type' => 'phrase',
  232. ) ), 'should' );
  233. } else {
  234. $this->add_query( array(
  235. 'multi_match' => array(
  236. 'fields' => $args['boost_query_fields'],
  237. 'query' => $n,
  238. ) ), 'should' );
  239. }
  240. }
  241. }
  242. if ( ! empty( $user_ids ) ) {
  243. $user_ids = array_keys( $user_ids );
  244. $this->add_filter( array( 'terms' => array( $args['wpcom_id_field'] => $user_ids ) ) );
  245. }
  246. return true;
  247. }
  248. /*
  249. * Extract any prefix followed by text use them as a must clause,
  250. * and optionally as a boost to the should query
  251. * This can be used for hashtags. eg #News, or #"current events",
  252. * but also works for any arbitrary field. eg from:Greg
  253. *
  254. * args:
  255. * must_query_fields: array of fields that must match the tag (optional)
  256. * boost_query_fields: array of fields to boost search on (optional)
  257. * prefixes: array of prefixes that the user can use to indicate a tag
  258. *
  259. * returns true/false of whether any were found
  260. *
  261. */
  262. public function text_field_filter( $args ) {
  263. $defaults = array(
  264. 'must_query_fields' => array( 'tag.name' ),
  265. 'boost_query_fields' => array( 'tag.name' ),
  266. 'prefixes' => array( '#' ),
  267. );
  268. $args = wp_parse_args( $args, $defaults );
  269. $tags = array();
  270. foreach( $args['prefixes'] as $p ) {
  271. $found = $this->get_fields( $p );
  272. if ( $found ) {
  273. foreach( $found as $f ) {
  274. $tags[] = $f;
  275. }
  276. }
  277. }
  278. if ( empty( $tags ) ) {
  279. return false;
  280. }
  281. foreach( $args['prefixes'] as $p ) {
  282. $this->remove_fields( $p );
  283. }
  284. foreach( $tags as $t ) {
  285. $is_phrase = false;
  286. if ( preg_match( '/"/', $t ) ) {
  287. $is_phrase = true;
  288. $t = preg_replace( '/"/', '', $t );
  289. }
  290. if ( ! empty( $args['must_query_fields'] ) ) {
  291. if ( $is_phrase ) {
  292. $this->add_query( array(
  293. 'multi_match' => array(
  294. 'fields' => $args['must_query_fields'],
  295. 'query' => $t,
  296. 'type' => 'phrase',
  297. ) ) );
  298. } else {
  299. $this->add_query( array(
  300. 'multi_match' => array(
  301. 'fields' => $args['must_query_fields'],
  302. 'query' => $t,
  303. ) ) );
  304. }
  305. }
  306. if ( ! empty( $args['boost_query_fields'] ) ) {
  307. if ( $is_phrase ) {
  308. $this->add_query( array(
  309. 'multi_match' => array(
  310. 'fields' => $args['boost_query_fields'],
  311. 'query' => $t,
  312. 'type' => 'phrase',
  313. ) ), 'should' );
  314. } else {
  315. $this->add_query( array(
  316. 'multi_match' => array(
  317. 'fields' => $args['boost_query_fields'],
  318. 'query' => $t,
  319. ) ), 'should' );
  320. }
  321. }
  322. }
  323. return true;
  324. }
  325. /*
  326. * Extract anything surrounded by quotes or if there is an opening quote
  327. * that is not complete, and add them to the query as a phrase query.
  328. * Quotes can be either '' or ""
  329. *
  330. * args:
  331. * must_query_fields: array of fields that must match the phrases
  332. * boost_query_fields: array of fields to boost the phrases on (optional)
  333. *
  334. * returns true/false of whether any were found
  335. *
  336. */
  337. public function phrase_filter( $args ) {
  338. $defaults = array(
  339. 'must_query_fields' => array( 'all_content' ),
  340. 'boost_query_fields' => array( 'title' ),
  341. );
  342. $args = wp_parse_args( $args, $defaults );
  343. $phrases = array();
  344. if ( preg_match_all( '/"([^"]+)"/', $this->current_query, $matches ) ) {
  345. foreach ( $matches[1] as $match ) {
  346. $phrases[] = $match;
  347. }
  348. $this->current_query = preg_replace( '/"([^"]+)"/', '', $this->current_query );
  349. }
  350. if ( preg_match_all( "/'([^']+)'/", $this->current_query, $matches ) ) {
  351. foreach ( $matches[1] as $match ) {
  352. $phrases[] = $match;
  353. }
  354. $this->current_query = preg_replace( "/'([^']+)'/", '', $this->current_query );
  355. }
  356. //look for a final, uncompleted phrase
  357. $phrase_prefix = false;
  358. if ( preg_match_all( '/"([^"]+)$/', $this->current_query, $matches ) ) {
  359. $phrase_prefix = $matches[1][0];
  360. $this->current_query = preg_replace( '/"([^"]+)$/', '', $this->current_query );
  361. }
  362. if ( preg_match_all( "/(?:'\B|\B')([^']+)$/", $this->current_query, $matches ) ) {
  363. $phrase_prefix = $matches[1][0];
  364. $this->current_query = preg_replace( "/(?:'\B|\B')([^']+)$/", '', $this->current_query );
  365. }
  366. if ( $phrase_prefix ) {
  367. $phrases[] = $phrase_prefix;
  368. }
  369. if ( empty( $phrases ) ) {
  370. return false;
  371. }
  372. foreach ( $phrases as $p ) {
  373. $this->add_query( array(
  374. 'multi_match' => array(
  375. 'fields' => $args['must_query_fields'],
  376. 'query' => $p,
  377. 'type' => 'phrase',
  378. ) ) );
  379. if ( ! empty( $args['boost_query_fields'] ) ) {
  380. $this->add_query( array(
  381. 'multi_match' => array(
  382. 'fields' => $args['boost_query_fields'],
  383. 'query' => $p,
  384. 'operator' => 'and',
  385. ) ), 'should' );
  386. }
  387. }
  388. return true;
  389. }
  390. /*
  391. * Query fields based on the remaining parts of the query
  392. * This could be the final AND part of the query terms to match, or it
  393. * could be boosting certain elements of the query
  394. *
  395. * args:
  396. * must_query_fields: array of fields that must match the remaining terms (optional)
  397. * boost_query_fields: array of fields to boost the remaining terms on (optional)
  398. *
  399. */
  400. public function remaining_query( $args ) {
  401. $defaults = array(
  402. 'must_query_fields' => null,
  403. 'boost_query_fields' => null,
  404. 'boost_operator' => 'and',
  405. 'boost_query_type' => 'best_fields',
  406. );
  407. $args = wp_parse_args( $args, $defaults );
  408. if ( empty( $this->current_query ) || ctype_space( $this->current_query ) ) {
  409. return;
  410. }
  411. if ( ! empty( $args['must_query_fields'] ) ) {
  412. $this->add_query( array(
  413. 'multi_match' => array(
  414. 'fields' => $args['must_query_fields'],
  415. 'query' => $this->current_query,
  416. 'operator' => 'and',
  417. ) ) );
  418. }
  419. if ( ! empty( $args['boost_query_fields'] ) ) {
  420. $this->add_query( array(
  421. 'multi_match' => array(
  422. 'fields' => $args['boost_query_fields'],
  423. 'query' => $this->current_query,
  424. 'operator' => $args['boost_operator'],
  425. 'type' => $args['boost_query_type'],
  426. ) ), 'should' );
  427. }
  428. }
  429. /*
  430. * Query fields using a prefix query (alphabetical expansions on the index).
  431. * This is not recommended. Slower performance and worse relevancy.
  432. *
  433. * (UNTESTED! Copied from old prefix expansion code)
  434. *
  435. * args:
  436. * must_query_fields: array of fields that must match the remaining terms (optional)
  437. * boost_query_fields: array of fields to boost the remaining terms on (optional)
  438. *
  439. */
  440. public function remaining_prefix_query( $args ) {
  441. $defaults = array(
  442. 'must_query_fields' => array( 'all_content' ),
  443. 'boost_query_fields' => array( 'title' ),
  444. 'boost_operator' => 'and',
  445. 'boost_query_type' => 'best_fields',
  446. );
  447. $args = wp_parse_args( $args, $defaults );
  448. if ( empty( $this->current_query ) || ctype_space( $this->current_query ) ) {
  449. return;
  450. }
  451. //////////////////////////////////
  452. // Example cases to think about:
  453. // "elasticse"
  454. // "elasticsearch"
  455. // "elasticsearch "
  456. // "elasticsearch lucen"
  457. // "elasticsearch lucene"
  458. // "the future" - note the stopword which will match nothing!
  459. // "F1" - an exact match that also has tons of expansions
  460. // "こんにちは" ja "hello"
  461. // "こんにちは友人" ja "hello friend" - we just rely on the prefix phrase and ES to split words
  462. // - this could still be better I bet. Maybe we need to analyze with ES first?
  463. //
  464. /////////////////////////////
  465. //extract pieces of query
  466. // eg: "PREFIXREMAINDER PREFIXWORD"
  467. // "elasticsearch lucen"
  468. $prefix_word = false;
  469. $prefix_remainder = false;
  470. if ( preg_match_all( '/([^ ]+)$/', $this->current_query, $matches ) ) {
  471. $prefix_word = $matches[1][0];
  472. }
  473. $prefix_remainder = preg_replace( '/([^ ]+)$/', '', $this->current_query );
  474. if ( ctype_space( $prefix_remainder ) ) {
  475. $prefix_remainder = false;
  476. }
  477. if ( ! $prefix_word ) {
  478. //Space at the end of the query, so skip using a prefix query
  479. if ( ! empty( $args['must_query_fields'] ) ) {
  480. $this->add_query( array(
  481. 'multi_match' => array(
  482. 'fields' => $args['must_query_fields'],
  483. 'query' => $this->current_query,
  484. 'operator' => 'and',
  485. ) ) );
  486. }
  487. if ( ! empty( $args['boost_query_fields'] ) ) {
  488. $this->add_query( array(
  489. 'multi_match' => array(
  490. 'fields' => $args['boost_query_fields'],
  491. 'query' => $this->current_query,
  492. 'operator' => $args['boost_operator'],
  493. 'type' => $args['boost_query_type'],
  494. ) ), 'should' );
  495. }
  496. } else {
  497. //must match the prefix word and the prefix remainder
  498. if ( ! empty( $args['must_query_fields'] ) ) {
  499. //need to do an OR across a few fields to handle all cases
  500. $must_q = array( 'bool' => array( 'should' => array( ), 'minimum_should_match' => 1 ) );
  501. //treat all words as an exact search (boosts complete word like "news"
  502. //from prefixes of "newspaper")
  503. $must_q['bool']['should'][] = array( 'multi_match' => array(
  504. 'fields' => $this->all_fields,
  505. 'query' => $full_text,
  506. 'operator' => 'and',
  507. 'type' => 'cross_fields',
  508. ) );
  509. //always optimistically try and match the full text as a phrase
  510. //prefix "the futu" should try to match "the future"
  511. //otherwise the first stopword kinda breaks
  512. //This also works as the prefix match for a single word "elasticsea"
  513. $must_q['bool']['should'][] = array( 'multi_match' => array(
  514. 'fields' => $this->phrase_fields,
  515. 'query' => $full_text,
  516. 'operator' => 'and',
  517. 'type' => 'phrase_prefix',
  518. 'max_expansions' => 100,
  519. ) );
  520. if ( $prefix_remainder ) {
  521. //Multiple words found, so treat each word on its own and not just as
  522. //a part of a phrase
  523. //"elasticsearch lucen" => "elasticsearch" exact AND "lucen" prefix
  524. $q['bool']['should'][] = array( 'bool' => array(
  525. 'must' => array(
  526. array( 'multi_match' => array(
  527. 'fields' => $this->phrase_fields,
  528. 'query' => $prefix_word,
  529. 'operator' => 'and',
  530. 'type' => 'phrase_prefix',
  531. 'max_expansions' => 100,
  532. ) ),
  533. array( 'multi_match' => array(
  534. 'fields' => $this->all_fields,
  535. 'query' => $prefix_remainder,
  536. 'operator' => 'and',
  537. 'type' => 'cross_fields',
  538. ) ),
  539. )
  540. ) );
  541. }
  542. $this->add_query( $must_q );
  543. }
  544. //Now add any boosting of the query
  545. if ( ! empty( $args['boost_query_fields'] ) ) {
  546. //treat all words as an exact search (boosts complete word like "news"
  547. //from prefixes of "newspaper")
  548. $this->add_query( array(
  549. 'multi_match' => array(
  550. 'fields' => $args['boost_query_fields'],
  551. 'query' => $this->current_query,
  552. 'operator' => $args['boost_query_operator'],
  553. 'type' => $args['boost_query_type'],
  554. ) ), 'should' );
  555. //optimistically boost the full phrase prefix match
  556. $this->add_query( array(
  557. 'multi_match' => array(
  558. 'fields' => $args['boost_query_fields'],
  559. 'query' => $this->current_query,
  560. 'operator' => 'and',
  561. 'type' => 'phrase_prefix',
  562. 'max_expansions' => 100,
  563. ) ) );
  564. }
  565. }
  566. }
  567. /*
  568. * Boost results based on the lang probability overlaps
  569. *
  570. * args:
  571. * langs2prob: list of languages to search in with associated boosts
  572. */
  573. public function boost_lang_probs( $langs2prob ) {
  574. foreach( $langs2prob as $l => $p ) {
  575. $this->add_function( 'field_value_factor', array(
  576. 'modifier' => 'none',
  577. 'factor' => $p,
  578. 'missing' => 0.01, //1% chance doc did not have right lang detected
  579. ) );
  580. }
  581. }
  582. ////////////////////////////////////
  583. // Helper Methods
  584. //Get the text after some prefix. eg @gibrown, or @"Greg Brown"
  585. protected function get_fields( $field_prefix ) {
  586. $regex = '/' . $field_prefix . '(("[^"]+")|([^\\p{Z}]+))/';
  587. if ( preg_match_all( $regex, $this->current_query, $match ) ) {
  588. return $match[1];
  589. }
  590. return false;
  591. }
  592. //Remove the prefix and text from the query
  593. protected function remove_fields( $field_name ) {
  594. $regex = '/' . $field_name . '(("[^"]+")|([^\\p{Z}]+))/';
  595. $this->current_query = preg_replace( $regex, '', $this->current_query );
  596. }
  597. //Best effort string truncation that splits on word breaks
  598. protected function truncate_string( $string, $limit, $break=" " ) {
  599. if ( mb_strwidth( $string ) <= $limit ) {
  600. return $string;
  601. }
  602. // walk backwards from $limit to find first break
  603. $breakpoint = $limit;
  604. $broken = false;
  605. while ( $breakpoint > 0 ) {
  606. if ( $break === mb_strimwidth( $string, $breakpoint, 1 ) ) {
  607. $string = mb_strimwidth( $string, 0, $breakpoint );
  608. $broken = true;
  609. break;
  610. }
  611. $breakpoint--;
  612. }
  613. // if we weren't able to find a break, need to chop mid-word
  614. if ( !$broken ) {
  615. $string = mb_strimwidth( $string, 0, $limit );
  616. }
  617. return $string;
  618. }
  619. }