query_engine.hpp 79 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592
  1. /*************************************************************************
  2. *
  3. * Copyright 2016 Realm Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. **************************************************************************/
  18. /*
  19. A query consists of node objects, one for each query condition. Each node contains pointers to all other nodes:
  20. node1 node2 node3
  21. ------ ----- -----
  22. node2* node1* node1*
  23. node3* node3* node2*
  24. The construction of all this takes part in query.cpp. Each node has two important functions:
  25. aggregate(start, end)
  26. aggregate_local(start, end)
  27. The aggregate() function executes the aggregate of a query. You can call the method on any of the nodes
  28. (except children nodes of OrNode and SubtableNode) - it has the same behaviour. The function contains
  29. scheduling that calls aggregate_local(start, end) on different nodes with different start/end ranges,
  30. depending on what it finds is most optimal.
  31. The aggregate_local() function contains a tight loop that tests the condition of its own node, and upon match
  32. it tests all other conditions at that index to report a full match or not. It will remain in the tight loop
  33. after a full match.
  34. So a call stack with 2 and 9 being local matches of a node could look like this:
  35. aggregate(0, 10)
  36. node1->aggregate_local(0, 3)
  37. node2->find_first_local(2, 3)
  38. node3->find_first_local(2, 3)
  39. node3->aggregate_local(3, 10)
  40. node1->find_first_local(4, 5)
  41. node2->find_first_local(4, 5)
  42. node1->find_first_local(7, 8)
  43. node2->find_first_local(7, 8)
  44. find_first_local(n, n + 1) is a function that can be used to test a single row of another condition. Note that
  45. this is very simplified. There are other statistical arguments to the methods, and also, find_first_local() can be
  46. called from a callback function called by an integer Array.
  47. Template arguments in methods:
  48. ----------------------------------------------------------------------------------------------------
  49. TConditionFunction: Each node has a condition from query_conditions.c such as Equal, GreaterEqual, etc
  50. TConditionValue: Type of values in condition column. That is, int64_t, float, int, bool, etc
  51. TAction: What to do with each search result, from the enums act_ReturnFirst, act_Count, act_Sum, etc
  52. TResult: Type of result of actions - float, double, int64_t, etc. Special notes: For act_Count it's
  53. int64_t, for RLM_FIND_ALL it's int64_t which points at destination array.
  54. TSourceColumn: Type of source column used in actions, or *ignored* if no source column is used (like for
  55. act_Count, act_ReturnFirst)
  56. There are two important classes used in queries:
  57. ----------------------------------------------------------------------------------------------------
  58. SequentialGetter Column iterator used to get successive values with leaf caching. Used both for condition columns
  59. and aggregate source column
  60. AggregateState State of the aggregate - contains a state variable that stores intermediate sum, max, min,
  61. etc, etc.
  62. */
  63. #ifndef REALM_QUERY_ENGINE_HPP
  64. #define REALM_QUERY_ENGINE_HPP
  65. #include <algorithm>
  66. #include <functional>
  67. #include <sstream>
  68. #include <string>
  69. #include <array>
  70. #include <realm/array_basic.hpp>
  71. #include <realm/array_key.hpp>
  72. #include <realm/array_string.hpp>
  73. #include <realm/array_binary.hpp>
  74. #include <realm/array_integer_tpl.hpp>
  75. #include <realm/array_timestamp.hpp>
  76. #include <realm/array_decimal128.hpp>
  77. #include <realm/array_fixed_bytes.hpp>
  78. #include <realm/array_mixed.hpp>
  79. #include <realm/array_list.hpp>
  80. #include <realm/array_bool.hpp>
  81. #include <realm/array_backlink.hpp>
  82. #include <realm/column_type_traits.hpp>
  83. #include <realm/metrics/query_info.hpp>
  84. #include <realm/query_conditions.hpp>
  85. #include <realm/table.hpp>
  86. #include <realm/column_integer.hpp>
  87. #include <realm/unicode.hpp>
  88. #include <realm/util/miscellaneous.hpp>
  89. #include <realm/util/serializer.hpp>
  90. #include <realm/utilities.hpp>
  91. #include <realm/index_string.hpp>
  92. #include <map>
  93. #include <unordered_set>
  94. #if REALM_X86_OR_X64_TRUE && defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219
  95. #include <immintrin.h>
  96. #endif
  97. namespace realm {
  98. class IndexEvaluator;
  99. // Number of matches to find in best condition loop before breaking out to probe other conditions. Too low value gives
  100. // too many constant time overheads everywhere in the query engine. Too high value makes it adapt less rapidly to
  101. // changes in match frequencies.
  102. const size_t findlocals = 64;
  103. // Average match distance in linear searches where further increase in distance no longer increases query speed
  104. // (because time spent on handling each match becomes insignificant compared to time spent on the search).
  105. const size_t bestdist = 512;
  106. // Minimum number of matches required in a certain condition before it can be used to compute statistics. Too high
  107. // value can spent too much time in a bad node (with high match frequency). Too low value gives inaccurate statistics.
  108. const size_t probe_matches = 4;
  109. const size_t bitwidth_time_unit = 64;
  110. typedef bool (*CallbackDummy)(int64_t);
  111. using Evaluator = util::FunctionRef<bool(const Obj& obj)>;
  112. class ParentNode {
  113. typedef ParentNode ThisType;
  114. public:
  115. ParentNode() = default;
  116. virtual ~ParentNode() = default;
  117. virtual bool has_search_index() const
  118. {
  119. return false;
  120. }
  121. virtual const IndexEvaluator* index_based_keys()
  122. {
  123. return nullptr;
  124. }
  125. void gather_children(std::vector<ParentNode*>& v)
  126. {
  127. m_children.clear();
  128. size_t i = v.size();
  129. v.push_back(this);
  130. if (m_child)
  131. m_child->gather_children(v);
  132. m_children = v;
  133. m_children.erase(m_children.begin() + i);
  134. m_children.insert(m_children.begin(), this);
  135. }
  136. double cost() const
  137. {
  138. // dt = 1/64 to 1. Match dist is 8 times more important than bitwidth
  139. return 8 * bitwidth_time_unit / m_dD + m_dT;
  140. }
  141. size_t find_first(size_t start, size_t end);
  142. bool match(const Obj& obj);
  143. virtual void init(bool will_query_ranges)
  144. {
  145. m_dD = 100.0;
  146. if (m_child)
  147. m_child->init(will_query_ranges);
  148. }
  149. void get_link_dependencies(std::vector<TableKey>& tables) const
  150. {
  151. collect_dependencies(tables);
  152. if (m_child)
  153. m_child->get_link_dependencies(tables);
  154. }
  155. void set_table(ConstTableRef table)
  156. {
  157. if (table == m_table)
  158. return;
  159. m_table = table;
  160. if (m_child)
  161. m_child->set_table(table);
  162. table_changed();
  163. }
  164. void set_cluster(const Cluster* cluster)
  165. {
  166. m_cluster = cluster;
  167. if (m_child)
  168. m_child->set_cluster(cluster);
  169. cluster_changed();
  170. }
  171. virtual void collect_dependencies(std::vector<TableKey>&) const
  172. {
  173. }
  174. virtual size_t find_first_local(size_t start, size_t end) = 0;
  175. virtual size_t find_all_local(size_t start, size_t end);
  176. virtual size_t aggregate_local(QueryStateBase* st, size_t start, size_t end, size_t local_limit,
  177. ArrayPayload* source_column);
  178. virtual std::string validate()
  179. {
  180. return m_child ? m_child->validate() : "";
  181. }
  182. ParentNode(const ParentNode& from);
  183. void add_child(std::unique_ptr<ParentNode> child)
  184. {
  185. if (m_child)
  186. m_child->add_child(std::move(child));
  187. else
  188. m_child = std::move(child);
  189. }
  190. virtual std::unique_ptr<ParentNode> clone() const = 0;
  191. virtual std::string describe(util::serializer::SerialisationState&) const
  192. {
  193. return "";
  194. }
  195. virtual std::string describe_condition() const
  196. {
  197. return "matches";
  198. }
  199. virtual std::string describe_expression(util::serializer::SerialisationState& state) const
  200. {
  201. std::string s;
  202. s = describe(state);
  203. if (m_child) {
  204. s = s + " and " + m_child->describe_expression(state);
  205. }
  206. return s;
  207. }
  208. bool consume_condition(ParentNode& other, bool ignore_indexes)
  209. {
  210. // We can only combine conditions if they're the same operator on the
  211. // same column and there's no additional conditions ANDed on
  212. if (m_condition_column_key != other.m_condition_column_key)
  213. return false;
  214. if (m_child || other.m_child)
  215. return false;
  216. if (typeid(*this) != typeid(other))
  217. return false;
  218. // If a search index is present, don't try to combine conditions since index search is most likely faster.
  219. // Assuming N elements to search and M conditions to check:
  220. // 1) search index present: O(log(N)*M)
  221. // 2) no search index, combine conditions: O(N)
  222. // 3) no search index, conditions not combined: O(N*M)
  223. // In practice N is much larger than M, so if we have a search index, choose 1, otherwise if possible
  224. // choose 2. The exception is if we're inside a Not group or if the query is restricted to a view, as in those
  225. // cases end will always be start+1 and we'll have O(N*M) runtime even with a search index, so we want to
  226. // combine even with an index.
  227. if (has_search_index() && !ignore_indexes)
  228. return false;
  229. return do_consume_condition(other);
  230. }
  231. std::unique_ptr<ParentNode> m_child;
  232. std::vector<ParentNode*> m_children;
  233. mutable ColKey m_condition_column_key = ColKey(); // Column of search criteria
  234. ArrayPayload* m_source_column = nullptr;
  235. double m_dD; // Average row distance between each local match at current position
  236. double m_dT = 1.0; // Time overhead of testing index i + 1 if we have just tested index i. > 1 for linear scans, 0
  237. // for index/tableview
  238. size_t m_probes = 0;
  239. size_t m_matches = 0;
  240. protected:
  241. ConstTableRef m_table = ConstTableRef();
  242. const Cluster* m_cluster = nullptr;
  243. QueryStateBase* m_state = nullptr;
  244. ColumnType get_real_column_type(ColKey key)
  245. {
  246. return m_table.unchecked_ptr()->get_real_column_type(key);
  247. }
  248. private:
  249. virtual void table_changed()
  250. {
  251. }
  252. virtual void cluster_changed()
  253. {
  254. // TODO: Should eventually be pure
  255. }
  256. virtual bool do_consume_condition(ParentNode&)
  257. {
  258. return false;
  259. }
  260. };
  261. class ColumnNodeBase : public ParentNode {
  262. protected:
  263. ColumnNodeBase(ColKey column_key)
  264. {
  265. m_condition_column_key = column_key;
  266. }
  267. ColumnNodeBase(const ColumnNodeBase& from)
  268. : ParentNode(from)
  269. {
  270. }
  271. };
  272. class IndexEvaluator {
  273. public:
  274. IndexEvaluator() {}
  275. virtual ~IndexEvaluator() = default;
  276. void init(StringIndex* index, Mixed value);
  277. void init(std::vector<ObjKey>* storage);
  278. size_t do_search_index(const Cluster* cluster, size_t start, size_t end);
  279. size_t size() const
  280. {
  281. if (m_matching_keys) {
  282. return m_matching_keys->size();
  283. }
  284. return m_results_end - m_results_start;
  285. }
  286. ObjKey get(size_t ndx) const
  287. {
  288. return get_internal(ndx + m_results_start);
  289. }
  290. private:
  291. inline ObjKey get_internal(size_t ndx) const
  292. {
  293. if (m_matching_keys) {
  294. return m_matching_keys->at(ndx);
  295. }
  296. if (m_index_matches) {
  297. return ObjKey(m_index_matches->get(ndx));
  298. }
  299. else if (m_results_end == 1) {
  300. REALM_ASSERT_EX(ndx == 0, ndx);
  301. return m_actual_key;
  302. }
  303. return ObjKey();
  304. }
  305. std::shared_ptr<IntegerColumn> m_index_matches;
  306. ObjKey m_actual_key;
  307. ObjKey m_last_start_key;
  308. size_t m_results_start = 0;
  309. size_t m_results_ndx = 0;
  310. size_t m_results_end = 0;
  311. std::vector<ObjKey>* m_matching_keys = nullptr;
  312. };
  313. template <class LeafType>
  314. class IntegerNodeBase : public ColumnNodeBase {
  315. public:
  316. using TConditionValue = typename LeafType::value_type;
  317. // static const bool nullable = ColType::nullable;
  318. protected:
  319. IntegerNodeBase(TConditionValue value, ColKey column_key)
  320. : ColumnNodeBase(column_key)
  321. , m_value(std::move(value))
  322. {
  323. }
  324. IntegerNodeBase(const IntegerNodeBase& from)
  325. : ColumnNodeBase(from)
  326. , m_value(from.m_value)
  327. {
  328. }
  329. void cluster_changed() override
  330. {
  331. // Assigning nullptr will cause the Leaf destructor to be called. Must
  332. // be done before assigning a new one. Otherwise the destructor will be
  333. // called after the constructor is called and that is unfortunate if
  334. // the object has the same address. (As in this case)
  335. m_array_ptr = nullptr;
  336. // Create new Leaf
  337. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) LeafType(m_table.unchecked_ptr()->get_alloc()));
  338. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  339. m_leaf_ptr = m_array_ptr.get();
  340. }
  341. void init(bool will_query_ranges) override
  342. {
  343. ColumnNodeBase::init(will_query_ranges);
  344. m_dT = .25;
  345. }
  346. bool run_single() const
  347. {
  348. if (m_source_column == nullptr)
  349. return true;
  350. // Compare leafs to see if they are the same
  351. auto leaf = dynamic_cast<LeafType*>(m_source_column);
  352. return leaf ? leaf->get_ref() == m_leaf_ptr->get_ref() : false;
  353. }
  354. template <class TConditionFunction>
  355. size_t find_all_local(size_t start, size_t end)
  356. {
  357. if (run_single()) {
  358. m_leaf_ptr->template find<TConditionFunction>(m_value, start, end, m_state, nullptr);
  359. }
  360. else {
  361. auto callback = [this](size_t index) {
  362. auto val = m_source_column->get_any(index);
  363. return m_state->match(index, val);
  364. };
  365. m_leaf_ptr->template find<TConditionFunction>(m_value, start, end, m_state, callback);
  366. }
  367. return end;
  368. }
  369. std::string describe(util::serializer::SerialisationState& state) const override
  370. {
  371. return state.describe_column(ParentNode::m_table, ColumnNodeBase::m_condition_column_key) + " " +
  372. describe_condition() + " " + util::serializer::print_value(this->m_value);
  373. }
  374. // Search value:
  375. TConditionValue m_value;
  376. // Leaf cache
  377. using LeafCacheStorage = typename std::aligned_storage<sizeof(LeafType), alignof(LeafType)>::type;
  378. using LeafPtr = std::unique_ptr<LeafType, PlacementDelete>;
  379. LeafCacheStorage m_leaf_cache_storage;
  380. LeafPtr m_array_ptr;
  381. const LeafType* m_leaf_ptr = nullptr;
  382. };
  383. template <class LeafType, class TConditionFunction>
  384. class IntegerNode : public IntegerNodeBase<LeafType> {
  385. using BaseType = IntegerNodeBase<LeafType>;
  386. using ThisType = IntegerNode<LeafType, TConditionFunction>;
  387. public:
  388. using TConditionValue = typename BaseType::TConditionValue;
  389. IntegerNode(TConditionValue value, ColKey column_key)
  390. : BaseType(value, column_key)
  391. {
  392. }
  393. IntegerNode(const IntegerNode& from)
  394. : BaseType(from)
  395. {
  396. }
  397. size_t find_first_local(size_t start, size_t end) override
  398. {
  399. return this->m_leaf_ptr->template find_first<TConditionFunction>(this->m_value, start, end);
  400. }
  401. size_t find_all_local(size_t start, size_t end) override
  402. {
  403. return BaseType::template find_all_local<TConditionFunction>(start, end);
  404. }
  405. std::string describe_condition() const override
  406. {
  407. return TConditionFunction::description();
  408. }
  409. std::unique_ptr<ParentNode> clone() const override
  410. {
  411. return std::unique_ptr<ParentNode>(new ThisType(*this));
  412. }
  413. };
  414. template <size_t linear_search_threshold, class LeafType, class NeedleContainer>
  415. static size_t find_first_haystack(LeafType& leaf, NeedleContainer& needles, size_t start, size_t end)
  416. {
  417. // for a small number of conditions, it is faster to do a linear search than to compute the hash
  418. // the exact thresholds were found experimentally
  419. if (needles.size() < linear_search_threshold) {
  420. for (size_t i = start; i < end; ++i) {
  421. auto element = leaf.get(i);
  422. if (std::find(needles.begin(), needles.end(), element) != needles.end())
  423. return i;
  424. }
  425. }
  426. else {
  427. for (size_t i = start; i < end; ++i) {
  428. auto element = leaf.get(i);
  429. if (needles.count(element))
  430. return i;
  431. }
  432. }
  433. return realm::npos;
  434. }
  435. template <class LeafType>
  436. class IntegerNode<LeafType, Equal> : public IntegerNodeBase<LeafType> {
  437. public:
  438. using BaseType = IntegerNodeBase<LeafType>;
  439. using TConditionValue = typename BaseType::TConditionValue;
  440. using ThisType = IntegerNode<LeafType, Equal>;
  441. IntegerNode(TConditionValue value, ColKey column_key)
  442. : BaseType(value, column_key)
  443. {
  444. }
  445. void init(bool will_query_ranges) override
  446. {
  447. BaseType::init(will_query_ranges);
  448. m_nb_needles = m_needles.size();
  449. if (has_search_index() && m_nb_needles == 0) {
  450. StringIndex* index = ParentNode::m_table->get_search_index(ParentNode::m_condition_column_key);
  451. m_index_evaluator = IndexEvaluator();
  452. m_index_evaluator->init(index, BaseType::m_value);
  453. IntegerNodeBase<LeafType>::m_dT = 0;
  454. }
  455. }
  456. bool do_consume_condition(ParentNode& node) override
  457. {
  458. auto& other = static_cast<ThisType&>(node);
  459. REALM_ASSERT(this->m_condition_column_key == other.m_condition_column_key);
  460. REALM_ASSERT(other.m_needles.empty());
  461. if (m_needles.empty()) {
  462. m_needles.insert(this->m_value);
  463. }
  464. m_needles.insert(other.m_value);
  465. return true;
  466. }
  467. bool has_search_index() const override
  468. {
  469. return this->m_table->search_index_type(IntegerNodeBase<LeafType>::m_condition_column_key) ==
  470. IndexType::General;
  471. }
  472. const IndexEvaluator* index_based_keys() override
  473. {
  474. return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
  475. }
  476. size_t find_first_local(size_t start, size_t end) override
  477. {
  478. REALM_ASSERT(this->m_table);
  479. size_t s = realm::npos;
  480. if (start < end) {
  481. if (m_nb_needles) {
  482. s = find_first_haystack<22>(*this->m_leaf_ptr, m_needles, start, end);
  483. }
  484. else if (m_index_evaluator) {
  485. return m_index_evaluator->do_search_index(BaseType::m_cluster, start, end);
  486. }
  487. else if (end - start == 1) {
  488. if (this->m_leaf_ptr->get(start) == this->m_value) {
  489. s = start;
  490. }
  491. }
  492. else {
  493. s = this->m_leaf_ptr->template find_first<Equal>(this->m_value, start, end);
  494. }
  495. }
  496. return s;
  497. }
  498. size_t find_all_local(size_t start, size_t end) override
  499. {
  500. return BaseType::template find_all_local<Equal>(start, end);
  501. }
  502. std::string describe(util::serializer::SerialisationState& state) const override
  503. {
  504. REALM_ASSERT(this->m_condition_column_key);
  505. std::string col_descr = state.describe_column(this->m_table, this->m_condition_column_key);
  506. if (m_needles.empty()) {
  507. return col_descr + " " + Equal::description() + " " +
  508. util::serializer::print_value(IntegerNodeBase<LeafType>::m_value);
  509. }
  510. std::string list_contents;
  511. bool is_first = true;
  512. for (auto it : m_needles) {
  513. list_contents +=
  514. util::format("%1%2", is_first ? "" : ", ", util::serializer::print_value(it)); // "it" may be null
  515. is_first = false;
  516. }
  517. std::string desc = util::format("%1 IN {%2}", col_descr, list_contents);
  518. return desc;
  519. }
  520. std::unique_ptr<ParentNode> clone() const override
  521. {
  522. return std::unique_ptr<ParentNode>(new ThisType(*this));
  523. }
  524. private:
  525. std::unordered_set<TConditionValue> m_needles;
  526. size_t m_nb_needles = 0;
  527. std::optional<IndexEvaluator> m_index_evaluator;
  528. IntegerNode(const IntegerNode<LeafType, Equal>& from)
  529. : BaseType(from)
  530. , m_needles(from.m_needles)
  531. {
  532. }
  533. };
  534. // This node is currently used for floats and doubles only
  535. template <class LeafType, class TConditionFunction>
  536. class FloatDoubleNode : public ParentNode {
  537. public:
  538. using TConditionValue = typename LeafType::value_type;
  539. static const bool special_null_node = false;
  540. FloatDoubleNode(TConditionValue v, ColKey column_key)
  541. : m_value(v)
  542. {
  543. m_condition_column_key = column_key;
  544. m_dT = 1.0;
  545. }
  546. FloatDoubleNode(null, ColKey column_key)
  547. : m_value(null::get_null_float<TConditionValue>())
  548. {
  549. m_condition_column_key = column_key;
  550. m_dT = 1.0;
  551. }
  552. void cluster_changed() override
  553. {
  554. // Assigning nullptr will cause the Leaf destructor to be called. Must
  555. // be done before assigning a new one. Otherwise the destructor will be
  556. // called after the constructor is called and that is unfortunate if
  557. // the object has the same address. (As in this case)
  558. m_array_ptr = nullptr;
  559. // Create new Leaf
  560. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) LeafType(m_table.unchecked_ptr()->get_alloc()));
  561. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  562. m_leaf_ptr = m_array_ptr.get();
  563. }
  564. size_t find_first_local(size_t start, size_t end) override
  565. {
  566. TConditionFunction cond;
  567. auto find = [&](bool nullability) {
  568. bool m_value_nan = nullability ? null::is_null_float(m_value) : false;
  569. for (size_t s = start; s < end; ++s) {
  570. TConditionValue v = m_leaf_ptr->get(s);
  571. REALM_ASSERT(!(null::is_null_float(v) && !nullability));
  572. if (cond(v, m_value, nullability ? null::is_null_float<TConditionValue>(v) : false, m_value_nan))
  573. return s;
  574. }
  575. return not_found;
  576. };
  577. // This will inline the second case but no the first. Todo, use templated lambda when switching to c++14
  578. if (m_table->is_nullable(m_condition_column_key))
  579. return find(true);
  580. else
  581. return find(false);
  582. }
  583. std::string describe(util::serializer::SerialisationState& state) const override
  584. {
  585. REALM_ASSERT(m_condition_column_key);
  586. return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " + describe_condition() + " " +
  587. util::serializer::print_value(FloatDoubleNode::m_value);
  588. }
  589. std::string describe_condition() const override
  590. {
  591. return TConditionFunction::description();
  592. }
  593. std::unique_ptr<ParentNode> clone() const override
  594. {
  595. return std::unique_ptr<ParentNode>(new FloatDoubleNode(*this));
  596. }
  597. FloatDoubleNode(const FloatDoubleNode& from)
  598. : ParentNode(from)
  599. , m_value(from.m_value)
  600. {
  601. }
  602. protected:
  603. TConditionValue m_value;
  604. // Leaf cache
  605. using LeafCacheStorage = typename std::aligned_storage<sizeof(LeafType), alignof(LeafType)>::type;
  606. using LeafPtr = std::unique_ptr<LeafType, PlacementDelete>;
  607. LeafCacheStorage m_leaf_cache_storage;
  608. LeafPtr m_array_ptr;
  609. const LeafType* m_leaf_ptr = nullptr;
  610. };
  611. template <class T, class TConditionFunction>
  612. class SizeNode : public ParentNode {
  613. public:
  614. SizeNode(int64_t v, ColKey column)
  615. : m_value(v)
  616. {
  617. m_condition_column_key = column;
  618. m_dT = 20.0;
  619. }
  620. void cluster_changed() override
  621. {
  622. // Assigning nullptr will cause the Leaf destructor to be called. Must
  623. // be done before assigning a new one. Otherwise the destructor will be
  624. // called after the constructor is called and that is unfortunate if
  625. // the object has the same address. (As in this case)
  626. m_array_ptr = nullptr;
  627. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) LeafType(m_table.unchecked_ptr()->get_alloc()));
  628. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  629. m_leaf_ptr = m_array_ptr.get();
  630. }
  631. size_t find_first_local(size_t start, size_t end) override
  632. {
  633. for (size_t s = start; s < end; ++s) {
  634. T v = m_leaf_ptr->get(s);
  635. if (v) {
  636. int64_t sz = v.size();
  637. if (TConditionFunction()(sz, m_value))
  638. return s;
  639. }
  640. }
  641. return not_found;
  642. }
  643. std::unique_ptr<ParentNode> clone() const override
  644. {
  645. return std::unique_ptr<ParentNode>(new SizeNode(*this));
  646. }
  647. SizeNode(const SizeNode& from)
  648. : ParentNode(from)
  649. , m_value(from.m_value)
  650. {
  651. }
  652. private:
  653. // Leaf cache
  654. using LeafType = typename ColumnTypeTraits<T>::cluster_leaf_type;
  655. using LeafCacheStorage = typename std::aligned_storage<sizeof(LeafType), alignof(LeafType)>::type;
  656. using LeafPtr = std::unique_ptr<LeafType, PlacementDelete>;
  657. LeafCacheStorage m_leaf_cache_storage;
  658. LeafPtr m_array_ptr;
  659. const LeafType* m_leaf_ptr = nullptr;
  660. int64_t m_value;
  661. };
  662. extern size_t size_of_list_from_ref(ref_type ref, Allocator& alloc, ColumnType col_type, bool nullable);
  663. template <class TConditionFunction>
  664. class SizeListNode : public ParentNode {
  665. public:
  666. SizeListNode(int64_t v, ColKey column)
  667. : m_value(v)
  668. {
  669. m_condition_column_key = column;
  670. m_dT = 30.0;
  671. }
  672. void reset_cache()
  673. {
  674. m_cached_col_type = m_condition_column_key.get_type();
  675. m_cached_nullable = m_condition_column_key.is_nullable();
  676. REALM_ASSERT_DEBUG(m_condition_column_key.is_list());
  677. }
  678. void cluster_changed() override
  679. {
  680. // Assigning nullptr will cause the Leaf destructor to be called. Must
  681. // be done before assigning a new one. Otherwise the destructor will be
  682. // called after the constructor is called and that is unfortunate if
  683. // the object has the same address. (As in this case)
  684. m_array_ptr = nullptr;
  685. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) ArrayList(m_table.unchecked_ptr()->get_alloc()));
  686. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  687. m_leaf_ptr = m_array_ptr.get();
  688. reset_cache();
  689. }
  690. void init(bool will_query_ranges) override
  691. {
  692. ParentNode::init(will_query_ranges);
  693. reset_cache();
  694. }
  695. size_t find_first_local(size_t start, size_t end) override
  696. {
  697. Allocator& alloc = m_table.unchecked_ptr()->get_alloc();
  698. for (size_t s = start; s < end; ++s) {
  699. ref_type ref = m_leaf_ptr->get(s);
  700. if (ref) {
  701. int64_t sz = size_of_list_from_ref(ref, alloc, m_cached_col_type, m_cached_nullable);
  702. if (TConditionFunction()(sz, m_value))
  703. return s;
  704. }
  705. }
  706. return not_found;
  707. }
  708. std::unique_ptr<ParentNode> clone() const override
  709. {
  710. return std::unique_ptr<ParentNode>(new SizeListNode(*this));
  711. }
  712. SizeListNode(const SizeListNode& from)
  713. : ParentNode(from)
  714. , m_value(from.m_value)
  715. {
  716. }
  717. private:
  718. // Leaf cache
  719. using LeafCacheStorage = typename std::aligned_storage<sizeof(ArrayList), alignof(ArrayList)>::type;
  720. using LeafPtr = std::unique_ptr<ArrayList, PlacementDelete>;
  721. LeafCacheStorage m_leaf_cache_storage;
  722. LeafPtr m_array_ptr;
  723. const ArrayList* m_leaf_ptr = nullptr;
  724. int64_t m_value;
  725. ColumnType m_cached_col_type;
  726. bool m_cached_nullable;
  727. };
  728. template <class TConditionFunction>
  729. class BinaryNode : public ParentNode {
  730. public:
  731. using TConditionValue = BinaryData;
  732. static const bool special_null_node = false;
  733. BinaryNode(BinaryData v, ColKey column)
  734. : m_value(v)
  735. {
  736. m_condition_column_key = column;
  737. m_dT = 100.0;
  738. }
  739. BinaryNode(null, ColKey column)
  740. : BinaryNode(BinaryData{}, column)
  741. {
  742. }
  743. void cluster_changed() override
  744. {
  745. m_array_ptr = nullptr;
  746. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) ArrayBinary(m_table.unchecked_ptr()->get_alloc()));
  747. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  748. m_leaf_ptr = m_array_ptr.get();
  749. }
  750. size_t find_first_local(size_t start, size_t end) override
  751. {
  752. TConditionFunction condition;
  753. for (size_t s = start; s < end; ++s) {
  754. BinaryData value = m_leaf_ptr->get(s);
  755. if (condition(m_value.get(), value))
  756. return s;
  757. }
  758. return not_found;
  759. }
  760. virtual std::string describe(util::serializer::SerialisationState& state) const override
  761. {
  762. REALM_ASSERT(m_condition_column_key);
  763. return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " +
  764. TConditionFunction::description() + " " + util::serializer::print_value(BinaryNode::m_value.get());
  765. }
  766. std::unique_ptr<ParentNode> clone() const override
  767. {
  768. return std::unique_ptr<ParentNode>(new BinaryNode(*this));
  769. }
  770. BinaryNode(const BinaryNode& from)
  771. : ParentNode(from)
  772. , m_value(from.m_value)
  773. {
  774. }
  775. private:
  776. OwnedBinaryData m_value;
  777. using LeafCacheStorage = typename std::aligned_storage<sizeof(ArrayBinary), alignof(ArrayBinary)>::type;
  778. using LeafPtr = std::unique_ptr<ArrayBinary, PlacementDelete>;
  779. LeafCacheStorage m_leaf_cache_storage;
  780. LeafPtr m_array_ptr;
  781. const ArrayBinary* m_leaf_ptr = nullptr;
  782. };
  783. template <class TConditionFunction>
  784. class BoolNode : public ParentNode {
  785. public:
  786. using TConditionValue = bool;
  787. BoolNode(util::Optional<bool> v, ColKey column)
  788. : m_value(v)
  789. {
  790. m_condition_column_key = column;
  791. }
  792. BoolNode(const BoolNode& from)
  793. : ParentNode(from)
  794. , m_value(from.m_value)
  795. , m_index_evaluator(from.m_index_evaluator)
  796. {
  797. }
  798. void init(bool will_query_ranges) override
  799. {
  800. ParentNode::init(will_query_ranges);
  801. if constexpr (std::is_same_v<TConditionFunction, Equal>) {
  802. if (m_index_evaluator) {
  803. StringIndex* index = m_table->get_search_index(m_condition_column_key);
  804. m_index_evaluator->init(index, m_value);
  805. this->m_dT = 0;
  806. }
  807. }
  808. }
  809. void table_changed() override
  810. {
  811. if constexpr (std::is_same_v<TConditionFunction, Equal>) {
  812. const bool has_index = m_table->search_index_type(m_condition_column_key) == IndexType::General;
  813. m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
  814. }
  815. }
  816. const IndexEvaluator* index_based_keys() override
  817. {
  818. return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
  819. }
  820. bool has_search_index() const override
  821. {
  822. return bool(m_index_evaluator);
  823. }
  824. void cluster_changed() override
  825. {
  826. m_array_ptr = nullptr;
  827. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) ArrayBoolNull(m_table.unchecked_ptr()->get_alloc()));
  828. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  829. m_leaf_ptr = m_array_ptr.get();
  830. }
  831. size_t find_first_local(size_t start, size_t end) override
  832. {
  833. if constexpr (std::is_same_v<TConditionFunction, Equal>) {
  834. if (m_index_evaluator) {
  835. return m_index_evaluator->do_search_index(m_cluster, start, end);
  836. }
  837. }
  838. TConditionFunction condition;
  839. bool m_value_is_null = !m_value;
  840. for (size_t s = start; s < end; ++s) {
  841. util::Optional<bool> value = m_leaf_ptr->get(s);
  842. if (condition(value, m_value, !value, m_value_is_null))
  843. return s;
  844. }
  845. return not_found;
  846. }
  847. virtual std::string describe(util::serializer::SerialisationState& state) const override
  848. {
  849. return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " +
  850. TConditionFunction::description() + " " + util::serializer::print_value(m_value);
  851. }
  852. std::unique_ptr<ParentNode> clone() const override
  853. {
  854. return std::unique_ptr<ParentNode>(new BoolNode(*this));
  855. }
  856. private:
  857. util::Optional<bool> m_value;
  858. using LeafCacheStorage = typename std::aligned_storage<sizeof(ArrayBoolNull), alignof(ArrayBoolNull)>::type;
  859. using LeafPtr = std::unique_ptr<ArrayBoolNull, PlacementDelete>;
  860. LeafCacheStorage m_leaf_cache_storage;
  861. LeafPtr m_array_ptr;
  862. const ArrayBoolNull* m_leaf_ptr = nullptr;
  863. std::optional<IndexEvaluator> m_index_evaluator;
  864. };
  865. class TimestampNodeBase : public ParentNode {
  866. public:
  867. using TConditionValue = Timestamp;
  868. static const bool special_null_node = false;
  869. TimestampNodeBase(Timestamp v, ColKey column)
  870. : m_value(v)
  871. {
  872. m_condition_column_key = column;
  873. m_dT = 2.0;
  874. }
  875. TimestampNodeBase(null, ColKey column)
  876. : TimestampNodeBase(Timestamp{}, column)
  877. {
  878. }
  879. void cluster_changed() override
  880. {
  881. m_array_ptr = nullptr;
  882. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) ArrayTimestamp(m_table.unchecked_ptr()->get_alloc()));
  883. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  884. m_leaf_ptr = m_array_ptr.get();
  885. }
  886. protected:
  887. TimestampNodeBase(const TimestampNodeBase& from)
  888. : ParentNode(from)
  889. , m_value(from.m_value)
  890. {
  891. }
  892. Timestamp m_value;
  893. using LeafCacheStorage = typename std::aligned_storage<sizeof(ArrayTimestamp), alignof(ArrayTimestamp)>::type;
  894. using LeafPtr = std::unique_ptr<ArrayTimestamp, PlacementDelete>;
  895. LeafCacheStorage m_leaf_cache_storage;
  896. LeafPtr m_array_ptr;
  897. const ArrayTimestamp* m_leaf_ptr = nullptr;
  898. };
  899. template <class TConditionFunction>
  900. class TimestampNode : public TimestampNodeBase {
  901. public:
  902. using TimestampNodeBase::TimestampNodeBase;
  903. void init(bool will_query_ranges) override
  904. {
  905. TimestampNodeBase::init(will_query_ranges);
  906. if constexpr (std::is_same_v<TConditionFunction, Equal>) {
  907. if (m_index_evaluator) {
  908. StringIndex* index =
  909. TimestampNodeBase::m_table->get_search_index(TimestampNodeBase::m_condition_column_key);
  910. m_index_evaluator->init(index, TimestampNodeBase::m_value);
  911. this->m_dT = 0;
  912. }
  913. }
  914. }
  915. void table_changed() override
  916. {
  917. if constexpr (std::is_same_v<TConditionFunction, Equal>) {
  918. const bool has_index =
  919. this->m_table->search_index_type(TimestampNodeBase::m_condition_column_key) == IndexType::General;
  920. m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
  921. }
  922. }
  923. const IndexEvaluator* index_based_keys() override
  924. {
  925. return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
  926. }
  927. bool has_search_index() const override
  928. {
  929. return bool(m_index_evaluator);
  930. }
  931. size_t find_first_local(size_t start, size_t end) override
  932. {
  933. if constexpr (std::is_same_v<TConditionFunction, Equal>) {
  934. if (m_index_evaluator) {
  935. return m_index_evaluator->do_search_index(this->m_cluster, start, end);
  936. }
  937. }
  938. return m_leaf_ptr->find_first<TConditionFunction>(m_value, start, end);
  939. }
  940. std::string describe(util::serializer::SerialisationState& state) const override
  941. {
  942. REALM_ASSERT(m_condition_column_key);
  943. return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " +
  944. TConditionFunction::description() + " " + util::serializer::print_value(TimestampNode::m_value);
  945. }
  946. std::unique_ptr<ParentNode> clone() const override
  947. {
  948. return std::unique_ptr<ParentNode>(new TimestampNode(*this));
  949. }
  950. protected:
  951. TimestampNode(const TimestampNode& from, Transaction* tr)
  952. : TimestampNodeBase(from, tr)
  953. {
  954. }
  955. std::optional<IndexEvaluator> m_index_evaluator;
  956. };
  957. class DecimalNodeBase : public ParentNode {
  958. public:
  959. using TConditionValue = Decimal128;
  960. static const bool special_null_node = false;
  961. DecimalNodeBase(Decimal128 v, ColKey column)
  962. : m_value(v)
  963. {
  964. m_condition_column_key = column;
  965. }
  966. DecimalNodeBase(null, ColKey column)
  967. : DecimalNodeBase(Decimal128{null()}, column)
  968. {
  969. }
  970. void cluster_changed() override
  971. {
  972. m_array_ptr = nullptr;
  973. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) ArrayDecimal128(m_table.unchecked_ptr()->get_alloc()));
  974. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  975. m_leaf_ptr = m_array_ptr.get();
  976. }
  977. void init(bool will_query_ranges) override
  978. {
  979. ParentNode::init(will_query_ranges);
  980. m_dD = 100.0;
  981. }
  982. protected:
  983. DecimalNodeBase(const DecimalNodeBase& from)
  984. : ParentNode(from)
  985. , m_value(from.m_value)
  986. {
  987. }
  988. Decimal128 m_value;
  989. using LeafCacheStorage = typename std::aligned_storage<sizeof(ArrayDecimal128), alignof(ArrayDecimal128)>::type;
  990. using LeafPtr = std::unique_ptr<ArrayDecimal128, PlacementDelete>;
  991. LeafCacheStorage m_leaf_cache_storage;
  992. LeafPtr m_array_ptr;
  993. const ArrayDecimal128* m_leaf_ptr = nullptr;
  994. };
  995. template <class TConditionFunction>
  996. class DecimalNode : public DecimalNodeBase {
  997. public:
  998. using DecimalNodeBase::DecimalNodeBase;
  999. size_t find_first_local(size_t start, size_t end) override
  1000. {
  1001. TConditionFunction cond;
  1002. bool value_is_null = m_value.is_null();
  1003. for (size_t i = start; i < end; i++) {
  1004. Decimal128 val = m_leaf_ptr->get(i);
  1005. if (cond(val, m_value, val.is_null(), value_is_null))
  1006. return i;
  1007. }
  1008. return realm::npos;
  1009. }
  1010. std::string describe(util::serializer::SerialisationState& state) const override
  1011. {
  1012. REALM_ASSERT(m_condition_column_key);
  1013. return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " +
  1014. TConditionFunction::description() + " " + util::serializer::print_value(DecimalNode::m_value);
  1015. }
  1016. std::unique_ptr<ParentNode> clone() const override
  1017. {
  1018. return std::unique_ptr<ParentNode>(new DecimalNode(*this));
  1019. }
  1020. protected:
  1021. DecimalNode(const DecimalNode& from, Transaction* tr)
  1022. : DecimalNodeBase(from, tr)
  1023. {
  1024. }
  1025. };
  1026. template <class ObjectType, class ArrayType>
  1027. class FixedBytesNodeBase : public ParentNode {
  1028. public:
  1029. using TConditionValue = ObjectType;
  1030. static const bool special_null_node = false;
  1031. FixedBytesNodeBase(ObjectType v, ColKey column)
  1032. : m_value(v)
  1033. {
  1034. m_condition_column_key = column;
  1035. }
  1036. FixedBytesNodeBase(null, ColKey column)
  1037. : FixedBytesNodeBase(ObjectType{}, column)
  1038. {
  1039. m_value_is_null = true;
  1040. }
  1041. void cluster_changed() override
  1042. {
  1043. m_array_ptr = nullptr;
  1044. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) ArrayType(m_table.unchecked_ptr()->get_alloc()));
  1045. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  1046. m_leaf_ptr = m_array_ptr.get();
  1047. }
  1048. void init(bool will_query_ranges) override
  1049. {
  1050. ParentNode::init(will_query_ranges);
  1051. m_dD = 100.0;
  1052. }
  1053. protected:
  1054. FixedBytesNodeBase(const FixedBytesNodeBase& from)
  1055. : ParentNode(from)
  1056. , m_value(from.m_value)
  1057. , m_value_is_null(from.m_value_is_null)
  1058. {
  1059. }
  1060. ObjectType m_value;
  1061. bool m_value_is_null = false;
  1062. using LeafCacheStorage = typename std::aligned_storage<sizeof(ArrayType), alignof(ArrayType)>::type;
  1063. using LeafPtr = std::unique_ptr<ArrayType, PlacementDelete>;
  1064. LeafCacheStorage m_leaf_cache_storage;
  1065. LeafPtr m_array_ptr;
  1066. const ArrayType* m_leaf_ptr = nullptr;
  1067. };
  1068. template <class TConditionFunction, class ObjectType, class ArrayType>
  1069. class FixedBytesNode : public FixedBytesNodeBase<ObjectType, ArrayType> {
  1070. public:
  1071. using FixedBytesNodeBase<ObjectType, ArrayType>::FixedBytesNodeBase;
  1072. size_t find_first_local(size_t start, size_t end) override
  1073. {
  1074. TConditionFunction cond;
  1075. for (size_t i = start; i < end; i++) {
  1076. util::Optional<ObjectType> val = this->m_leaf_ptr->get(i);
  1077. if (val) {
  1078. if (cond(*val, this->m_value, false, this->m_value_is_null))
  1079. return i;
  1080. }
  1081. else {
  1082. if (cond(ObjectType{}, this->m_value, true, this->m_value_is_null))
  1083. return i;
  1084. }
  1085. }
  1086. return realm::npos;
  1087. }
  1088. std::string describe(util::serializer::SerialisationState& state) const override
  1089. {
  1090. REALM_ASSERT(this->m_condition_column_key);
  1091. return state.describe_column(ParentNode::m_table, this->m_condition_column_key) + " " +
  1092. TConditionFunction::description() + " " +
  1093. (this->m_value_is_null ? util::serializer::print_value(realm::null())
  1094. : util::serializer::print_value(this->m_value));
  1095. }
  1096. std::unique_ptr<ParentNode> clone() const override
  1097. {
  1098. return std::unique_ptr<ParentNode>(new FixedBytesNode(*this));
  1099. }
  1100. protected:
  1101. FixedBytesNode(const FixedBytesNode& from, Transaction* tr)
  1102. : FixedBytesNode(from, tr)
  1103. {
  1104. }
  1105. };
  1106. template <class ObjectType, class ArrayType>
  1107. class FixedBytesNode<Equal, ObjectType, ArrayType> : public FixedBytesNodeBase<ObjectType, ArrayType> {
  1108. public:
  1109. using FixedBytesNodeBase<ObjectType, ArrayType>::FixedBytesNodeBase;
  1110. using BaseType = FixedBytesNodeBase<ObjectType, ArrayType>;
  1111. void init(bool will_query_ranges) override
  1112. {
  1113. BaseType::init(will_query_ranges);
  1114. if (!this->m_value_is_null) {
  1115. m_optional_value = this->m_value;
  1116. }
  1117. if (m_index_evaluator) {
  1118. StringIndex* index = BaseType::m_table->get_search_index(BaseType::m_condition_column_key);
  1119. m_index_evaluator->init(index, m_optional_value);
  1120. this->m_dT = 0;
  1121. }
  1122. }
  1123. void table_changed() override
  1124. {
  1125. const bool has_index =
  1126. this->m_table->search_index_type(BaseType::m_condition_column_key) == IndexType::General;
  1127. m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
  1128. }
  1129. const IndexEvaluator* index_based_keys() override
  1130. {
  1131. return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
  1132. }
  1133. bool has_search_index() const override
  1134. {
  1135. return bool(m_index_evaluator);
  1136. }
  1137. size_t find_first_local(size_t start, size_t end) override
  1138. {
  1139. REALM_ASSERT(this->m_table);
  1140. size_t s = realm::npos;
  1141. if (start < end) {
  1142. if (m_index_evaluator) {
  1143. return m_index_evaluator->do_search_index(this->m_cluster, start, end);
  1144. }
  1145. if (end - start == 1) {
  1146. if (this->m_leaf_ptr->get(start) == m_optional_value) {
  1147. s = start;
  1148. }
  1149. }
  1150. else {
  1151. s = this->m_leaf_ptr->find_first(m_optional_value, start, end);
  1152. }
  1153. }
  1154. return s;
  1155. }
  1156. std::string describe(util::serializer::SerialisationState& state) const override
  1157. {
  1158. REALM_ASSERT(this->m_condition_column_key);
  1159. return state.describe_column(ParentNode::m_table, this->m_condition_column_key) + " " + Equal::description() +
  1160. " " +
  1161. (this->m_value_is_null ? util::serializer::print_value(realm::null())
  1162. : util::serializer::print_value(this->m_value));
  1163. }
  1164. std::unique_ptr<ParentNode> clone() const override
  1165. {
  1166. return std::unique_ptr<ParentNode>(new FixedBytesNode(*this));
  1167. }
  1168. protected:
  1169. FixedBytesNode(const FixedBytesNode& from, Transaction* tr)
  1170. : FixedBytesNode(from, tr)
  1171. {
  1172. }
  1173. std::optional<ObjectType> m_optional_value;
  1174. std::optional<IndexEvaluator> m_index_evaluator;
  1175. };
  1176. template <typename T>
  1177. using ObjectIdNode = FixedBytesNode<T, ObjectId, ArrayObjectIdNull>;
  1178. template <typename T>
  1179. using UUIDNode = FixedBytesNode<T, UUID, ArrayUUIDNull>;
  1180. class MixedNodeBase : public ParentNode {
  1181. public:
  1182. using TConditionValue = Mixed;
  1183. static const bool special_null_node = false;
  1184. MixedNodeBase(Mixed v, ColKey column)
  1185. : m_value(v)
  1186. , m_value_is_null(v.is_null())
  1187. {
  1188. REALM_ASSERT(column.get_type() == col_type_Mixed);
  1189. get_ownership();
  1190. m_condition_column_key = column;
  1191. }
  1192. MixedNodeBase(null, ColKey column)
  1193. : MixedNodeBase(Mixed{}, column)
  1194. {
  1195. m_value_is_null = true;
  1196. }
  1197. void cluster_changed() override
  1198. {
  1199. m_array_ptr = nullptr;
  1200. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) ArrayMixed(m_table.unchecked_ptr()->get_alloc()));
  1201. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  1202. m_leaf_ptr = m_array_ptr.get();
  1203. }
  1204. void init(bool will_query_ranges) override
  1205. {
  1206. ParentNode::init(will_query_ranges);
  1207. m_dD = 100.0;
  1208. }
  1209. std::string describe(util::serializer::SerialisationState& state) const override
  1210. {
  1211. REALM_ASSERT(m_condition_column_key);
  1212. std::string value;
  1213. if (m_value.is_type(type_TypedLink)) {
  1214. value = util::serializer::print_value(m_value.get<ObjLink>(), state.group);
  1215. }
  1216. else {
  1217. value = util::serializer::print_value(m_value);
  1218. }
  1219. return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " + this->describe_condition() +
  1220. " " + value;
  1221. }
  1222. protected:
  1223. MixedNodeBase(const MixedNodeBase& from)
  1224. : ParentNode(from)
  1225. , m_value(from.m_value)
  1226. , m_value_is_null(from.m_value_is_null)
  1227. {
  1228. get_ownership();
  1229. }
  1230. void get_ownership()
  1231. {
  1232. if (m_value.is_type(type_String, type_Binary)) {
  1233. auto bin = m_value.get_binary();
  1234. m_buffer = OwnedBinaryData(bin.data(), bin.size());
  1235. auto tmp = m_buffer.get();
  1236. if (m_value.is_type(type_String)) {
  1237. m_value = Mixed(StringData(tmp.data(), tmp.size()));
  1238. }
  1239. else {
  1240. m_value = Mixed(tmp);
  1241. }
  1242. }
  1243. }
  1244. QueryValue m_value;
  1245. OwnedBinaryData m_buffer;
  1246. bool m_value_is_null = false;
  1247. using LeafCacheStorage = typename std::aligned_storage<sizeof(ArrayMixed), alignof(ArrayMixed)>::type;
  1248. using LeafPtr = std::unique_ptr<ArrayMixed, PlacementDelete>;
  1249. LeafCacheStorage m_leaf_cache_storage;
  1250. LeafPtr m_array_ptr;
  1251. const ArrayMixed* m_leaf_ptr = nullptr;
  1252. };
  1253. template <class TConditionFunction>
  1254. class MixedNode : public MixedNodeBase {
  1255. public:
  1256. using MixedNodeBase::MixedNodeBase;
  1257. size_t find_first_local(size_t start, size_t end) override
  1258. {
  1259. TConditionFunction cond;
  1260. for (size_t i = start; i < end; i++) {
  1261. QueryValue val(m_leaf_ptr->get(i));
  1262. if constexpr (realm::is_any_v<TConditionFunction, BeginsWith, BeginsWithIns, EndsWith, EndsWithIns, Like,
  1263. LikeIns, NotEqualIns, Contains, ContainsIns>) {
  1264. // For some strange reason the parameters are swapped for string conditions
  1265. if (cond(m_value, val))
  1266. return i;
  1267. }
  1268. else {
  1269. if (cond(val, m_value))
  1270. return i;
  1271. }
  1272. }
  1273. return realm::npos;
  1274. }
  1275. virtual std::string describe_condition() const override
  1276. {
  1277. return TConditionFunction::description();
  1278. }
  1279. std::unique_ptr<ParentNode> clone() const override
  1280. {
  1281. return std::unique_ptr<ParentNode>(new MixedNode(*this));
  1282. }
  1283. protected:
  1284. MixedNode(const MixedNode& from, Transaction* tr)
  1285. : MixedNode(from, tr)
  1286. {
  1287. }
  1288. };
  1289. template <>
  1290. class MixedNode<Equal> : public MixedNodeBase {
  1291. public:
  1292. MixedNode(Mixed v, ColKey column)
  1293. : MixedNodeBase(v, column)
  1294. {
  1295. }
  1296. MixedNode(const MixedNode<Equal>& other)
  1297. : MixedNodeBase(other)
  1298. , m_index_evaluator(other.m_index_evaluator)
  1299. {
  1300. }
  1301. void init(bool will_query_ranges) override;
  1302. void cluster_changed() override
  1303. {
  1304. // If we use searchindex, we do not need further access to clusters
  1305. if (!has_search_index()) {
  1306. MixedNodeBase::cluster_changed();
  1307. }
  1308. }
  1309. void table_changed() override
  1310. {
  1311. const bool has_index =
  1312. m_table.unchecked_ptr()->search_index_type(m_condition_column_key) == IndexType::General;
  1313. m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
  1314. }
  1315. bool has_search_index() const override
  1316. {
  1317. return bool(m_index_evaluator);
  1318. }
  1319. size_t find_first_local(size_t start, size_t end) override;
  1320. virtual std::string describe_condition() const override
  1321. {
  1322. return Equal::description();
  1323. }
  1324. std::unique_ptr<ParentNode> clone() const override
  1325. {
  1326. return std::unique_ptr<ParentNode>(new MixedNode<Equal>(*this));
  1327. }
  1328. protected:
  1329. std::optional<IndexEvaluator> m_index_evaluator;
  1330. const IndexEvaluator* index_based_keys() override
  1331. {
  1332. return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
  1333. }
  1334. };
  1335. template <>
  1336. class MixedNode<EqualIns> : public MixedNodeBase {
  1337. public:
  1338. MixedNode(Mixed v, ColKey column)
  1339. : MixedNodeBase(v, column)
  1340. {
  1341. }
  1342. MixedNode(const MixedNode<EqualIns>& other)
  1343. : MixedNodeBase(other)
  1344. , m_index_evaluator(other.m_index_evaluator)
  1345. {
  1346. }
  1347. void init(bool will_query_ranges) override;
  1348. size_t find_first_local(size_t start, size_t end) override;
  1349. void cluster_changed() override
  1350. {
  1351. // If we use searchindex, we do not need further access to clusters
  1352. if (!has_search_index()) {
  1353. MixedNodeBase::cluster_changed();
  1354. }
  1355. }
  1356. void table_changed() override
  1357. {
  1358. const bool has_index =
  1359. m_table.unchecked_ptr()->search_index_type(m_condition_column_key) == IndexType::General;
  1360. m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
  1361. }
  1362. bool has_search_index() const override
  1363. {
  1364. return bool(m_index_evaluator);
  1365. }
  1366. virtual std::string describe_condition() const override
  1367. {
  1368. return EqualIns::description();
  1369. }
  1370. std::unique_ptr<ParentNode> clone() const override
  1371. {
  1372. return std::unique_ptr<ParentNode>(new MixedNode<EqualIns>(*this));
  1373. }
  1374. protected:
  1375. std::string m_ucase;
  1376. std::string m_lcase;
  1377. std::optional<IndexEvaluator> m_index_evaluator;
  1378. std::vector<ObjKey> m_index_matches;
  1379. const IndexEvaluator* index_based_keys() override
  1380. {
  1381. return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
  1382. }
  1383. };
  1384. class StringNodeBase : public ParentNode {
  1385. public:
  1386. using TConditionValue = StringData;
  1387. static const bool special_null_node = true;
  1388. StringNodeBase(StringData v, ColKey column)
  1389. : m_value(v.is_null() ? util::none : util::make_optional(std::string(v)))
  1390. {
  1391. m_condition_column_key = column;
  1392. m_dT = 10.0;
  1393. }
  1394. void table_changed() override
  1395. {
  1396. m_is_string_enum = m_table.unchecked_ptr()->is_enumerated(m_condition_column_key);
  1397. }
  1398. void cluster_changed() override
  1399. {
  1400. // Assigning nullptr will cause the Leaf destructor to be called. Must
  1401. // be done before assigning a new one. Otherwise the destructor will be
  1402. // called after the constructor is called and that is unfortunate if
  1403. // the object has the same address. (As in this case)
  1404. m_array_ptr = nullptr;
  1405. // Create new Leaf
  1406. m_array_ptr = LeafPtr(new (&m_leaf_cache_storage) ArrayString(m_table.unchecked_ptr()->get_alloc()));
  1407. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  1408. m_leaf_ptr = m_array_ptr.get();
  1409. }
  1410. void init(bool will_query_ranges) override
  1411. {
  1412. ParentNode::init(will_query_ranges);
  1413. m_probes = 0;
  1414. m_matches = 0;
  1415. m_end_s = 0;
  1416. m_leaf_start = 0;
  1417. m_leaf_end = 0;
  1418. }
  1419. virtual void clear_leaf_state()
  1420. {
  1421. m_array_ptr = nullptr;
  1422. }
  1423. StringNodeBase(const StringNodeBase& from)
  1424. : ParentNode(from)
  1425. , m_value(from.m_value)
  1426. , m_is_string_enum(from.m_is_string_enum)
  1427. {
  1428. }
  1429. virtual std::string describe(util::serializer::SerialisationState& state) const override
  1430. {
  1431. REALM_ASSERT(m_condition_column_key);
  1432. StringData sd;
  1433. if (bool(StringNodeBase::m_value)) {
  1434. sd = StringData(*StringNodeBase::m_value);
  1435. }
  1436. return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " + describe_condition() + " " +
  1437. util::serializer::print_value(sd);
  1438. }
  1439. protected:
  1440. util::Optional<std::string> m_value;
  1441. using LeafCacheStorage = typename std::aligned_storage<sizeof(ArrayString), alignof(ArrayString)>::type;
  1442. using LeafPtr = std::unique_ptr<ArrayString, PlacementDelete>;
  1443. LeafCacheStorage m_leaf_cache_storage;
  1444. LeafPtr m_array_ptr;
  1445. const ArrayString* m_leaf_ptr = nullptr;
  1446. bool m_is_string_enum = false;
  1447. size_t m_end_s = 0;
  1448. size_t m_leaf_start = 0;
  1449. size_t m_leaf_end = 0;
  1450. inline StringData get_string(size_t s)
  1451. {
  1452. return m_leaf_ptr->get(s);
  1453. }
  1454. };
  1455. // Conditions for strings. Note that Equal is specialized later in this file!
  1456. template <class TConditionFunction>
  1457. class StringNode : public StringNodeBase {
  1458. public:
  1459. StringNode(StringData v, ColKey column)
  1460. : StringNodeBase(v, column)
  1461. {
  1462. auto upper = case_map(v, true);
  1463. auto lower = case_map(v, false);
  1464. if (!upper || !lower) {
  1465. throw InvalidArgument(util::format("Malformed UTF-8: %1", v));
  1466. }
  1467. else {
  1468. m_ucase = std::move(*upper);
  1469. m_lcase = std::move(*lower);
  1470. }
  1471. }
  1472. void init(bool will_query_ranges) override
  1473. {
  1474. StringNodeBase::init(will_query_ranges);
  1475. clear_leaf_state();
  1476. }
  1477. size_t find_first_local(size_t start, size_t end) override
  1478. {
  1479. TConditionFunction cond;
  1480. for (size_t s = start; s < end; ++s) {
  1481. StringData t = get_string(s);
  1482. if (cond(StringData(m_value), m_ucase.c_str(), m_lcase.c_str(), t))
  1483. return s;
  1484. }
  1485. return not_found;
  1486. }
  1487. virtual std::string describe_condition() const override
  1488. {
  1489. return TConditionFunction::description();
  1490. }
  1491. std::unique_ptr<ParentNode> clone() const override
  1492. {
  1493. return std::unique_ptr<ParentNode>(new StringNode<TConditionFunction>(*this));
  1494. }
  1495. StringNode(const StringNode& from)
  1496. : StringNodeBase(from)
  1497. , m_ucase(from.m_ucase)
  1498. , m_lcase(from.m_lcase)
  1499. {
  1500. }
  1501. protected:
  1502. std::string m_ucase;
  1503. std::string m_lcase;
  1504. };
  1505. // Specialization for Contains condition on Strings - we specialize because we can utilize Boyer-Moore
  1506. template <>
  1507. class StringNode<Contains> : public StringNodeBase {
  1508. public:
  1509. StringNode(StringData v, ColKey column)
  1510. : StringNodeBase(v, column)
  1511. , m_charmap()
  1512. {
  1513. if (v.size() == 0)
  1514. return;
  1515. // Build a dictionary of char-to-last distances in the search string
  1516. // (zero indicates that the char is not in needle)
  1517. size_t last_char_pos = v.size() - 1;
  1518. for (size_t i = 0; i < last_char_pos; ++i) {
  1519. // we never jump longer increments than 255 chars, even if needle is longer (to fit in one byte)
  1520. uint8_t jump = last_char_pos - i < 255 ? static_cast<uint8_t>(last_char_pos - i) : 255;
  1521. unsigned char c = v[i];
  1522. m_charmap[c] = jump;
  1523. }
  1524. m_dT = 50.0;
  1525. }
  1526. void init(bool will_query_ranges) override
  1527. {
  1528. StringNodeBase::init(will_query_ranges);
  1529. clear_leaf_state();
  1530. }
  1531. size_t find_first_local(size_t start, size_t end) override
  1532. {
  1533. Contains cond;
  1534. for (size_t s = start; s < end; ++s) {
  1535. StringData t = get_string(s);
  1536. if (cond(StringData(m_value), m_charmap, t))
  1537. return s;
  1538. }
  1539. return not_found;
  1540. }
  1541. virtual std::string describe_condition() const override
  1542. {
  1543. return Contains::description();
  1544. }
  1545. std::unique_ptr<ParentNode> clone() const override
  1546. {
  1547. return std::unique_ptr<ParentNode>(new StringNode<Contains>(*this));
  1548. }
  1549. StringNode(const StringNode& from)
  1550. : StringNodeBase(from)
  1551. , m_charmap(from.m_charmap)
  1552. {
  1553. }
  1554. protected:
  1555. std::array<uint8_t, 256> m_charmap;
  1556. };
  1557. // Specialization for ContainsIns condition on Strings - we specialize because we can utilize Boyer-Moore
  1558. template <>
  1559. class StringNode<ContainsIns> : public StringNodeBase {
  1560. public:
  1561. StringNode(StringData v, ColKey column)
  1562. : StringNodeBase(v, column)
  1563. , m_charmap()
  1564. {
  1565. auto upper = case_map(v, true);
  1566. auto lower = case_map(v, false);
  1567. if (!upper || !lower) {
  1568. throw query_parser::InvalidQueryError(util::format("Malformed UTF-8: %1", v));
  1569. }
  1570. else {
  1571. m_ucase = std::move(*upper);
  1572. m_lcase = std::move(*lower);
  1573. }
  1574. if (v.size() == 0)
  1575. return;
  1576. // Build a dictionary of char-to-last distances in the search string
  1577. // (zero indicates that the char is not in needle)
  1578. size_t last_char_pos = m_ucase.size() - 1;
  1579. for (size_t i = 0; i < last_char_pos; ++i) {
  1580. // we never jump longer increments than 255 chars, even if needle is longer (to fit in one byte)
  1581. uint8_t jump = last_char_pos - i < 255 ? static_cast<uint8_t>(last_char_pos - i) : 255;
  1582. unsigned char uc = m_ucase[i];
  1583. unsigned char lc = m_lcase[i];
  1584. m_charmap[uc] = jump;
  1585. m_charmap[lc] = jump;
  1586. }
  1587. m_dT = 75.0;
  1588. }
  1589. void init(bool will_query_ranges) override
  1590. {
  1591. StringNodeBase::init(will_query_ranges);
  1592. clear_leaf_state();
  1593. }
  1594. size_t find_first_local(size_t start, size_t end) override
  1595. {
  1596. ContainsIns cond;
  1597. for (size_t s = start; s < end; ++s) {
  1598. StringData t = get_string(s);
  1599. // The current behaviour is to return all results when querying for a null string.
  1600. // See comment above Query_NextGen_StringConditions on why every string including "" contains null.
  1601. if (!bool(m_value)) {
  1602. return s;
  1603. }
  1604. if (cond(StringData(m_value), m_ucase.c_str(), m_lcase.c_str(), m_charmap, t))
  1605. return s;
  1606. }
  1607. return not_found;
  1608. }
  1609. virtual std::string describe_condition() const override
  1610. {
  1611. return ContainsIns::description();
  1612. }
  1613. std::unique_ptr<ParentNode> clone() const override
  1614. {
  1615. return std::unique_ptr<ParentNode>(new StringNode<ContainsIns>(*this));
  1616. }
  1617. StringNode(const StringNode& from)
  1618. : StringNodeBase(from)
  1619. , m_charmap(from.m_charmap)
  1620. , m_ucase(from.m_ucase)
  1621. , m_lcase(from.m_lcase)
  1622. {
  1623. }
  1624. protected:
  1625. std::array<uint8_t, 256> m_charmap;
  1626. std::string m_ucase;
  1627. std::string m_lcase;
  1628. };
  1629. class StringNodeEqualBase : public StringNodeBase {
  1630. public:
  1631. StringNodeEqualBase(StringData v, ColKey column)
  1632. : StringNodeBase(v, column)
  1633. {
  1634. }
  1635. StringNodeEqualBase(const StringNodeEqualBase& from)
  1636. : StringNodeBase(from)
  1637. , m_index_evaluator(from.m_index_evaluator)
  1638. {
  1639. }
  1640. void init(bool) override;
  1641. void table_changed() override
  1642. {
  1643. StringNodeBase::table_changed();
  1644. const bool has_index =
  1645. m_table.unchecked_ptr()->search_index_type(m_condition_column_key) == IndexType::General;
  1646. m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
  1647. }
  1648. bool has_search_index() const override
  1649. {
  1650. return bool(m_index_evaluator);
  1651. }
  1652. void cluster_changed() override
  1653. {
  1654. // If we use searchindex, we do not need further access to clusters
  1655. if (!m_index_evaluator) {
  1656. StringNodeBase::cluster_changed();
  1657. }
  1658. }
  1659. size_t find_first_local(size_t start, size_t end) override;
  1660. virtual std::string describe_condition() const override
  1661. {
  1662. return Equal::description();
  1663. }
  1664. const IndexEvaluator* index_based_keys() override
  1665. {
  1666. return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
  1667. }
  1668. protected:
  1669. std::optional<IndexEvaluator> m_index_evaluator;
  1670. inline BinaryData str_to_bin(const StringData& s) noexcept
  1671. {
  1672. return BinaryData(s.data(), s.size());
  1673. }
  1674. virtual void _search_index_init() = 0;
  1675. virtual size_t _find_first_local(size_t start, size_t end) = 0;
  1676. };
  1677. // Specialization for Equal condition on Strings - we specialize because we can utilize indexes (if they exist) for
  1678. // Equal. This specialisation also supports combining other StringNode<Equal> conditions into itself in order to
  1679. // optimise the non-indexed linear search that can be happen when many conditions are OR'd together in an "IN" query.
  1680. // Future optimization: make specialization for greater, notequal, etc
  1681. template <>
  1682. class StringNode<Equal> : public StringNodeEqualBase {
  1683. public:
  1684. StringNode(StringData v, ColKey column)
  1685. : StringNodeEqualBase(v, column)
  1686. {
  1687. }
  1688. void _search_index_init() override;
  1689. bool do_consume_condition(ParentNode& other) override;
  1690. std::unique_ptr<ParentNode> clone() const override
  1691. {
  1692. return std::unique_ptr<ParentNode>(new StringNode<Equal>(*this));
  1693. }
  1694. std::string describe(util::serializer::SerialisationState& state) const override;
  1695. StringNode(const StringNode& from)
  1696. : StringNodeEqualBase(from)
  1697. {
  1698. for (auto& needle : from.m_needles) {
  1699. if (needle.is_null()) {
  1700. m_needles.emplace();
  1701. }
  1702. else {
  1703. m_needle_storage.push_back(std::make_unique<char[]>(needle.size()));
  1704. std::copy(needle.data(), needle.data() + needle.size(), m_needle_storage.back().get());
  1705. m_needles.insert(StringData(m_needle_storage.back().get(), needle.size()));
  1706. }
  1707. }
  1708. }
  1709. private:
  1710. size_t _find_first_local(size_t start, size_t end) override;
  1711. std::unordered_set<StringData> m_needles;
  1712. std::vector<std::unique_ptr<char[]>> m_needle_storage;
  1713. };
  1714. // Specialization for EqualIns condition on Strings - we specialize because we can utilize indexes (if they exist) for
  1715. // EqualIns.
  1716. template <>
  1717. class StringNode<EqualIns> : public StringNodeEqualBase {
  1718. public:
  1719. StringNode(StringData v, ColKey column)
  1720. : StringNodeEqualBase(v, column)
  1721. {
  1722. auto upper = case_map(v, true);
  1723. auto lower = case_map(v, false);
  1724. if (!upper || !lower) {
  1725. throw query_parser::InvalidQueryError(util::format("Malformed UTF-8: %1", v));
  1726. }
  1727. else {
  1728. m_ucase = std::move(*upper);
  1729. m_lcase = std::move(*lower);
  1730. }
  1731. }
  1732. void _search_index_init() override;
  1733. virtual std::string describe_condition() const override
  1734. {
  1735. return EqualIns::description();
  1736. }
  1737. std::unique_ptr<ParentNode> clone() const override
  1738. {
  1739. return std::unique_ptr<ParentNode>(new StringNode(*this));
  1740. }
  1741. StringNode(const StringNode& from)
  1742. : StringNodeEqualBase(from)
  1743. , m_ucase(from.m_ucase)
  1744. , m_lcase(from.m_lcase)
  1745. {
  1746. }
  1747. private:
  1748. std::vector<ObjKey> m_index_matches;
  1749. std::string m_ucase;
  1750. std::string m_lcase;
  1751. std::vector<ObjKey> storage;
  1752. size_t _find_first_local(size_t start, size_t end) override;
  1753. };
  1754. class LinkMap;
  1755. class StringNodeFulltext : public StringNodeEqualBase {
  1756. public:
  1757. StringNodeFulltext(StringData v, ColKey column, std::unique_ptr<LinkMap> lm = {});
  1758. void table_changed() override;
  1759. void _search_index_init() override;
  1760. bool has_search_index() const override
  1761. {
  1762. return true; // it's a required precondition for fulltext queries
  1763. }
  1764. std::unique_ptr<ParentNode> clone() const override
  1765. {
  1766. return std::unique_ptr<ParentNode>(new StringNodeFulltext(*this));
  1767. }
  1768. virtual std::string describe_condition() const override
  1769. {
  1770. return "FULLTEXT";
  1771. }
  1772. private:
  1773. std::vector<ObjKey> m_index_matches;
  1774. std::unique_ptr<LinkMap> m_link_map;
  1775. StringNodeFulltext(const StringNodeFulltext&);
  1776. size_t _find_first_local(size_t, size_t) override
  1777. {
  1778. REALM_UNREACHABLE();
  1779. }
  1780. };
  1781. // OR node contains at least two node pointers: Two or more conditions to OR
  1782. // together in m_conditions, and the next AND condition (if any) in m_child.
  1783. //
  1784. // For 'second.equal(23).begin_group().first.equal(111).Or().first.equal(222).end_group().third().equal(555)', this
  1785. // will first set m_conditions[0] = left-hand-side through constructor, and then later, when .first.equal(222) is
  1786. // invoked, invocation will set m_conditions[1] = right-hand-side through Query& Query::Or() (see query.cpp).
  1787. // In there, m_child is also set to next AND condition (if any exists) following the OR.
  1788. class OrNode : public ParentNode {
  1789. public:
  1790. OrNode(std::unique_ptr<ParentNode> condition)
  1791. {
  1792. m_dT = 50.0;
  1793. if (condition)
  1794. m_conditions.emplace_back(std::move(condition));
  1795. }
  1796. OrNode(const OrNode& other)
  1797. : ParentNode(other)
  1798. {
  1799. for (const auto& condition : other.m_conditions) {
  1800. m_conditions.emplace_back(condition->clone());
  1801. }
  1802. }
  1803. void table_changed() override
  1804. {
  1805. for (auto& condition : m_conditions) {
  1806. condition->set_table(m_table);
  1807. }
  1808. }
  1809. void cluster_changed() override
  1810. {
  1811. for (auto& condition : m_conditions) {
  1812. condition->set_cluster(m_cluster);
  1813. }
  1814. m_start.clear();
  1815. m_start.resize(m_conditions.size(), 0);
  1816. m_last.clear();
  1817. m_last.resize(m_conditions.size(), 0);
  1818. m_was_match.clear();
  1819. m_was_match.resize(m_conditions.size(), false);
  1820. }
  1821. std::string describe(util::serializer::SerialisationState& state) const override
  1822. {
  1823. std::string s;
  1824. for (size_t i = 0; i < m_conditions.size(); ++i) {
  1825. if (m_conditions[i]) {
  1826. s += m_conditions[i]->describe_expression(state);
  1827. if (i != m_conditions.size() - 1) {
  1828. s += " or ";
  1829. }
  1830. }
  1831. }
  1832. if (m_conditions.size() > 1) {
  1833. s = "(" + s + ")";
  1834. }
  1835. return s;
  1836. }
  1837. void collect_dependencies(std::vector<TableKey>& versions) const override
  1838. {
  1839. for (const auto& cond : m_conditions) {
  1840. cond->collect_dependencies(versions);
  1841. }
  1842. }
  1843. void init(bool will_query_ranges) override
  1844. {
  1845. ParentNode::init(will_query_ranges);
  1846. combine_conditions(!will_query_ranges);
  1847. m_start.clear();
  1848. m_start.resize(m_conditions.size(), 0);
  1849. m_last.clear();
  1850. m_last.resize(m_conditions.size(), 0);
  1851. m_was_match.clear();
  1852. m_was_match.resize(m_conditions.size(), false);
  1853. std::vector<ParentNode*> v;
  1854. for (auto& condition : m_conditions) {
  1855. condition->init(will_query_ranges);
  1856. v.clear();
  1857. condition->gather_children(v);
  1858. }
  1859. }
  1860. size_t find_first_local(size_t start, size_t end) override
  1861. {
  1862. if (start >= end)
  1863. return not_found;
  1864. size_t index = not_found;
  1865. for (size_t c = 0; c < m_conditions.size(); ++c) {
  1866. // out of order search; have to discard cached results
  1867. if (start < m_start[c]) {
  1868. m_last[c] = 0;
  1869. m_was_match[c] = false;
  1870. }
  1871. // already searched this range and didn't match
  1872. else if (m_last[c] >= end)
  1873. continue;
  1874. // already search this range and *did* match
  1875. else if (m_was_match[c] && m_last[c] >= start) {
  1876. if (index > m_last[c])
  1877. index = m_last[c];
  1878. continue;
  1879. }
  1880. m_start[c] = start;
  1881. size_t fmax = std::max(m_last[c], start);
  1882. size_t f = m_conditions[c]->find_first(fmax, end);
  1883. m_was_match[c] = f != not_found;
  1884. m_last[c] = f == not_found ? end : f;
  1885. if (f != not_found && index > m_last[c])
  1886. index = m_last[c];
  1887. }
  1888. return index;
  1889. }
  1890. std::string validate() override
  1891. {
  1892. if (m_conditions.size() == 0)
  1893. return "Missing both arguments of OR";
  1894. if (m_conditions.size() == 1)
  1895. return "Missing argument of OR";
  1896. std::string s;
  1897. if (m_child != 0)
  1898. s = m_child->validate();
  1899. if (s != "")
  1900. return s;
  1901. for (size_t i = 0; i < m_conditions.size(); ++i) {
  1902. s = m_conditions[i]->validate();
  1903. if (s != "")
  1904. return s;
  1905. }
  1906. return "";
  1907. }
  1908. std::unique_ptr<ParentNode> clone() const override
  1909. {
  1910. return std::unique_ptr<ParentNode>(new OrNode(*this));
  1911. }
  1912. std::vector<std::unique_ptr<ParentNode>> m_conditions;
  1913. private:
  1914. void combine_conditions(bool ignore_indexes)
  1915. {
  1916. std::sort(m_conditions.begin(), m_conditions.end(), [](auto& a, auto& b) {
  1917. return a->m_condition_column_key < b->m_condition_column_key;
  1918. });
  1919. auto prev = m_conditions.begin()->get();
  1920. auto cond = [&](auto& node) {
  1921. if (prev->consume_condition(*node, ignore_indexes))
  1922. return true;
  1923. prev = &*node;
  1924. return false;
  1925. };
  1926. m_conditions.erase(std::remove_if(m_conditions.begin() + 1, m_conditions.end(), cond), m_conditions.end());
  1927. }
  1928. // start index of the last find for each cond
  1929. std::vector<size_t> m_start;
  1930. // last looked at index of the lasft find for each cond
  1931. // is a matching index if m_was_match is true
  1932. std::vector<size_t> m_last;
  1933. std::vector<bool> m_was_match;
  1934. };
  1935. class NotNode : public ParentNode {
  1936. public:
  1937. NotNode(std::unique_ptr<ParentNode> condition)
  1938. : m_condition(std::move(condition))
  1939. {
  1940. m_dT = 50.0;
  1941. if (!m_condition) {
  1942. throw query_parser::InvalidQueryError("Missing argument to Not");
  1943. }
  1944. }
  1945. void table_changed() override
  1946. {
  1947. m_condition->set_table(m_table);
  1948. }
  1949. void cluster_changed() override
  1950. {
  1951. m_condition->set_cluster(m_cluster);
  1952. // Heuristics bookkeeping:
  1953. m_known_range_start = 0;
  1954. m_known_range_end = 0;
  1955. m_first_in_known_range = not_found;
  1956. }
  1957. void init(bool will_query_ranges) override
  1958. {
  1959. ParentNode::init(will_query_ranges);
  1960. std::vector<ParentNode*> v;
  1961. m_condition->init(false);
  1962. v.clear();
  1963. m_condition->gather_children(v);
  1964. }
  1965. size_t find_first_local(size_t start, size_t end) override;
  1966. std::string describe(util::serializer::SerialisationState& state) const override
  1967. {
  1968. if (m_condition) {
  1969. return "!(" + m_condition->describe_expression(state) + ")";
  1970. }
  1971. return "!()";
  1972. }
  1973. void collect_dependencies(std::vector<TableKey>& versions) const override
  1974. {
  1975. if (m_condition) {
  1976. m_condition->collect_dependencies(versions);
  1977. }
  1978. }
  1979. std::unique_ptr<ParentNode> clone() const override
  1980. {
  1981. return std::unique_ptr<ParentNode>(new NotNode(*this));
  1982. }
  1983. NotNode(const NotNode& from)
  1984. : ParentNode(from)
  1985. , m_condition(from.m_condition ? from.m_condition->clone() : nullptr)
  1986. , m_known_range_start(from.m_known_range_start)
  1987. , m_known_range_end(from.m_known_range_end)
  1988. , m_first_in_known_range(from.m_first_in_known_range)
  1989. {
  1990. }
  1991. std::unique_ptr<ParentNode> m_condition;
  1992. private:
  1993. // FIXME This heuristic might as well be reused for all condition nodes.
  1994. size_t m_known_range_start;
  1995. size_t m_known_range_end;
  1996. size_t m_first_in_known_range;
  1997. bool evaluate_at(size_t rowndx);
  1998. void update_known(size_t start, size_t end, size_t first);
  1999. size_t find_first_loop(size_t start, size_t end);
  2000. size_t find_first_covers_known(size_t start, size_t end);
  2001. size_t find_first_covered_by_known(size_t start, size_t end);
  2002. size_t find_first_overlap_lower(size_t start, size_t end);
  2003. size_t find_first_overlap_upper(size_t start, size_t end);
  2004. size_t find_first_no_overlap(size_t start, size_t end);
  2005. };
  2006. // Compare two columns with eachother row-by-row
  2007. class TwoColumnsNodeBase : public ParentNode {
  2008. public:
  2009. TwoColumnsNodeBase(ColKey column1, ColKey column2)
  2010. {
  2011. m_dT = 100.0;
  2012. m_condition_column_key1 = column1;
  2013. m_condition_column_key2 = column2;
  2014. if (m_condition_column_key1.is_collection() || m_condition_column_key2.is_collection()) {
  2015. throw Exception(ErrorCodes::InvalidQuery,
  2016. util::format("queries comparing two properties are not yet supported for "
  2017. "collections (list/set/dictionary) (%1 and %2)",
  2018. ParentNode::m_table->get_column_name(m_condition_column_key1),
  2019. ParentNode::m_table->get_column_name(m_condition_column_key2)));
  2020. }
  2021. }
  2022. ~TwoColumnsNodeBase() noexcept override {}
  2023. void table_changed() override
  2024. {
  2025. if (this->m_table) {
  2026. ParentNode::m_table->check_column(m_condition_column_key1);
  2027. ParentNode::m_table->check_column(m_condition_column_key2);
  2028. }
  2029. }
  2030. static std::unique_ptr<ArrayPayload> update_cached_leaf_pointers_for_column(Allocator& alloc,
  2031. const ColKey& col_key);
  2032. void cluster_changed() override
  2033. {
  2034. if (!m_leaf_ptr1) {
  2035. m_leaf_ptr1 =
  2036. update_cached_leaf_pointers_for_column(m_table.unchecked_ptr()->get_alloc(), m_condition_column_key1);
  2037. }
  2038. if (!m_leaf_ptr2) {
  2039. m_leaf_ptr2 =
  2040. update_cached_leaf_pointers_for_column(m_table.unchecked_ptr()->get_alloc(), m_condition_column_key2);
  2041. }
  2042. this->m_cluster->init_leaf(m_condition_column_key1, m_leaf_ptr1.get());
  2043. this->m_cluster->init_leaf(m_condition_column_key2, m_leaf_ptr2.get());
  2044. }
  2045. virtual std::string describe(util::serializer::SerialisationState& state) const override
  2046. {
  2047. REALM_ASSERT(m_condition_column_key1 && m_condition_column_key2);
  2048. return state.describe_column(ParentNode::m_table, m_condition_column_key1) + " " + describe_condition() +
  2049. " " + state.describe_column(ParentNode::m_table, m_condition_column_key2);
  2050. }
  2051. TwoColumnsNodeBase(const TwoColumnsNodeBase& from)
  2052. : ParentNode(from)
  2053. , m_condition_column_key1(from.m_condition_column_key1)
  2054. , m_condition_column_key2(from.m_condition_column_key2)
  2055. {
  2056. }
  2057. protected:
  2058. mutable ColKey m_condition_column_key1;
  2059. mutable ColKey m_condition_column_key2;
  2060. std::unique_ptr<ArrayPayload> m_leaf_ptr1 = nullptr;
  2061. std::unique_ptr<ArrayPayload> m_leaf_ptr2 = nullptr;
  2062. };
  2063. template <class TConditionFunction>
  2064. class TwoColumnsNode : public TwoColumnsNodeBase {
  2065. public:
  2066. using TwoColumnsNodeBase::TwoColumnsNodeBase;
  2067. ~TwoColumnsNode() noexcept override {}
  2068. size_t find_first_local(size_t start, size_t end) override
  2069. {
  2070. size_t s = start;
  2071. while (s < end) {
  2072. QueryValue v1(m_leaf_ptr1->get_any(s));
  2073. QueryValue v2(m_leaf_ptr2->get_any(s));
  2074. if (TConditionFunction()(v1, v2))
  2075. return s;
  2076. else
  2077. s++;
  2078. }
  2079. return not_found;
  2080. }
  2081. virtual std::string describe_condition() const override
  2082. {
  2083. return TConditionFunction::description();
  2084. }
  2085. std::unique_ptr<ParentNode> clone() const override
  2086. {
  2087. return std::unique_ptr<ParentNode>(new TwoColumnsNode<TConditionFunction>(*this));
  2088. }
  2089. protected:
  2090. TwoColumnsNode(const TwoColumnsNode& from, Transaction* tr)
  2091. : TwoColumnsNode(from, tr)
  2092. {
  2093. }
  2094. };
  2095. // For Next-Generation expressions like col1 / col2 + 123 > col4 * 100.
  2096. class ExpressionNode : public ParentNode {
  2097. public:
  2098. ExpressionNode(std::unique_ptr<Expression>);
  2099. void init(bool) override;
  2100. size_t find_first_local(size_t start, size_t end) override;
  2101. void table_changed() override;
  2102. void cluster_changed() override;
  2103. void collect_dependencies(std::vector<TableKey>&) const override;
  2104. virtual std::string describe(util::serializer::SerialisationState& state) const override;
  2105. std::unique_ptr<ParentNode> clone() const override;
  2106. private:
  2107. ExpressionNode(const ExpressionNode& from);
  2108. std::unique_ptr<Expression> m_expression;
  2109. };
  2110. class LinksToNodeBase : public ParentNode {
  2111. public:
  2112. LinksToNodeBase(ColKey origin_column_key, ObjKey target_key)
  2113. : LinksToNodeBase(origin_column_key, std::vector<ObjKey>{target_key})
  2114. {
  2115. }
  2116. LinksToNodeBase(ColKey origin_column_key, const std::vector<ObjKey>& target_keys)
  2117. : m_target_keys(target_keys)
  2118. {
  2119. m_dT = 50.0;
  2120. m_condition_column_key = origin_column_key;
  2121. m_column_type = origin_column_key.get_type();
  2122. REALM_ASSERT(m_column_type == col_type_Link || m_column_type == col_type_LinkList);
  2123. REALM_ASSERT(!m_target_keys.empty());
  2124. }
  2125. void cluster_changed() override
  2126. {
  2127. m_array_ptr = nullptr;
  2128. if (m_column_type == col_type_Link) {
  2129. m_array_ptr = LeafPtr(new (&m_storage.m_list) ArrayKey(m_table.unchecked_ptr()->get_alloc()));
  2130. }
  2131. else if (m_column_type == col_type_LinkList) {
  2132. m_array_ptr = LeafPtr(new (&m_storage.m_linklist) ArrayList(m_table.unchecked_ptr()->get_alloc()));
  2133. }
  2134. m_cluster->init_leaf(this->m_condition_column_key, m_array_ptr.get());
  2135. m_leaf_ptr = m_array_ptr.get();
  2136. }
  2137. virtual std::string describe(util::serializer::SerialisationState& state) const override
  2138. {
  2139. REALM_ASSERT(m_condition_column_key);
  2140. if (m_target_keys.size() > 1)
  2141. throw SerializationError("Serializing a query which links to multiple objects is currently unsupported.");
  2142. ObjLink link(m_table->get_opposite_table(m_condition_column_key)->get_key(), m_target_keys[0]);
  2143. return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " + describe_condition() + " " +
  2144. util::serializer::print_value(link, m_table->get_parent_group());
  2145. }
  2146. protected:
  2147. std::vector<ObjKey> m_target_keys;
  2148. ColumnType m_column_type;
  2149. using LeafPtr = std::unique_ptr<ArrayPayload, PlacementDelete>;
  2150. union Storage {
  2151. typename std::aligned_storage<sizeof(ArrayKey), alignof(ArrayKey)>::type m_list;
  2152. typename std::aligned_storage<sizeof(ArrayList), alignof(ArrayList)>::type m_linklist;
  2153. };
  2154. Storage m_storage;
  2155. LeafPtr m_array_ptr;
  2156. const ArrayPayload* m_leaf_ptr = nullptr;
  2157. LinksToNodeBase(const LinksToNodeBase& source)
  2158. : ParentNode(source)
  2159. , m_target_keys(source.m_target_keys)
  2160. , m_column_type(source.m_column_type)
  2161. {
  2162. }
  2163. };
  2164. template <class TConditionFunction>
  2165. class LinksToNode : public LinksToNodeBase {
  2166. public:
  2167. using LinksToNodeBase::LinksToNodeBase;
  2168. std::string describe_condition() const override
  2169. {
  2170. return TConditionFunction::description();
  2171. }
  2172. std::unique_ptr<ParentNode> clone() const override
  2173. {
  2174. return std::unique_ptr<ParentNode>(new LinksToNode<TConditionFunction>(*this));
  2175. }
  2176. size_t find_first_local(size_t start, size_t end) override;
  2177. };
  2178. } // namespace realm
  2179. #endif // REALM_QUERY_ENGINE_HPP