changeset.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. #ifndef REALM_SYNC_CHANGESET_HPP
  2. #define REALM_SYNC_CHANGESET_HPP
  3. #include <realm/sync/instructions.hpp>
  4. #include <realm/util/optional.hpp>
  5. #include <type_traits>
  6. namespace realm {
  7. namespace sync {
  8. using InternStrings = std::vector<StringBufferRange>;
  9. struct BadChangesetError : Exception {
  10. BadChangesetError(const std::string& msg)
  11. : Exception(ErrorCodes::BadChangeset, util::format("%1. Please contact support.", msg))
  12. {
  13. }
  14. };
  15. struct Changeset {
  16. struct Range;
  17. using timestamp_type = uint_fast64_t;
  18. using file_ident_type = uint_fast64_t;
  19. using version_type = uint_fast64_t; // FIXME: Get from `History`.
  20. InternString intern_string(StringData); // Slow!
  21. InternString find_string(StringData) const noexcept; // Slow!
  22. StringData string_data() const noexcept;
  23. std::string& string_buffer() noexcept;
  24. const std::string& string_buffer() const noexcept;
  25. const InternStrings& interned_strings() const noexcept;
  26. InternStrings& interned_strings() noexcept;
  27. StringBufferRange get_intern_string(InternString) const noexcept;
  28. util::Optional<StringBufferRange> try_get_intern_string(InternString) const noexcept;
  29. util::Optional<StringData> try_get_string(StringBufferRange) const noexcept;
  30. util::Optional<StringData> try_get_string(InternString) const noexcept;
  31. StringData get_string(StringBufferRange) const noexcept;
  32. StringData get_string(InternString) const noexcept;
  33. StringBufferRange append_string(StringData);
  34. PrimaryKey get_key(const Instruction::PrimaryKey& value) const noexcept;
  35. std::ostream& print_value(std::ostream& os, const Instruction::Payload& value) const noexcept;
  36. std::ostream& print_path(std::ostream& os, const Instruction::Path& value) const noexcept;
  37. std::ostream& print_path(std::ostream& os, InternString table, const Instruction::PrimaryKey& pk,
  38. util::Optional<InternString> field = util::none,
  39. const Instruction::Path* path = nullptr) const;
  40. /// Mark the changeset as "dirty" (i.e. modified by the merge algorithm).
  41. void set_dirty(bool dirty = true) noexcept;
  42. /// Whether or not the changeset is "dirty" (i.e. has been modified by the
  43. /// merge algorithm).
  44. bool is_dirty() const noexcept;
  45. // Interface to imitate std::vector:
  46. template <bool is_const>
  47. struct IteratorImpl;
  48. using iterator = IteratorImpl<false>;
  49. using const_iterator = IteratorImpl<true>;
  50. iterator begin() noexcept;
  51. iterator end() noexcept;
  52. const_iterator begin() const noexcept;
  53. const_iterator end() const noexcept;
  54. const_iterator cbegin() const noexcept;
  55. const_iterator cend() const noexcept;
  56. bool empty() const noexcept;
  57. /// Size of the Changeset, not counting tombstones.
  58. ///
  59. /// FIXME: This is an O(n) operation.
  60. size_t size() const noexcept;
  61. void clear() noexcept;
  62. //@{
  63. /// Insert instructions, invalidating all iterators.
  64. iterator insert(const_iterator pos, Instruction);
  65. template <class InputIt>
  66. iterator insert(const_iterator pos, InputIt begin, InputIt end);
  67. //@}
  68. /// Erase an instruction, invalidating all iterators.
  69. iterator erase(const_iterator);
  70. /// Insert an instruction at the end, invalidating all iterators.
  71. void push_back(const Instruction&);
  72. //@{
  73. /// Insert instructions at \a position without invalidating other
  74. /// iterators.
  75. ///
  76. /// Only iterators created before any call to `insert_stable()` may be
  77. /// considered stable across calls to `insert_stable()`. In addition,
  78. /// "iterator stability" has a very specific meaning here: Other copies of
  79. /// \a position in the program will point to the newly inserted elements
  80. /// after calling `insert_stable()`, rather than point to the value at the
  81. /// position prior to insertion. This is different from, say, a tree
  82. /// structure, where iterator stability signifies the property that
  83. /// iterators keep pointing to the same element after insertion before or
  84. /// after that position.
  85. ///
  86. /// For the purpose of supporting `ChangesetIndex`, and the OT merge
  87. /// algorithm, these semantics are acceptable, since prepended instructions
  88. /// can never create new object or table references.
  89. iterator insert_stable(const_iterator position, Instruction);
  90. template <class InputIt>
  91. iterator insert_stable(const_iterator position, InputIt begin, InputIt end);
  92. //@}
  93. /// Erase instruction at \a position without invalidating other iterators.
  94. /// If erasing the object would invalidate other iterators, it is turned
  95. /// into a tombstone instead, and subsequent derefencing of the iterator
  96. /// will return `nullptr`. An iterator pointing to a tombstone remains valid
  97. /// and can be incremented.
  98. ///
  99. /// Only iterators created before any call to `insert_stable()` may be
  100. /// considered stable across calls to `erase_stable()`. If other copies of
  101. /// \a position exist in the program, they will either point to the
  102. /// subsequent element if that element was previously inserted with
  103. /// `insert_stable()`, or otherwise it will be turned into a tombstone.
  104. iterator erase_stable(const_iterator position);
  105. #if REALM_DEBUG
  106. struct Reflector;
  107. struct Printer;
  108. void verify() const;
  109. void print(std::ostream&) const;
  110. void print() const; // prints to std::err
  111. #endif
  112. /// The version that this changeset produced. Note: This may not be the
  113. /// version produced by this changeset on the client on which this changeset
  114. /// originated, but may for instance be the version produced on the server
  115. /// after receiving and re-sending this changeset to another client.
  116. ///
  117. /// FIXME: The explanation above is confusing. The truth is that if this
  118. /// changeset was received by a client from the server, then \a version is
  119. /// the version that was produced on the server by this changeset.
  120. ///
  121. /// FIXME: This property, as well as \a last_integrated_remote_version, \a
  122. /// origin_timestamp, and \a origin_file_ident should probably be removed
  123. /// from this class, as they are not a logical part of a changeset, and also
  124. /// are difficult to document without knowing more about what context the
  125. /// changeset object occurs. Also, functions such as
  126. /// InstructionApplier::apply() that a changeset as argument, but do not
  127. /// care about those properties.
  128. version_type version = 0;
  129. /// On clients, the last integrated server version. On the server, this is
  130. /// the last integrated client version.
  131. ///
  132. /// FIXME: The explanation above is confusing. The truth is that if this
  133. /// changeset was received by a client from the server, then \a
  134. /// last_integrated_remote_version is the last client version that was
  135. /// integrated by the server at the server version referencened by \a
  136. /// version.
  137. version_type last_integrated_remote_version = 0;
  138. /// Timestamp at origin when the original untransformed changeset was
  139. /// produced.
  140. timestamp_type origin_timestamp = 0;
  141. /// The identifier of the file in the context of which the original
  142. /// untransformed changeset was produced.
  143. file_ident_type origin_file_ident = 0;
  144. /// Must be set before passing this Changeset to Transformer::transform_remote_changesets
  145. /// to the index of this changeset within the received changesets.
  146. ///
  147. /// In FLX sync the server may send multiple idempotent changesets with the same server version
  148. /// when bootstrapping data. Internal data structures within the OT Transformer require the
  149. /// input changesets to be sortable in the order that they were received. If the version number
  150. /// is not increasing, this will be used to determine the correct sort order.
  151. ///
  152. /// FIXME: This is a hack that we need to figure out a better way of fixing. This can maybe
  153. /// be part of refactoring the ChangesetIndex
  154. size_t transform_sequence = 0;
  155. /// If the changeset was compacted during download, the size of the original
  156. /// changeset. Only applies to changesets sent by the server.
  157. std::size_t original_changeset_size = 0;
  158. /// Compare for exact equality, including that interned strings have the
  159. /// same integer values, and there is the same number of interned strings,
  160. /// same topology of tombstones, etc.
  161. bool operator==(const Changeset& that) const noexcept;
  162. bool operator!=(const Changeset& that) const noexcept;
  163. private:
  164. std::vector<Instruction> m_instructions;
  165. std::string m_string_buffer;
  166. InternStrings m_strings;
  167. bool m_is_dirty = false;
  168. iterator const_iterator_to_iterator(const_iterator);
  169. };
  170. std::ostream& operator<<(std::ostream&, const Changeset& changeset);
  171. /// An iterator type that hides the implementation details of the support for
  172. /// iterator stability.
  173. ///
  174. /// A `Changeset::iterator` is composed of an
  175. /// `std::vector<InstructionContainer>::iterator` and a `size_t` representing
  176. /// the index into the current `InstructionContainer`. If that container is
  177. /// empty, and the position is zero, the iterator is pointing to a tombstone.
  178. template <bool is_const>
  179. struct Changeset::IteratorImpl {
  180. using list_type = std::vector<Instruction>;
  181. using inner_iterator_type = std::conditional_t<is_const, list_type::const_iterator, list_type::iterator>;
  182. // reference_type is a pointer because we have no way to create a reference
  183. // to a tombstone instruction. Alternatively, it could have been
  184. // `util::Optional<Instruction&>`, but that runs into other issues.
  185. using reference_type = std::conditional_t<is_const, const Instruction*, Instruction*>;
  186. using pointer_type = std::conditional_t<is_const, const Instruction*, Instruction*>;
  187. using difference_type = std::ptrdiff_t;
  188. IteratorImpl()
  189. : m_pos(0)
  190. {
  191. }
  192. template <bool is_const_ = is_const>
  193. IteratorImpl(const IteratorImpl<false>& other, std::enable_if_t<is_const_>* = nullptr)
  194. : m_inner(other.m_inner)
  195. , m_pos(other.m_pos)
  196. {
  197. }
  198. IteratorImpl(inner_iterator_type inner, size_t pos = 0)
  199. : m_inner(inner)
  200. , m_pos(pos)
  201. {
  202. }
  203. inline IteratorImpl& operator++()
  204. {
  205. ++m_pos;
  206. if (m_pos >= m_inner->size()) {
  207. ++m_inner;
  208. m_pos = 0;
  209. }
  210. return *this;
  211. }
  212. IteratorImpl operator++(int)
  213. {
  214. auto copy = *this;
  215. ++(*this);
  216. return copy;
  217. }
  218. IteratorImpl& operator--()
  219. {
  220. if (m_pos == 0) {
  221. --m_inner;
  222. m_pos = m_inner->size();
  223. if (m_pos != 0)
  224. --m_pos;
  225. }
  226. else {
  227. --m_pos;
  228. }
  229. return *this;
  230. }
  231. IteratorImpl operator--(int)
  232. {
  233. auto copy = *this;
  234. --(*this);
  235. return copy;
  236. }
  237. reference_type operator*() const
  238. {
  239. if (m_inner->size()) {
  240. return &m_inner->at(m_pos);
  241. }
  242. // It was a tombstone.
  243. return nullptr;
  244. }
  245. pointer_type operator->() const
  246. {
  247. if (m_inner->size()) {
  248. return &m_inner->at(m_pos);
  249. }
  250. // It was a tombstone.
  251. return nullptr;
  252. }
  253. bool operator==(const IteratorImpl& other) const
  254. {
  255. return m_inner == other.m_inner && m_pos == other.m_pos;
  256. }
  257. bool operator!=(const IteratorImpl& other) const
  258. {
  259. return !(*this == other);
  260. }
  261. bool operator<(const IteratorImpl& other) const
  262. {
  263. if (m_inner == other.m_inner)
  264. return m_pos < other.m_pos;
  265. return m_inner < other.m_inner;
  266. }
  267. bool operator<=(const IteratorImpl& other) const
  268. {
  269. if (m_inner == other.m_inner)
  270. return m_pos <= other.m_pos;
  271. return m_inner < other.m_inner;
  272. }
  273. bool operator>(const IteratorImpl& other) const
  274. {
  275. if (m_inner == other.m_inner)
  276. return m_pos > other.m_pos;
  277. return m_inner > other.m_inner;
  278. }
  279. bool operator>=(const IteratorImpl& other) const
  280. {
  281. if (m_inner == other.m_inner)
  282. return m_pos >= other.m_pos;
  283. return m_inner > other.m_inner;
  284. }
  285. inner_iterator_type m_inner;
  286. size_t m_pos;
  287. };
  288. struct Changeset::Range {
  289. iterator begin;
  290. iterator end;
  291. };
  292. #if REALM_DEBUG
  293. struct Changeset::Reflector {
  294. struct Tracer {
  295. virtual void name(StringData) = 0;
  296. virtual void path(StringData, InternString table, const Instruction::PrimaryKey& object_key,
  297. util::Optional<InternString> field, const Instruction::Path* path) = 0;
  298. virtual void field(StringData, InternString) = 0;
  299. virtual void field(StringData, Instruction::Payload::Type) = 0;
  300. virtual void field(StringData, Instruction::AddColumn::CollectionType) = 0;
  301. virtual void field(StringData, const Instruction::PrimaryKey&) = 0;
  302. virtual void field(StringData, const Instruction::Payload&) = 0;
  303. virtual void field(StringData, const Instruction::Path&) = 0;
  304. virtual void field(StringData, uint32_t) = 0;
  305. virtual void set_changeset(const Changeset*) = 0;
  306. virtual void after_each() {}
  307. virtual void before_each() {}
  308. };
  309. Reflector(Tracer& tracer, const Changeset& changeset)
  310. : m_tracer(tracer)
  311. , m_changeset(changeset)
  312. {
  313. tracer.set_changeset(&changeset);
  314. }
  315. void visit_all() const;
  316. private:
  317. Tracer& m_tracer;
  318. const Changeset& m_changeset;
  319. void table_instr(const Instruction::TableInstruction&) const;
  320. void object_instr(const Instruction::ObjectInstruction&) const;
  321. void path_instr(const Instruction::PathInstruction&) const;
  322. friend struct Instruction;
  323. #define REALM_DEFINE_REFLECTOR_VISITOR(X) void operator()(const Instruction::X&) const;
  324. REALM_FOR_EACH_INSTRUCTION_TYPE(REALM_DEFINE_REFLECTOR_VISITOR)
  325. #undef REALM_DEFINE_REFLECTOR_VISITOR
  326. };
  327. struct Changeset::Printer : Changeset::Reflector::Tracer {
  328. explicit Printer(std::ostream& os)
  329. : m_out(os)
  330. {
  331. }
  332. // ChangesetReflector::Tracer interface:
  333. void name(StringData) final;
  334. void path(StringData, InternString table, const Instruction::PrimaryKey&, util::Optional<InternString> field,
  335. const Instruction::Path* path) final;
  336. void field(StringData, InternString) final;
  337. void field(StringData, Instruction::Payload::Type) final;
  338. void field(StringData, Instruction::AddColumn::CollectionType) final;
  339. void field(StringData, const Instruction::PrimaryKey&) final;
  340. void field(StringData, const Instruction::Payload&) final;
  341. void field(StringData, const Instruction::Path&) final;
  342. void field(StringData, uint32_t) final;
  343. void set_changeset(const Changeset* changeset) final
  344. {
  345. m_changeset = changeset;
  346. }
  347. void after_each() final;
  348. private:
  349. std::ostream& m_out;
  350. bool m_first = true;
  351. const Changeset* m_changeset = nullptr;
  352. void pad_or_ellipsis(StringData, int width) const;
  353. void print_field(StringData name, std::string value);
  354. std::string primary_key_to_string(const Instruction::PrimaryKey&);
  355. };
  356. #endif // REALM_DEBUG
  357. /// Implementation:
  358. inline Changeset::iterator Changeset::begin() noexcept
  359. {
  360. return m_instructions.begin();
  361. }
  362. inline Changeset::iterator Changeset::end() noexcept
  363. {
  364. return m_instructions.end();
  365. }
  366. inline Changeset::const_iterator Changeset::begin() const noexcept
  367. {
  368. return m_instructions.begin();
  369. }
  370. inline Changeset::const_iterator Changeset::end() const noexcept
  371. {
  372. return m_instructions.end();
  373. }
  374. inline Changeset::const_iterator Changeset::cbegin() const noexcept
  375. {
  376. return m_instructions.cbegin();
  377. }
  378. inline Changeset::const_iterator Changeset::cend() const noexcept
  379. {
  380. return m_instructions.end();
  381. }
  382. inline bool Changeset::empty() const noexcept
  383. {
  384. return size() == 0;
  385. }
  386. inline size_t Changeset::size() const noexcept
  387. {
  388. size_t sum = 0;
  389. for (auto& x : m_instructions)
  390. sum += x.size();
  391. return sum;
  392. }
  393. inline void Changeset::clear() noexcept
  394. {
  395. m_instructions.clear();
  396. }
  397. inline util::Optional<StringBufferRange> Changeset::try_get_intern_string(InternString string) const noexcept
  398. {
  399. if (string.value >= m_strings.size())
  400. return util::none;
  401. return m_strings[string.value];
  402. }
  403. inline StringBufferRange Changeset::get_intern_string(InternString string) const noexcept
  404. {
  405. REALM_ASSERT(string.value < m_strings.size());
  406. return m_strings[string.value];
  407. }
  408. inline InternStrings& Changeset::interned_strings() noexcept
  409. {
  410. return m_strings;
  411. }
  412. inline const InternStrings& Changeset::interned_strings() const noexcept
  413. {
  414. return m_strings;
  415. }
  416. inline auto Changeset::string_buffer() noexcept -> std::string&
  417. {
  418. return m_string_buffer;
  419. }
  420. inline auto Changeset::string_buffer() const noexcept -> const std::string&
  421. {
  422. return m_string_buffer;
  423. }
  424. inline util::Optional<StringData> Changeset::try_get_string(StringBufferRange range) const noexcept
  425. {
  426. if (range.offset > m_string_buffer.size())
  427. return util::none;
  428. if (range.offset + range.size > m_string_buffer.size())
  429. return util::none;
  430. return StringData{m_string_buffer.data() + range.offset, range.size};
  431. }
  432. inline util::Optional<StringData> Changeset::try_get_string(InternString str) const noexcept
  433. {
  434. if (auto range = try_get_intern_string(str)) {
  435. return try_get_string(*range);
  436. }
  437. return util::none;
  438. }
  439. inline StringData Changeset::get_string(StringBufferRange range) const noexcept
  440. {
  441. auto string = try_get_string(range);
  442. REALM_ASSERT(string);
  443. return *string;
  444. }
  445. inline StringData Changeset::get_string(InternString string) const noexcept
  446. {
  447. return get_string(get_intern_string(string));
  448. }
  449. inline StringData Changeset::string_data() const noexcept
  450. {
  451. return StringData{m_string_buffer.data(), m_string_buffer.size()};
  452. }
  453. inline StringBufferRange Changeset::append_string(StringData string)
  454. {
  455. // We expect more strings. Only do this at the beginning because until C++20, reserve
  456. // will shrink_to_fit if the request is less than the current capacity.
  457. constexpr size_t small_string_buffer_size = 1024;
  458. if (m_string_buffer.capacity() < small_string_buffer_size) {
  459. m_string_buffer.reserve(small_string_buffer_size);
  460. }
  461. size_t offset = m_string_buffer.size();
  462. m_string_buffer.append(string.data(), string.size());
  463. return StringBufferRange{uint32_t(offset), uint32_t(string.size())};
  464. }
  465. inline bool Changeset::is_dirty() const noexcept
  466. {
  467. return m_is_dirty;
  468. }
  469. inline void Changeset::set_dirty(bool dirty) noexcept
  470. {
  471. m_is_dirty = dirty;
  472. }
  473. inline Changeset::iterator Changeset::insert(const_iterator pos, Instruction instr)
  474. {
  475. Instruction* p = &instr;
  476. return insert(pos, p, p + 1);
  477. }
  478. template <class InputIt>
  479. inline Changeset::iterator Changeset::insert(const_iterator pos, InputIt begin, InputIt end)
  480. {
  481. if (pos.m_pos == 0)
  482. return m_instructions.insert(pos.m_inner, begin, end);
  483. return insert_stable(pos, begin, end);
  484. }
  485. inline Changeset::iterator Changeset::erase(const_iterator pos)
  486. {
  487. if (pos.m_inner->size() <= 1)
  488. return m_instructions.erase(pos.m_inner);
  489. return erase_stable(pos);
  490. }
  491. inline Changeset::iterator Changeset::insert_stable(const_iterator pos, Instruction instr)
  492. {
  493. Instruction* p = &instr;
  494. return insert_stable(pos, p, p + 1);
  495. }
  496. template <class InputIt>
  497. inline Changeset::iterator Changeset::insert_stable(const_iterator cpos, InputIt begin, InputIt end)
  498. {
  499. iterator pos = const_iterator_to_iterator(cpos);
  500. size_t i = 0;
  501. for (auto it = begin; it != end; ++it, ++i) {
  502. pos.m_inner->insert(pos.m_pos + i, *it);
  503. }
  504. return pos;
  505. }
  506. inline Changeset::iterator Changeset::erase_stable(const_iterator cpos)
  507. {
  508. auto pos = const_iterator_to_iterator(cpos);
  509. auto begin = m_instructions.begin();
  510. auto end = m_instructions.end();
  511. REALM_ASSERT(pos.m_inner >= begin);
  512. REALM_ASSERT(pos.m_inner < end);
  513. pos.m_inner->erase(pos.m_pos);
  514. if (pos.m_pos >= pos.m_inner->size()) {
  515. do {
  516. ++pos.m_inner;
  517. } while (pos.m_inner != end && pos.m_inner->is_empty());
  518. pos.m_pos = 0;
  519. }
  520. return pos;
  521. }
  522. inline void Changeset::push_back(const Instruction& instr)
  523. {
  524. m_instructions.emplace_back(instr);
  525. }
  526. inline auto Changeset::const_iterator_to_iterator(const_iterator cpos) -> iterator
  527. {
  528. size_t offset = cpos.m_inner - m_instructions.cbegin();
  529. return iterator{m_instructions.begin() + offset, cpos.m_pos};
  530. }
  531. inline bool Changeset::operator!=(const Changeset& that) const noexcept
  532. {
  533. return !(*this == that);
  534. }
  535. } // namespace sync
  536. } // namespace realm
  537. namespace std {
  538. template <bool is_const>
  539. struct iterator_traits<realm::sync::Changeset::IteratorImpl<is_const>> {
  540. using difference_type = std::ptrdiff_t;
  541. using iterator_category = std::bidirectional_iterator_tag;
  542. };
  543. } // namespace std
  544. #endif // REALM_SYNC_CHANGESET_HPP