group_writer.hpp 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. /*************************************************************************
  2. *
  3. * Copyright 2016 Realm Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. **************************************************************************/
  18. #ifndef REALM_GROUP_WRITER_HPP
  19. #define REALM_GROUP_WRITER_HPP
#include <cstdint> // uint8_t etc
#include <map>
#include <memory>
#include <utility>
#include <vector>

#include <realm/util/file.hpp>
#include <realm/alloc.hpp>
#include <realm/array.hpp>
#include <realm/impl/array_writer.hpp>
#include <realm/db_options.hpp>
  28. namespace realm {
  29. // Pre-declarations
  30. class Group;
  31. class SlabAlloc;
  32. class Reachable {
  33. public:
  34. ref_type pos;
  35. size_t size;
  36. };
  37. class VersionInfo {
  38. public:
  39. VersionInfo(ref_type t, ref_type l)
  40. : top_ref(t)
  41. , logical_file_size(l)
  42. {
  43. }
  44. ref_type top_ref;
  45. ref_type logical_file_size;
  46. // used in debug mode to validate backdating algo:
  47. std::vector<Reachable> reachable_blocks;
  48. };
  49. using TopRefMap = std::map<uint64_t, VersionInfo>;
  50. using VersionVector = std::vector<uint64_t>;
  51. /// This class is not supposed to be reused for multiple write sessions. In
  52. /// particular, do not reuse it in case any of the functions throw.
  53. class GroupWriter : public _impl::ArrayWriterBase {
  54. public:
  55. enum class EvacuationStage { idle, evacuating, waiting, blocked };
  56. // For groups in transactional mode (Group::m_is_shared), this constructor
  57. // must be called while a write transaction is in progress.
  58. //
  59. // The constructor adds free-space tracking information to the specified
  60. // group, if it is not already present (4th and 5th entry in
  61. // Group::m_top). If the specified group is in transactional mode
  62. // (Group::m_is_shared), the constructor also adds version tracking
  63. // information to the group, if it is not already present (6th and 7th entry
  64. // in Group::m_top).
  65. using Durability = DBOptions::Durability;
  66. GroupWriter(Group&, Durability dura = Durability::Full, util::WriteMarker* write_marker = nullptr);
  67. ~GroupWriter();
  68. void set_versions(uint64_t current, TopRefMap& top_refs, bool any_num_unreachables) noexcept;
  69. /// Write all changed array nodes into free space.
  70. ///
  71. /// Returns the new top ref. When in full durability mode, call
  72. /// commit() with the returned top ref.
  73. ref_type write_group();
  74. /// Flush changes to physical medium, then write the new top ref
  75. /// to the file header, then flush again. Pass the top ref
  76. /// returned by write_group().
  77. void commit(ref_type new_top_ref);
  78. size_t get_file_size() const noexcept;
  79. ref_type write_array(const char*, size_t, uint32_t) override;
  80. #ifdef REALM_DEBUG
  81. void dump();
  82. #endif
  83. size_t get_free_space_size() const
  84. {
  85. return m_free_space_size;
  86. }
  87. size_t get_locked_space_size() const
  88. {
  89. return m_locked_space_size;
  90. }
  91. size_t get_logical_size() const noexcept
  92. {
  93. return m_logical_size;
  94. }
  95. size_t get_evacuation_limit() const noexcept
  96. {
  97. return m_backoff ? 0 : m_evacuation_limit;
  98. }
  99. size_t get_free_list_size()
  100. {
  101. return m_free_positions.size() * size_per_free_list_entry();
  102. }
  103. std::vector<size_t>& get_evacuation_progress()
  104. {
  105. return m_evacuation_progress;
  106. }
  107. EvacuationStage get_evacuation_stage() const noexcept
  108. {
  109. if (m_evacuation_limit == 0) {
  110. if (m_backoff == 0) {
  111. return EvacuationStage::idle;
  112. }
  113. else {
  114. return EvacuationStage::blocked;
  115. }
  116. }
  117. else {
  118. if (m_backoff == 0) {
  119. return EvacuationStage::evacuating;
  120. }
  121. else {
  122. return EvacuationStage::waiting;
  123. }
  124. }
  125. }
  126. // Flush all cached memory mappings
  127. // Sync all cached memory mappings to disk - includes flush if needed
  128. void sync_all_mappings();
  129. // Flush all cached memory mappings from private to shared cache.
  130. void flush_all_mappings();
  131. private:
  132. friend class InMemoryWriter;
  133. struct FreeSpaceEntry {
  134. FreeSpaceEntry(size_t r, size_t s, uint64_t v)
  135. : ref(r)
  136. , size(s)
  137. , released_at_version(v)
  138. {
  139. }
  140. size_t ref;
  141. size_t size;
  142. uint64_t released_at_version;
  143. };
  144. static void merge_adjacent_entries_in_freelist(std::vector<FreeSpaceEntry>& list);
  145. static void move_free_in_file_to_size_map(const std::vector<GroupWriter::FreeSpaceEntry>& list,
  146. std::multimap<size_t, size_t>& size_map);
  147. class MapWindow;
  148. Group& m_group;
  149. SlabAlloc& m_alloc;
  150. Array m_free_positions; // 4th slot in Group::m_top
  151. Array m_free_lengths; // 5th slot in Group::m_top
  152. Array m_free_versions; // 6th slot in Group::m_top
  153. uint64_t m_current_version = 0;
  154. uint64_t m_oldest_reachable_version;
  155. TopRefMap m_top_ref_map;
  156. bool m_any_new_unreachables;
  157. size_t m_window_alignment;
  158. size_t m_free_space_size = 0;
  159. size_t m_locked_space_size = 0;
  160. size_t m_evacuation_limit;
  161. int64_t m_backoff;
  162. size_t m_logical_size = 0;
  163. Durability m_durability;
  164. util::WriteMarker* m_write_marker = nullptr;
  165. // m_free_in_file;
  166. std::vector<FreeSpaceEntry> m_not_free_in_file;
  167. std::vector<FreeSpaceEntry> m_under_evacuation;
  168. std::multimap<size_t, size_t> m_size_map;
  169. std::vector<size_t> m_evacuation_progress;
  170. using FreeListElement = std::multimap<size_t, size_t>::iterator;
  171. void read_in_freelist();
  172. size_t recreate_freelist(size_t reserve_pos);
  173. // Currently cached memory mappings. We keep as many as 16 1MB windows
  174. // open for writing. The allocator will favor sequential allocation
  175. // from a modest number of windows, depending upon fragmentation, so
  176. // 16 windows should be more than enough. If more than 16 windows are
  177. // needed, the least recently used is sync'ed and closed to make room
  178. // for a new one. The windows are kept in MRU (most recently used) order.
  179. const static int num_map_windows = 16;
  180. std::vector<std::unique_ptr<MapWindow>> m_map_windows;
  181. // Get a suitable memory mapping for later access:
  182. // potentially adding it to the cache, potentially closing
  183. // the least recently used and sync'ing it to disk
  184. MapWindow* get_window(ref_type start_ref, size_t size);
  185. /// Allocate a chunk of free space of the specified size. The
  186. /// specified size must be 8-byte aligned. Extend the file if
  187. /// required. The returned chunk is removed from the amount of
  188. /// remaing free space. The returned chunk is guaranteed to be
  189. /// within a single contiguous memory mapping.
  190. ///
  191. /// \return The position within the database file of the allocated
  192. /// chunk.
  193. size_t get_free_space(size_t size);
  194. /// Find a block of free space that is at least as big as the
  195. /// specified size and which will allow an allocation that is mapped
  196. /// inside a contiguous address range. The specified size does not
  197. /// need to be 8-byte aligned. Extend the file if required.
  198. /// The returned chunk is not removed from the amount of remaing
  199. /// free space.
  200. ///
  201. /// \return A pair (`chunk_ndx`, `chunk_size`) where `chunk_ndx`
  202. /// is the index of a chunk whose size is at least the requestd
  203. /// size, and `chunk_size` is the size of that chunk.
  204. FreeListElement reserve_free_space(size_t size);
  205. FreeListElement search_free_space_in_free_list_element(FreeListElement element, size_t size);
  206. /// Search only a range of the free list for a block as big as the
  207. /// specified size. Return a pair with index and size of the found chunk.
  208. FreeListElement search_free_space_in_part_of_freelist(size_t size);
  209. /// Extend the file to ensure that a chunk of free space of the
  210. /// specified size is available. The specified size does not need
  211. /// to be 8-byte aligned. This function guarantees that it will
  212. /// add at most one entry to the free-lists.
  213. ///
  214. /// \return A pair (`chunk_ndx`, `chunk_size`) where `chunk_ndx`
  215. /// is the index of a chunk whose size is at least the requestd
  216. /// size, and `chunk_size` is the size of that chunk.
  217. FreeListElement extend_free_space(size_t requested_size);
  218. template <class T>
  219. void write_array_at(T* translator, ref_type, const char* data, size_t size);
  220. FreeListElement split_freelist_chunk(FreeListElement, size_t alloc_pos);
  221. /// Backdate (if possible) any blocks in the freelist belonging to
  222. /// a version currently becomming unreachable. The effect of backdating
  223. /// is that many blocks can be freed earlier.
  224. void backdate();
  225. /// Debug helper - extends the TopRefMap with list of reachable blocks
  226. void map_reachable();
  227. size_t size_per_free_list_entry() const
  228. {
  229. // If current size is less than 128 MB, the database need not expand above 2 GB
  230. // which means that the positions and sizes can still be in 32 bit.
  231. return (m_logical_size < 0x8000000 ? 8 : 16) + 8;
  232. }
  233. };
  234. // Implementation:
  235. inline void GroupWriter::set_versions(uint64_t current, TopRefMap& top_refs, bool any_new_unreachables) noexcept
  236. {
  237. m_oldest_reachable_version = top_refs.begin()->first;
  238. REALM_ASSERT(m_oldest_reachable_version <= current);
  239. m_current_version = current;
  240. m_any_new_unreachables = any_new_unreachables;
  241. m_top_ref_map = std::move(top_refs);
  242. }
  243. } // namespace realm
  244. #endif // REALM_GROUP_WRITER_HPP