/*************************************************************************
 *
 * Copyright 2016 Realm Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **************************************************************************/
#ifndef REALM_ALLOC_SLAB_HPP
#define REALM_ALLOC_SLAB_HPP

#include <cstdint> // uint8_t etc
#include <memory>  // std::unique_ptr
#include <vector>
#include <map>
#include <string>
#include <atomic>
#include <mutex>

#include <realm/util/checked_mutex.hpp>
#include <realm/util/features.h>
#include <realm/util/file.hpp>
#include <realm/util/functional.hpp>
#include <realm/util/thread.hpp>
#include <realm/alloc.hpp>
#include <realm/disable_sync_to_disk.hpp>
#include <realm/version_id.hpp>
namespace realm {

// Pre-declarations
class Group;
class GroupWriter;

namespace util {
struct SharedFileInfo;
} // namespace util

/// Thrown by Group and DB constructors if the specified file
/// (or memory buffer) does not appear to contain a valid Realm
/// database.
struct InvalidDatabase;
/// The allocator that is used to manage the memory of a Realm
/// group, i.e., a Realm database.
///
/// Optionally, it can be attached to a pre-existing database (file
/// or memory buffer) which then becomes an immutable part of the
/// managed memory.
///
/// To attach a slab allocator to a pre-existing database, call
/// attach_file() or attach_buffer(). To create a new database
/// in-memory, call attach_empty().
///
/// For efficiency, this allocator manages its mutable memory as a set
/// of slabs.
class SlabAlloc : public Allocator {
public:
    ~SlabAlloc() noexcept override;
    SlabAlloc();

    // Disable copying. Copying an allocator can produce double frees.
    SlabAlloc(const SlabAlloc&) = delete;
    SlabAlloc& operator=(const SlabAlloc&) = delete;

    /// \struct Config
    /// \brief Storage for combining setup flags for initialization of
    /// the SlabAlloc.
    ///
    /// \var Config::is_shared
    /// Must be true if, and only if, we are called on behalf of DB.
    ///
    /// \var Config::read_only
    /// Open the file in read-only mode. This implies \a Config::no_create.
    ///
    /// \var Config::no_create
    /// Fail if the file does not already exist.
    ///
    /// \var Config::skip_validate
    /// Skip validation of the file header. In a
    /// set of overlapping DBs, only the first one (the one
    /// that creates/initializes the coordination file) may validate
    /// the header; otherwise a race condition would result.
    ///
    /// \var Config::encryption_key
    /// 32-byte key to use to encrypt and decrypt the backing storage,
    /// or nullptr to disable encryption.
    ///
    /// \var Config::session_initiator
    /// If set, the caller is the session initiator and
    /// guarantees exclusive access to the file. If attaching in
    /// read/write mode, the file is modified: files in streaming form
    /// are changed to non-streaming form, and if needed the file size
    /// is adjusted to match mmap boundaries.
    /// Must be set to false if is_shared is false.
    ///
    /// \var Config::clear_file
    /// Always initialize the file as if it was a newly
    /// created file and ignore any pre-existing contents. Requires that
    /// Config::session_initiator be true as well.
    struct Config {
        bool is_shared = false;
        bool read_only = false;
        bool no_create = false;
        bool skip_validate = false;
        bool session_initiator = false;
        bool clear_file = false;
        bool disable_sync = false;
        const char* encryption_key = nullptr;
    };
    struct Retry {
    };
    /// \brief Attach this allocator to the specified file.
    ///
    /// It is an error to call this function while the specified
    /// Realm file (file system inode) is being modified asynchronously.
    ///
    /// In non-shared mode (when this function is called on behalf of a
    /// free-standing Group instance), it is the responsibility of the
    /// application to ensure that the Realm file is not modified concurrently
    /// from any other thread or process.
    ///
    /// In shared mode (when this function is called on behalf of a DB
    /// instance), the caller (DB::do_open()) must take steps to ensure
    /// cross-process mutual exclusion.
    ///
    /// Except for \a file_path, the parameters are passed in through a
    /// configuration object.
    ///
    /// \return The `ref` of the root node, or zero if there is none.
    ///
    /// Please note that attach_file() can fail to attach to a file due to a
    /// collision with a writer extending the file. This can only happen if the
    /// caller is *not* the session initiator. When this happens, attach_file()
    /// throws SlabAlloc::Retry, and the caller must retry the call. The caller
    /// should check if it has become the session initiator before retrying;
    /// this can happen if the conflicting thread (or process) terminates or
    /// crashes before the next retry.
    ///
    /// \throw FileAccessError
    /// \throw SlabAlloc::Retry
    ref_type attach_file(const std::string& file_path, Config& cfg, util::WriteObserver* write_observer = nullptr);
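    // A minimal usage sketch (illustrative only; the file name and flag
    // choices are hypothetical). A caller that is not the session initiator
    // must be prepared to catch Retry and call attach_file() again:
    //
    //     SlabAlloc alloc;
    //     SlabAlloc::Config cfg;
    //     cfg.read_only = true;
    //     cfg.no_create = true;
    //     for (;;) {
    //         try {
    //             ref_type top_ref = alloc.attach_file("example.realm", cfg);
    //             break; // attached; top_ref == 0 means there is no root node
    //         }
    //         catch (const SlabAlloc::Retry&) {
    //             // re-check session initiatorship, then retry
    //         }
    //     }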
    /// If the attached file is in streaming form, convert it to normal form.
    ///
    /// This conversion must be done as part of session initialization to avoid
    /// tricky coordination problems with other sessions at the time the
    /// conversion is done. However, we want to do it after all validation has
    /// completed to avoid writing to a file in an unknown format, so this
    /// cannot be done in `attach_file()`.
    void convert_from_streaming_form(ref_type top_ref);
    /// Get the attached file. Only valid when called on an allocator with
    /// an attached file.
    util::File& get_file();

    /// Attach this allocator to the specified memory buffer.
    ///
    /// It is an error to call this function on an attached
    /// allocator. Doing so will result in undefined behavior.
    ///
    /// \return The `ref` of the root node, or zero if there is none.
    ///
    /// \sa own_buffer()
    ///
    /// \throw InvalidDatabase
    ref_type attach_buffer(const char* data, size_t size);
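    // Illustrative sketch (the `data`/`size` buffer is hypothetical and must
    // already hold a valid Realm file image):
    //
    //     SlabAlloc alloc;
    //     ref_type top_ref = alloc.attach_buffer(data, size);
    //     alloc.own_buffer(); // optional: transfer ownership of the buffer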
    void init_in_memory_buffer();
    char* translate_memory_pos(ref_type ref) const noexcept;

    bool is_in_memory() const
    {
        return m_attach_mode == attach_Heap;
    }

    /// Reads file format from file header. Must be called from within a write
    /// transaction.
    int get_committed_file_format_version() const noexcept;

    bool is_file_on_streaming_form() const
    {
        const Header& header = *reinterpret_cast<const Header*>(m_data);
        return is_file_on_streaming_form(header);
    }
    /// Attach this allocator to an empty buffer.
    ///
    /// It is an error to call this function on an attached
    /// allocator. Doing so will result in undefined behavior.
    void attach_empty();

    /// Detach from a previously attached file or buffer.
    ///
    /// This function does not reset free space tracking. To
    /// completely reset the allocator, you must also call
    /// reset_free_space_tracking().
    ///
    /// This function has no effect if the allocator is already in the
    /// detached state (idempotency).
    void detach() noexcept;
    class DetachGuard;

    /// If a memory buffer has been attached using attach_buffer(),
    /// mark it as owned by this slab allocator. Behaviour is
    /// undefined if this function is called on a detached allocator,
    /// one that is not attached using attach_buffer(), or one for
    /// which this function has already been called during the latest
    /// attachment.
    void own_buffer() noexcept;

    /// Returns true if, and only if this allocator is currently
    /// in the attached state.
    bool is_attached() const noexcept;

    /// Returns true if, and only if this allocator is currently in
    /// the attached state and attachment was not established using
    /// attach_empty().
    bool nonempty_attachment() const noexcept;

    /// Reserve disk space now to avoid allocation errors at a later
    /// point in time, and to minimize on-disk fragmentation. In some
    /// cases, less fragmentation translates into improved
    /// performance. On flash or SSD-drives this is likely a waste.
    ///
    /// Note: File::prealloc() may misbehave under race conditions (see
    /// documentation of File::prealloc()). For that reason, to avoid race
    /// conditions, when this allocator is used in a transactional mode, this
    /// function may be called only when the caller has exclusive write
    /// access. In non-transactional mode it is the responsibility of the user
    /// to ensure non-concurrent file mutation.
    ///
    /// This function will call File::sync().
    ///
    /// It is an error to call this function on an allocator that is not
    /// attached to a file. Doing so will result in undefined behavior.
    void resize_file(size_t new_file_size);
#ifdef REALM_DEBUG
    /// Deprecated method, only called from a unit test
    ///
    /// WARNING: This method is NOT thread safe on multiple platforms; see
    /// File::prealloc().
    ///
    /// Reserve disk space now to avoid allocation errors at a later point in
    /// time, and to minimize on-disk fragmentation. In some cases, less
    /// fragmentation translates into improved performance. On SSD-drives
    /// preallocation is likely a waste.
    ///
    /// When supported by the system, a call to this function will make the
    /// database file at least as big as the specified size, and cause space on
    /// the target device to be allocated (note that on many systems on-disk
    /// allocation is done lazily by default). If the file is already bigger
    /// than the specified size, the size will be unchanged, and on-disk
    /// allocation will occur only for the initial section that corresponds to
    /// the specified size.
    ///
    /// This function will call File::sync() if it changes the size of the file.
    ///
    /// It is an error to call this function on an allocator that is not
    /// attached to a file. Doing so will result in undefined behavior.
    void reserve_disk_space(size_t size_in_bytes);
#endif
    /// Get the size of the attached database file or buffer in number
    /// of bytes. This size is not affected by new allocations. After
    /// attachment, it can only be modified by a call to update_reader_view().
    ///
    /// It is an error to call this function on a detached allocator,
    /// or one that was attached using attach_empty(). Doing so will
    /// result in undefined behavior.
    size_t get_baseline() const noexcept;

    /// Get the total amount of managed memory. This is the baseline plus the
    /// sum of the sizes of the allocated slabs. It includes any free space.
    ///
    /// It is an error to call this function on a detached
    /// allocator. Doing so will result in undefined behavior.
    size_t get_total_size() const noexcept;

    /// Mark all mutable memory (ref-space outside the attached file) as free
    /// space.
    void reset_free_space_tracking();

    /// Update the reader's view of the file:
    ///
    /// Remap the attached file such that a prefix of the specified
    /// size becomes available in memory. If successful,
    /// get_baseline() will return the specified new file size.
    ///
    /// It is an error to call this function on a detached allocator,
    /// or one that was not attached using attach_file(). Doing so
    /// will result in undefined behavior.
    ///
    /// Updates the memory mappings to reflect a new size for the file.
    /// Stale mappings are retained so that they remain valid for other threads
    /// which haven't yet seen the file size change. The stale mappings are
    /// associated with a version count if one is provided.
    /// They are later purged by calls to purge_old_mappings().
    /// The version parameter is subtly different from the mapping_version obtained
    /// by get_mapping_version() below. The mapping version changes whenever a
    /// ref->ptr translation changes, and is used by Group to enforce re-translation.
    void update_reader_view(size_t file_size);
    void purge_old_mappings(uint64_t oldest_live_version, uint64_t youngest_live_version);
    void init_mapping_management(uint64_t currently_live_version);
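    // Sketch of the expected call sequence, as inferred from the comments
    // above (version numbers are illustrative):
    //
    //     alloc.init_mapping_management(1);         // once, when mapping management starts
    //     alloc.update_reader_view(new_file_size);  // whenever the file has grown
    //     alloc.purge_old_mappings(oldest_live_version, youngest_live_version);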
    /// Get an ID for the current mapping version. This ID changes whenever any part
    /// of an existing mapping is changed. Such a change requires all refs to be
    /// retranslated to new pointers. This will happen whenever the reader view
    /// is extended, unless the old size was aligned to a section boundary.
    uint64_t get_mapping_version()
    {
        return m_mapping_version;
    }
    /// Returns true initially, and after a call to reset_free_space_tracking()
    /// up until the point of the first call to SlabAlloc::alloc(). Note that a
    /// call to SlabAlloc::alloc() corresponds to a mutation event.
    bool is_free_space_clean() const noexcept;

    /// Returns the amount of memory requested by calls to SlabAlloc::alloc().
    size_t get_commit_size() const
    {
        return m_commit_size;
    }

    size_t get_file_size() const
    {
        return (m_attach_mode == attach_SharedFile) ? size_t(m_file.get_size()) : m_virtual_file_size;
    }

    /// Returns the total amount of memory currently allocated in the slab area.
    size_t get_allocated_size() const noexcept;

    /// Returns the total amount of slab memory for all slab allocators.
    static size_t get_total_slab_size() noexcept;

    /// Hooks used to keep the encryption layer informed of the start and stop
    /// of transactions.
    void note_reader_start(const void* reader_id);
    void note_reader_end(const void* reader_id) noexcept;

    void verify() const override;

#ifdef REALM_DEBUG
    void enable_debug(bool enable)
    {
        m_debug_out = enable;
    }
    bool is_all_free() const;
    void print() const;
#endif
protected:
    MemRef do_alloc(const size_t size) override;
    MemRef do_realloc(ref_type, char*, size_t old_size, size_t new_size) override;
    void do_free(ref_type, char*) override;
    char* do_translate(ref_type) const noexcept override;

    /// Returns the first section boundary *above* the given position.
    size_t get_upper_section_boundary(size_t start_pos) const noexcept;

    /// Returns the section boundary at or above the given size.
    size_t align_size_to_section_boundary(size_t size) const noexcept;

    /// Returns the first section boundary *at or below* the given position.
    size_t get_lower_section_boundary(size_t start_pos) const noexcept;

    /// Returns true if the given position is at a section boundary.
    bool matches_section_boundary(size_t pos) const noexcept;

    /// Actually compute the starting offset of a section. Only used to initialize
    /// a table of predefined results, which are then used by get_section_base().
    size_t compute_section_base(size_t index) const noexcept;

    /// Find a possible allocation of 'request_size' that will fit into a section
    /// which is inside the range from 'start_pos' to 'start_pos'+'free_chunk_size'.
    /// If found, return the position; if not, return 0.
    size_t find_section_in_range(size_t start_pos, size_t free_chunk_size, size_t request_size) const noexcept;

    void schedule_refresh_of_outdated_encrypted_pages();
private:
    enum AttachMode {
        attach_None,         // Nothing is attached
        attach_OwnedBuffer,  // We own the buffer (m_data = nullptr for empty buffer)
        attach_UsersBuffer,  // We do not own the buffer
        attach_SharedFile,   // On behalf of DB
        attach_UnsharedFile, // Not on behalf of DB
        attach_Heap          // Memory only DB
    };
    // A slab is a dynamically allocated contiguous chunk of memory used to
    // extend the amount of space available for database node
    // storage. Inter-node references are represented as file offsets
    // (a.k.a. "refs"), and each slab creates an apparently seamless extension
    // of this file offset addressable space. Slabs are stored as rows in the
    // Slabs table in order of ascending file offsets.
    struct Slab {
        ref_type ref_end;
        char* addr;
        size_t size;

        Slab(ref_type r, size_t s);
        ~Slab();
        Slab(const Slab&) = delete;
        Slab(Slab&& other) noexcept
            : ref_end(other.ref_end)
            , addr(other.addr)
            , size(other.size)
        {
            other.addr = nullptr;
            other.size = 0;
            other.ref_end = 0;
        }

        Slab& operator=(const Slab&) = delete;
        Slab& operator=(Slab&&) = delete;
    };
    struct MemBuffer {
        char* addr;
        size_t size;
        ref_type start_ref;

        MemBuffer()
            : addr(nullptr)
            , size(0)
            , start_ref(0)
        {
        }
        MemBuffer(size_t s, ref_type ref)
            : addr(new char[s])
            , size(s)
            , start_ref(ref)
        {
        }
        ~MemBuffer()
        {
            if (addr)
                delete[] addr;
        }
        MemBuffer(MemBuffer&& other) noexcept
            : addr(other.addr)
            , size(other.size)
            , start_ref(other.start_ref)
        {
            other.addr = nullptr;
            other.size = 0;
        }
    };
    // Free blocks that are in the slab area are managed using the following structures:
    // - FreeBlock: Placed at the start of any free space. Holds the 'ref' corresponding to
    //   the start of the space, and prev/next links used to place it in a size-specific
    //   freelist.
    // - BetweenBlocks: Structure sitting between any two free OR allocated spaces.
    //   Describes the size of the space before and after.
    // Each slab (area obtained from the underlying system) has a terminating BetweenBlocks
    // at the beginning and at the end of the slab.
    struct FreeBlock {
        ref_type ref;    // ref for this entry. Saves a reverse translate / representing links as refs
        FreeBlock* prev; // circular doubly linked list
        FreeBlock* next;
        void clear_links()
        {
            prev = next = nullptr;
        }
        void unlink();
    };
    struct BetweenBlocks {         // stores sizes and used/free status of blocks before and after.
        int32_t block_before_size; // negated if block is in use,
        int32_t block_after_size;  // positive if block is free - and zero at end
    };
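    // Layout of a slab that holds a single free chunk (sketch, derived from
    // the comments above):
    //
    //     +---------------+------------------------------+---------------+
    //     | BetweenBlocks |   FreeBlock + free space     | BetweenBlocks |
    //     | before = 0    |                              | after = 0     |
    //     | after = size  | <-------- size bytes ------> | before = size |
    //     +---------------+------------------------------+---------------+
    //
    // While a chunk is allocated, the bordering size fields are negated; a
    // non-positive neighbour size is how merge candidates are ruled out.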
    Config m_cfg;

    using FreeListMap = std::map<int, FreeBlock*>; // log(N) addressing for larger blocks
    FreeListMap m_block_map;

    // Abstract notion of a freelist - used to hide whether a freelist
    // is residing in the small blocks or the large blocks structures.
    struct FreeList {
        int size = 0; // size of every element in the list, 0 if not found
        FreeListMap::iterator it;
        bool found_something()
        {
            return size != 0;
        }
        bool found_exact(int sz)
        {
            return size == sz;
        }
    };
    // Simple helper functions for accessing/navigating blocks and betweenblocks (TM)
    BetweenBlocks* bb_before(FreeBlock* entry) const
    {
        return reinterpret_cast<BetweenBlocks*>(entry) - 1;
    }
    BetweenBlocks* bb_after(FreeBlock* entry) const
    {
        auto bb = bb_before(entry);
        size_t sz = bb->block_after_size;
        char* addr = reinterpret_cast<char*>(entry) + sz;
        return reinterpret_cast<BetweenBlocks*>(addr);
    }
    FreeBlock* block_before(BetweenBlocks* bb) const
    {
        int sz = bb->block_before_size; // signed: a negated size marks a block in use
        if (sz <= 0)
            return nullptr; // only blocks that are not in use
        char* addr = reinterpret_cast<char*>(bb) - sz;
        return reinterpret_cast<FreeBlock*>(addr);
    }
    FreeBlock* block_after(BetweenBlocks* bb) const
    {
        if (bb->block_after_size <= 0)
            return nullptr;
        return reinterpret_cast<FreeBlock*>(bb + 1);
    }
    int size_from_block(FreeBlock* entry) const
    {
        return bb_before(entry)->block_after_size;
    }
    void mark_allocated(FreeBlock* entry);
    // Mark the entry freed in bordering BetweenBlocks. Also validate size.
    void mark_freed(FreeBlock* entry, int size);

    // Hook for the memory verifier in Group.
    template <typename Func>
    void for_all_free_entries(Func f) const;

    // Main entry points for alloc/free:
    FreeBlock* allocate_block(int size);
    void free_block(ref_type ref, FreeBlock* addr);

    // Searching/manipulating freelists
    FreeList find(int size);
    FreeList find_larger(FreeList hint, int size);
    FreeBlock* pop_freelist_entry(FreeList list);
    void push_freelist_entry(FreeBlock* entry);
    void remove_freelist_entry(FreeBlock* element);
    void rebuild_freelists_from_slab();
    void clear_freelists();
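    // Sketch of how these primitives might combine inside allocate_block()
    // (illustrative only, not the actual implementation):
    //
    //     FreeList list = find(size);              // exact size class
    //     if (!list.found_exact(size))
    //         list = find_larger(list, size);      // nearest larger class
    //     // if still nothing, grow_slab(size) supplies a fresh block
    //     FreeBlock* block = pop_freelist_entry(list);
    //     if (FreeBlock* rest = break_block(block, size))
    //         push_freelist_entry(rest);           // surplus back on a freelist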
    // Grow the slab area.
    // Returns a free block large enough to handle the request.
    FreeBlock* grow_slab(int size);
    // Create a free chunk with "BetweenBlocks" at both ends and a single free
    // chunk between them. This free chunk will be of size:
    // slab_size - 2 * sizeof(BetweenBlocks)
    FreeBlock* slab_to_entry(const Slab& slab, ref_type ref_start);

    // Breaking/merging of blocks
    FreeBlock* get_prev_block_if_mergeable(FreeBlock* block);
    FreeBlock* get_next_block_if_mergeable(FreeBlock* block);
    // Break 'block' to give it 'new_size'. Return the remaining block.
    // If the block is too small to split, return nullptr.
    FreeBlock* break_block(FreeBlock* block, int new_size);
    FreeBlock* merge_blocks(FreeBlock* first, FreeBlock* second);

    // Values of each used bit in m_flags
    enum {
        flags_SelectBit = 1,
    };
    // 24 bytes
    struct Header {
        uint64_t m_top_ref[2]; // 2 * 8 bytes
        // Info-block 8-bytes
        uint8_t m_mnemonic[4];    // "T-DB"
        uint8_t m_file_format[2]; // See `library_file_format`
        uint8_t m_reserved;
        // bit 0 of m_flags is used to select between the two top refs.
        uint8_t m_flags;
    };
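    // Presumed reading of the select bit (sketch): the active entry of
    // m_top_ref is chosen by bit 0 of m_flags, i.e.
    //
    //     uint64_t top_ref = header.m_top_ref[header.m_flags & flags_SelectBit];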
    // 16 bytes
    struct StreamingFooter {
        uint64_t m_top_ref;
        uint64_t m_magic_cookie;
    };

    // Description of a to-be-deleted memory mapping
    struct OldMapping {
        uint64_t replaced_at_version;
        util::File::Map<char> mapping;
    };
    struct OldRefTranslation {
        OldRefTranslation(uint64_t v, size_t c, RefTranslation* m) noexcept
            : replaced_at_version(v)
            , translation_count(c)
            , translations(m)
        {
        }
        uint64_t replaced_at_version;
        size_t translation_count;
        std::unique_ptr<RefTranslation[]> translations;
    };
    static_assert(sizeof(Header) == 24, "Bad header size");
    static_assert(sizeof(StreamingFooter) == 16, "Bad footer size");

    static const Header empty_file_header;
    static void init_streaming_header(Header*, int file_format_version);

    static const uint_fast64_t footer_magic_cookie = 0x3034125237E526C8ULL;

    util::RaceDetector changes;

    void verify_old_translations(uint64_t);

    // Mappings used by the newest transactions - additional mappings may be open
    // and in use by older transactions. Those older mappings are kept in m_old_mappings.
    struct MapEntry {
        util::File::Map<char> primary_mapping;
        size_t lowest_possible_xover_offset = 0;
        util::File::Map<char> xover_mapping;
    };
    std::vector<MapEntry> m_mappings;
    size_t m_translation_table_size = 0;
    std::atomic<uint64_t> m_mapping_version = 1;
    uint64_t m_youngest_live_version = 1;
    std::mutex m_mapping_mutex;
    util::File m_file;
    util::SharedFileInfo* m_realm_file_info = nullptr;
    // Vectors where old mappings are held back from deletion to ensure that
    // they stay open and that ref->ptr translations keep working for other threads.
    std::vector<OldMapping> m_old_mappings;
    std::vector<OldRefTranslation> m_old_translations;
    // Rebuild the ref translations in a thread-safe manner. Save the old one along with its
    // versioning information for later deletion - 'requires_new_fast_mapping' must be
    // true if there are changes to entries among the existing translations. Must be called
    // with m_mapping_mutex locked.
    void rebuild_translations(bool requires_new_fast_mapping, size_t old_num_sections);
    // Add a translation covering a new section in the slab area. The translation is always
    // added at the end.
    void extend_fast_mapping_with_slab(char* address);
    void get_or_add_xover_mapping(RefTranslation& txl, size_t index, size_t offset, size_t size) override;
    const char* m_data = nullptr;
    size_t m_initial_section_size = 0;
    int m_section_shifts = 0;
    AttachMode m_attach_mode = attach_None;
    enum FreeSpaceState {
        free_space_Clean,
        free_space_Dirty,
        free_space_Invalid,
    };
    constexpr static int minimal_alloc = 128 * 1024;
    constexpr static int maximal_alloc = 1 << section_shift;

    /// When set to free_space_Invalid, the free lists are no longer
    /// up-to-date. This happens if do_free() or
    /// reset_free_space_tracking() fails, presumably due to
    /// std::bad_alloc being thrown during updating of the free space
    /// list. In this case, alloc(), realloc_(), and
    /// get_free_read_only() must throw. This member is deliberately
    /// placed here (after m_attach_mode) in the hope that it leads to
    /// less padding between members due to alignment requirements.
    FreeSpaceState m_free_space_state = free_space_Clean;
    using Slabs = std::vector<Slab>;
    using Chunks = std::map<ref_type, size_t>;
    Slabs m_slabs;
    std::vector<MemBuffer> m_virtual_file_buffer;
    Chunks m_free_read_only;
    util::WriteObserver* m_write_observer = nullptr;
    size_t m_commit_size = 0;
    size_t m_virtual_file_size = 0;

    bool m_debug_out = false;

    /// Throws if free-lists are no longer valid.
    size_t consolidate_free_read_only();
    /// Throws if free-lists are no longer valid.
    const Chunks& get_free_read_only() const;

    /// Throws InvalidDatabase if the file is not a Realm file, if the file is
    /// corrupted, or if the specified encryption key is incorrect. This
    /// function will not detect all forms of corruption, though.
    /// Returns the top_ref for the latest commit.
    ref_type validate_header(const char* data, size_t len, const std::string& path);
    ref_type validate_header(const Header* header, const StreamingFooter* footer, size_t size,
                             const std::string& path, bool is_encrypted = false);
    void throw_header_exception(std::string msg, const Header& header, const std::string& path);

    static bool is_file_on_streaming_form(const Header& header);
    /// Read the top_ref from the given buffer and set m_file_on_streaming_form
    /// if the buffer contains a file in streaming form.
    static ref_type get_top_ref(const char* data, size_t len);

    // Gets the path of the attached file, or other relevant debugging info.
    std::string get_file_path_for_assertions() const;

    static bool ref_less_than_slab_ref_end(ref_type, const Slab&) noexcept;

    friend class DB;
    friend class Group;
    friend class GroupWriter;
};
class SlabAlloc::DetachGuard {
public:
    DetachGuard(SlabAlloc& alloc) noexcept
        : m_alloc(&alloc)
    {
    }
    ~DetachGuard() noexcept;
    SlabAlloc* release() noexcept;

private:
    SlabAlloc* m_alloc;
};
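// Typical RAII usage (sketch): the guard detaches the allocator on scope
// exit, unless release() is called on the success path:
//
//     SlabAlloc::DetachGuard guard(alloc);
//     // ... initialization steps that may throw ...
//     guard.release(); // success: keep the allocator attached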
// Implementation:

struct InvalidDatabase : FileAccessError {
    InvalidDatabase(const std::string& msg, const std::string& path)
        : FileAccessError(ErrorCodes::InvalidDatabase,
                          path.empty() ? "Failed to open memory buffer: " + msg
                                       : util::format("Failed to open Realm file at path '%1': %2", path, msg),
                          path)
    {
    }
};
inline void SlabAlloc::own_buffer() noexcept
{
    REALM_ASSERT_3(m_attach_mode, ==, attach_UsersBuffer);
    REALM_ASSERT(m_data);
    m_attach_mode = attach_OwnedBuffer;
}

inline bool SlabAlloc::is_attached() const noexcept
{
    return m_attach_mode != attach_None;
}

inline bool SlabAlloc::nonempty_attachment() const noexcept
{
    return is_attached() && m_data;
}

inline size_t SlabAlloc::get_baseline() const noexcept
{
    REALM_ASSERT_DEBUG(is_attached());
    return m_baseline.load(std::memory_order_relaxed);
}

inline bool SlabAlloc::is_free_space_clean() const noexcept
{
    return m_free_space_state == free_space_Clean;
}

inline SlabAlloc::DetachGuard::~DetachGuard() noexcept
{
    if (m_alloc)
        m_alloc->detach();
}

inline SlabAlloc* SlabAlloc::DetachGuard::release() noexcept
{
    SlabAlloc* alloc = m_alloc;
    m_alloc = nullptr;
    return alloc;
}

inline bool SlabAlloc::ref_less_than_slab_ref_end(ref_type ref, const Slab& slab) noexcept
{
    return ref < slab.ref_end;
}
inline size_t SlabAlloc::get_upper_section_boundary(size_t start_pos) const noexcept
{
    return get_section_base(1 + get_section_index(start_pos));
}

inline size_t SlabAlloc::align_size_to_section_boundary(size_t size) const noexcept
{
    if (matches_section_boundary(size))
        return size;
    else
        return get_upper_section_boundary(size);
}

inline size_t SlabAlloc::get_lower_section_boundary(size_t start_pos) const noexcept
{
    return get_section_base(get_section_index(start_pos));
}

inline bool SlabAlloc::matches_section_boundary(size_t pos) const noexcept
{
    auto boundary = get_lower_section_boundary(pos);
    return pos == boundary;
}
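// Worked example (assuming, purely for illustration, uniform sections of
// 64 MiB = 0x4000000 bytes): for pos = 0x4000001,
// get_lower_section_boundary(pos) == 0x4000000 and
// get_upper_section_boundary(pos) == 0x8000000, while
// align_size_to_section_boundary(0x4000000) returns its argument unchanged
// because it already matches a boundary.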
template <typename Func>
void SlabAlloc::for_all_free_entries(Func f) const
{
    ref_type ref = align_size_to_section_boundary(m_baseline.load(std::memory_order_relaxed));
    for (const auto& e : m_slabs) {
        BetweenBlocks* bb = reinterpret_cast<BetweenBlocks*>(e.addr);
        REALM_ASSERT(bb->block_before_size == 0);
        while (1) {
            int size = bb->block_after_size;
            f(ref, sizeof(BetweenBlocks));
            ref += sizeof(BetweenBlocks);
            if (size == 0) {
                break;
            }
            if (size > 0) { // free block
                f(ref, size);
                bb = reinterpret_cast<BetweenBlocks*>(reinterpret_cast<char*>(bb) + sizeof(BetweenBlocks) + size);
                ref += size;
            }
            else { // allocated block; size is negated
                bb = reinterpret_cast<BetweenBlocks*>(reinterpret_cast<char*>(bb) + sizeof(BetweenBlocks) - size);
                ref -= size;
            }
        }
        // any gaps in ref-space are reported as a free block to the validator:
        auto next_ref = align_size_to_section_boundary(ref);
        if (next_ref > ref) {
            f(ref, next_ref - ref);
            ref = next_ref;
        }
    }
}
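// Sketch of how a verifier (e.g. Group, which is a friend) might use the hook
// above to tally all ref-space that is not allocated to nodes (free chunks,
// BetweenBlocks bookkeeping, and section-alignment gaps):
//
//     size_t total = 0;
//     alloc.for_all_free_entries([&](ref_type ref, size_t size) {
//         static_cast<void>(ref); // position in ref-space, unused here
//         total += size;
//     });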
} // namespace realm

#endif // REALM_ALLOC_SLAB_HPP