/**
 * MIT License
 *
 * Copyright (c) 2017 Thibaut Goetghebuer-Planchon
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef TSL_HTRIE_HASH_H
#define TSL_HTRIE_HASH_H

// NOTE(review): the header names were lost in extraction. This list is rebuilt from the
// standard facilities the file uses; confirm it against the upstream header.
#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include "array-hash/array_map.h"
#include "array-hash/array_set.h"

/*
 * __has_include is a bit useless (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79433),
 * check also __cplusplus version.
*/ #ifdef __has_include # if __has_include() && __cplusplus >= 201703L # define TSL_HT_HAS_STRING_VIEW # endif #endif #ifdef TSL_HT_HAS_STRING_VIEW # include #endif #ifdef TSL_DEBUG # define tsl_ht_assert(expr) assert(expr) #else # define tsl_ht_assert(expr) (static_cast(0)) #endif namespace tsl { namespace detail_htrie_hash { template struct is_iterator: std::false_type { }; template struct is_iterator::iterator_category, void>::value>::type>: std::true_type { }; template struct is_related: std::false_type {}; template struct is_related: std::is_same::type>::type, typename std::remove_cv::type>::type> {}; template static T numeric_cast(U value, const char* error_message = "numeric_cast() failed.") { T ret = static_cast(value); if(static_cast(ret) != value) { THROW(std::runtime_error, error_message); } const bool is_same_signedness = (std::is_unsigned::value && std::is_unsigned::value) || (std::is_signed::value && std::is_signed::value); if(!is_same_signedness && (ret < T{}) != (value < U{})) { THROW(std::runtime_error, error_message); } return ret; } template struct value_node { /* * Avoid conflict with copy constructor 'value_node(const value_node&)'. If we call the copy constructor * with a mutable reference 'value_node(value_node&)', we don't want the forward constructor to be called. */ template::value>::type* = nullptr> value_node(Args&&... args): m_value(std::forward(args)...) { } T m_value; }; template<> struct value_node { }; /** * T should be void if there is no value associated to a key (in a set for example). 
*/ template class htrie_hash { private: template using has_value = typename std::integral_constant::value>; static_assert(std::is_same::value, "char is the only supported CharT type for now."); static const std::size_t ALPHABET_SIZE = std::numeric_limits::type>::max() + 1; public: template class htrie_hash_iterator; using char_type = CharT; using key_size_type = KeySizeT; using size_type = std::size_t; using hasher = Hash; using iterator = htrie_hash_iterator; using const_iterator = htrie_hash_iterator; using prefix_iterator = htrie_hash_iterator; using const_prefix_iterator = htrie_hash_iterator; private: using array_hash_type = typename std::conditional< has_value::value, tsl::array_map, false, KeySizeT, std::uint16_t, tsl::ah::power_of_two_growth_policy<4>>, tsl::array_set, false, KeySizeT, std::uint16_t, tsl::ah::power_of_two_growth_policy<4>>>::type; private: /* * The tree is mainly composed of two nodes types: trie_node and hash_node which both have anode as base class. * Each child is either a hash_node or a trie_node. * * A hash_node is always a leaf node, it doesn't have any child. * * Example: * | ... | a |.. ..................... | f | ... | trie_node_1 * \ \ * hash_node_1 |array_hash = {"dd"}| |...| a | ... | trie_node_2 * / * |array_hash = {"ble", "bric", "lse"}| hash_node_2 * * * Each trie_node may also have a value node, which contains a value T, if the trie_node marks * the end of a string value. * * A trie node should at least have one child or a value node. There can't be a trie node without * any child and no value node. */ using value_node = tsl::detail_htrie_hash::value_node; class trie_node; class hash_node; // TODO better encapsulate operations modifying the tree. class anode { friend class trie_node; public: /* * TODO Avoid the virtual to economize 8 bytes. We could use a custom deleter in the std::unique_ptr * we use (as we know if an anode is a trie_node or hash_node). 
*/ virtual ~anode() = default; bool is_trie_node() const noexcept { return m_node_type == node_type::TRIE_NODE; } bool is_hash_node() const noexcept { return m_node_type == node_type::HASH_NODE; } trie_node& as_trie_node() noexcept { tsl_ht_assert(is_trie_node()); return static_cast(*this); } hash_node& as_hash_node() noexcept { tsl_ht_assert(is_hash_node()); return static_cast(*this); } const trie_node& as_trie_node() const noexcept { tsl_ht_assert(is_trie_node()); return static_cast(*this); } const hash_node& as_hash_node() const noexcept { tsl_ht_assert(is_hash_node()); return static_cast(*this); } /** * @see m_child_of_char */ CharT child_of_char() const noexcept { tsl_ht_assert(parent() != nullptr); return m_child_of_char; } /** * Return nullptr if none. */ trie_node* parent() noexcept { return m_parent_node; } const trie_node* parent() const noexcept { return m_parent_node; } protected: enum class node_type: unsigned char { HASH_NODE, TRIE_NODE }; anode(node_type node_type_): m_node_type(node_type_), m_child_of_char(0), m_parent_node(nullptr) { } anode(node_type node_type_, CharT child_of_char): m_node_type(node_type_), m_child_of_char(child_of_char), m_parent_node(nullptr) { } protected: node_type m_node_type; /** * If the node has a parent, then it's a descendant of some char. * * Example: * | ... | a | b | ... | trie_node_1 * \ * |...| a | ... | trie_node_2 * / * |array_hash| hash_node_1 * * trie_node_2 is a child of trie_node_1 through 'b', it will have 'b' as m_child_of_char. * hash_node_1 is a child of trie_node_2 through 'a', it will have 'a' as m_child_of_char. * * trie_node_1 has no parent, its m_child_of_char is undefined. 
*/ CharT m_child_of_char; trie_node* m_parent_node; }; // Give the position in trie_node::m_children corresponding to the character c static std::size_t as_position(CharT c) noexcept { return static_cast(static_cast::type>(c)); } class trie_node: public anode { public: trie_node(): anode(anode::node_type::TRIE_NODE), m_value_node(nullptr), m_children() { } trie_node(const trie_node& other): anode(anode::node_type::TRIE_NODE, other.m_child_of_char), m_value_node(nullptr), m_children() { if(other.m_value_node != nullptr) { m_value_node = make_unique(*other.m_value_node); } // TODO avoid recursion for(std::size_t ichild = 0; ichild < other.m_children.size(); ichild++) { if(other.m_children[ichild] != nullptr) { if(other.m_children[ichild]->is_hash_node()) { m_children[ichild] = make_unique(other.m_children[ichild]->as_hash_node()); } else { m_children[ichild] = make_unique(other.m_children[ichild]->as_trie_node()); } m_children[ichild]->m_parent_node = this; } } } trie_node(trie_node&& other) = delete; trie_node& operator=(const trie_node& other) = delete; trie_node& operator=(trie_node&& other) = delete; /** * Return nullptr if none. */ anode* first_child() noexcept { return const_cast(static_cast(this)->first_child()); } const anode* first_child() const noexcept { for(std::size_t ichild = 0; ichild < m_children.size(); ichild++) { if(m_children[ichild] != nullptr) { return m_children[ichild].get(); } } return nullptr; } /** * Get the next_child that come after current_child. Return nullptr if no next child. 
*/
        anode* next_child(const anode& current_child) noexcept {
            return const_cast<anode*>(static_cast<const trie_node*>(this)->next_child(current_child));
        }

        const anode* next_child(const anode& current_child) const noexcept {
            tsl_ht_assert(current_child.parent() == this);

            // Scan the slots located after the one of current_child.
            for(std::size_t ichild = as_position(current_child.child_of_char()) + 1;
                ichild < m_children.size(); ichild++)
            {
                if(m_children[ichild] != nullptr) {
                    return m_children[ichild].get();
                }
            }

            return nullptr;
        }


        /**
         * Return the first left-descendant trie node with an m_value_node. If none return the most left trie node.
         */
        trie_node& most_left_descendant_value_trie_node() noexcept {
            return const_cast<trie_node&>(static_cast<const trie_node*>(this)->most_left_descendant_value_trie_node());
        }

        const trie_node& most_left_descendant_value_trie_node() const noexcept {
            const trie_node* current_node = this;
            while(true) {
                if(current_node->m_value_node != nullptr) {
                    return *current_node;
                }

                const anode* first_child = current_node->first_child();
                tsl_ht_assert(first_child != nullptr); // a trie_node must either have a value_node or at least one child.
if(first_child->is_hash_node()) { return *current_node; } current_node = &first_child->as_trie_node(); } } size_type nb_children() const noexcept { return std::count_if(m_children.cbegin(), m_children.cend(), [](const std::unique_ptr& n) { return n != nullptr; }); } bool empty() const noexcept { return std::all_of(m_children.cbegin(), m_children.cend(), [](const std::unique_ptr& n) { return n == nullptr; }); } std::unique_ptr& child(CharT for_char) noexcept { return m_children[as_position(for_char)]; } const std::unique_ptr& child(CharT for_char) const noexcept { return m_children[as_position(for_char)]; } typename std::array, ALPHABET_SIZE>::iterator begin() noexcept { return m_children.begin(); } typename std::array, ALPHABET_SIZE>::iterator end() noexcept { return m_children.end(); } void set_child(CharT for_char, std::unique_ptr child) noexcept { if(child != nullptr) { child->m_child_of_char = for_char; child->m_parent_node = this; } m_children[as_position(for_char)] = std::move(child); } std::unique_ptr& val_node() noexcept { return m_value_node; } const std::unique_ptr& val_node() const noexcept { return m_value_node; } private: // TODO Avoid storing a value_node when has_value::value is false std::unique_ptr m_value_node; /** * Each character CharT corresponds to one position in the array. To convert a character * to a position use the as_position method. * * TODO Try to reduce the size of m_children with a hash map, linear/binary search on array, ... * TODO Store number of non-null values in m_children. Check if we can store this value in the alignment * space as we don't want the node to get bigger (empty() and nb_children() are rarely used so it is * not an important variable). 
*/ std::array, ALPHABET_SIZE> m_children; }; class hash_node: public anode { public: hash_node(const Hash& hash, float max_load_factor): hash_node(HASH_NODE_DEFAULT_INIT_BUCKETS_COUNT, hash, max_load_factor) { } hash_node(size_type bucket_count, const Hash& hash, float max_load_factor): anode(anode::node_type::HASH_NODE), m_array_hash(bucket_count, hash) { m_array_hash.max_load_factor(max_load_factor); } hash_node(array_hash_type&& array_hash) noexcept(std::is_nothrow_move_constructible::value): anode(anode::node_type::HASH_NODE), m_array_hash(std::move(array_hash)) { } hash_node(const hash_node& other) = default; hash_node(hash_node&& other) = delete; hash_node& operator=(const hash_node& other) = delete; hash_node& operator=(hash_node&& other) = delete; array_hash_type& array_hash() noexcept { return m_array_hash; } const array_hash_type& array_hash() const noexcept { return m_array_hash; } private: array_hash_type m_array_hash; }; public: template class htrie_hash_iterator { friend class htrie_hash; private: using anode_type = typename std::conditional::type; using trie_node_type = typename std::conditional::type; using hash_node_type = typename std::conditional::type; using array_hash_iterator_type = typename std::conditional::type; public: using iterator_category = std::forward_iterator_tag; using value_type = typename std::conditional::value, T, void>::type; using difference_type = std::ptrdiff_t; using reference = typename std::conditional< has_value::value, typename std::conditional::type, typename std::add_lvalue_reference::type>::type, void>::type; using pointer = typename std::conditional< has_value::value, typename std::conditional::type, void>::type; private: /** * Start reading from start_hash_node->array_hash().begin(). */ htrie_hash_iterator(hash_node_type& start_hash_node) noexcept: htrie_hash_iterator(start_hash_node, start_hash_node.array_hash().begin()) { } /** * Start reading from iterator begin in start_hash_node->array_hash(). 
*/ htrie_hash_iterator(hash_node_type& start_hash_node, array_hash_iterator_type begin) noexcept: m_current_trie_node(start_hash_node.parent()), m_current_hash_node(&start_hash_node), m_array_hash_iterator(begin), m_array_hash_end_iterator(start_hash_node.array_hash().end()), m_read_trie_node_value(false) { tsl_ht_assert(!m_current_hash_node->array_hash().empty()); } /** * Start reading from the value in start_trie_node. start_trie_node->val_node() should be non-null. */ htrie_hash_iterator(trie_node_type& start_trie_node) noexcept: m_current_trie_node(&start_trie_node), m_current_hash_node(nullptr), m_read_trie_node_value(true) { tsl_ht_assert(m_current_trie_node->val_node() != nullptr); } template::type* = nullptr> htrie_hash_iterator(trie_node_type* tnode, hash_node_type* hnode, array_hash_iterator_type begin, array_hash_iterator_type end, bool read_trie_node_value) noexcept: m_current_trie_node(tnode), m_current_hash_node(hnode), m_array_hash_iterator(begin), m_array_hash_end_iterator(end), m_read_trie_node_value(read_trie_node_value) { } template::type* = nullptr> htrie_hash_iterator(trie_node_type* tnode, hash_node_type* hnode, array_hash_iterator_type begin, array_hash_iterator_type end, bool read_trie_node_value, std::basic_string prefix_filter) noexcept: m_current_trie_node(tnode), m_current_hash_node(hnode), m_array_hash_iterator(begin), m_array_hash_end_iterator(end), m_read_trie_node_value(read_trie_node_value), m_prefix_filter(std::move(prefix_filter)) { } public: htrie_hash_iterator() noexcept { } // Copy constructor from iterator to const_iterator. template::type* = nullptr> htrie_hash_iterator(const htrie_hash_iterator& other) noexcept: m_current_trie_node(other.m_current_trie_node), m_current_hash_node(other.m_current_hash_node), m_array_hash_iterator(other.m_array_hash_iterator), m_array_hash_end_iterator(other.m_array_hash_end_iterator), m_read_trie_node_value(other.m_read_trie_node_value) { } // Copy constructor from iterator to const_iterator. 
template::type* = nullptr> htrie_hash_iterator(const htrie_hash_iterator& other) noexcept: m_current_trie_node(other.m_current_trie_node), m_current_hash_node(other.m_current_hash_node), m_array_hash_iterator(other.m_array_hash_iterator), m_array_hash_end_iterator(other.m_array_hash_end_iterator), m_read_trie_node_value(other.m_read_trie_node_value), m_prefix_filter(other.m_prefix_filter) { } htrie_hash_iterator(const htrie_hash_iterator& other) = default; htrie_hash_iterator(htrie_hash_iterator&& other) = default; htrie_hash_iterator& operator=(const htrie_hash_iterator& other) = default; htrie_hash_iterator& operator=(htrie_hash_iterator&& other) = default; void key(std::basic_string& key_buffer_out) const { key_buffer_out.clear(); trie_node_type* tnode = m_current_trie_node; while(tnode != nullptr && tnode->parent() != nullptr) { key_buffer_out.push_back(tnode->child_of_char()); tnode = tnode->parent(); } std::reverse(key_buffer_out.begin(), key_buffer_out.end()); if(!m_read_trie_node_value) { tsl_ht_assert(m_current_hash_node != nullptr); if(m_current_hash_node->parent() != nullptr) { key_buffer_out.push_back(m_current_hash_node->child_of_char()); } key_buffer_out.append(m_array_hash_iterator.key(), m_array_hash_iterator.key_size()); } } std::basic_string key() const { std::basic_string key_buffer; key(key_buffer); return key_buffer; } template::value>::type* = nullptr> reference value() const { if(this->m_read_trie_node_value) { tsl_ht_assert(this->m_current_trie_node != nullptr); tsl_ht_assert(this->m_current_trie_node->val_node() != nullptr); return this->m_current_trie_node->val_node()->m_value; } else { return this->m_array_hash_iterator.value(); } } template::value>::type* = nullptr> reference operator*() const { return value(); } template::value>::type* = nullptr> pointer operator->() const { return std::addressof(value()); } htrie_hash_iterator& operator++() { if(m_read_trie_node_value) { tsl_ht_assert(m_current_trie_node != nullptr); 
m_read_trie_node_value = false; anode_type* child = m_current_trie_node->first_child(); if(child != nullptr) { set_most_left_descendant_as_next_node(*child); } else if(m_current_trie_node->parent() != nullptr) { trie_node_type* current_node_child = m_current_trie_node; m_current_trie_node = m_current_trie_node->parent(); set_next_node_ascending(*current_node_child); } else { set_as_end_iterator(); } } else { ++m_array_hash_iterator; if(m_array_hash_iterator != m_array_hash_end_iterator) { filter_prefix(); } // End of the road, set the iterator as an end node. else if(m_current_trie_node == nullptr) { set_as_end_iterator(); } else { tsl_ht_assert(m_current_hash_node != nullptr); set_next_node_ascending(*m_current_hash_node); } } return *this; } htrie_hash_iterator operator++(int) { htrie_hash_iterator tmp(*this); ++*this; return tmp; } friend bool operator==(const htrie_hash_iterator& lhs, const htrie_hash_iterator& rhs) { if(lhs.m_current_trie_node != rhs.m_current_trie_node || lhs.m_read_trie_node_value != rhs.m_read_trie_node_value) { return false; } else if(lhs.m_read_trie_node_value) { return true; } else { if(lhs.m_current_hash_node != rhs.m_current_hash_node) { return false; } else if(lhs.m_current_hash_node == nullptr) { return true; } else { return lhs.m_array_hash_iterator == rhs.m_array_hash_iterator && lhs.m_array_hash_end_iterator == rhs.m_array_hash_end_iterator; } } } friend bool operator!=(const htrie_hash_iterator& lhs, const htrie_hash_iterator& rhs) { return !(lhs == rhs); } private: void hash_node_prefix(std::basic_string& key_buffer_out) const { tsl_ht_assert(!m_read_trie_node_value); key_buffer_out.clear(); trie_node_type* tnode = m_current_trie_node; while(tnode != nullptr && tnode->parent() != nullptr) { key_buffer_out.push_back(tnode->child_of_char()); tnode = tnode->parent(); } std::reverse(key_buffer_out.begin(), key_buffer_out.end()); tsl_ht_assert(m_current_hash_node != nullptr); if(m_current_hash_node->parent() != nullptr) { 
key_buffer_out.push_back(m_current_hash_node->child_of_char()); } } template::type* = nullptr> void filter_prefix() { } template::type* = nullptr> void filter_prefix() { tsl_ht_assert(m_array_hash_iterator != m_array_hash_end_iterator); tsl_ht_assert(!m_read_trie_node_value && m_current_hash_node != nullptr); if(m_prefix_filter.empty()) { return; } while((m_prefix_filter.size() > m_array_hash_iterator.key_size() || m_prefix_filter.compare(0, m_prefix_filter.size(), m_array_hash_iterator.key(), m_prefix_filter.size()) != 0)) { ++m_array_hash_iterator; if(m_array_hash_iterator == m_array_hash_end_iterator) { if(m_current_trie_node == nullptr) { set_as_end_iterator(); } else { tsl_ht_assert(m_current_hash_node != nullptr); set_next_node_ascending(*m_current_hash_node); } return; } } } /** * Go back up in the tree to get the current_trie_node_child sibling. * If none, try to go back up more in the tree to check the siblings of the ancestors. */ void set_next_node_ascending(anode_type& current_trie_node_child) { tsl_ht_assert(m_current_trie_node != nullptr); tsl_ht_assert(current_trie_node_child.parent() == m_current_trie_node); anode_type* next_node = m_current_trie_node->next_child(current_trie_node_child); while(next_node == nullptr && m_current_trie_node->parent() != nullptr) { anode_type* current_child = m_current_trie_node; m_current_trie_node = m_current_trie_node->parent(); next_node = m_current_trie_node->next_child(*current_child); } // End of the road, set the iterator as an end node. 
if(next_node == nullptr) { set_as_end_iterator(); } else { set_most_left_descendant_as_next_node(*next_node); } } void set_most_left_descendant_as_next_node(anode_type& search_start) { if(search_start.is_hash_node()) { set_current_hash_node(search_start.as_hash_node()); } else { m_current_trie_node = &search_start.as_trie_node().most_left_descendant_value_trie_node(); if(m_current_trie_node->val_node() != nullptr) { m_read_trie_node_value = true; } else { anode_type* first_child = m_current_trie_node->first_child(); // a trie_node must either have a value_node or at least one child. tsl_ht_assert(first_child != nullptr); set_current_hash_node(first_child->as_hash_node()); } } } void set_current_hash_node(hash_node_type& hnode) { tsl_ht_assert(!hnode.array_hash().empty()); m_current_hash_node = &hnode; m_array_hash_iterator = m_current_hash_node->array_hash().begin(); m_array_hash_end_iterator = m_current_hash_node->array_hash().end(); } void set_as_end_iterator() { m_current_trie_node = nullptr; m_current_hash_node = nullptr; m_read_trie_node_value = false; } void skip_hash_node() { tsl_ht_assert(!m_read_trie_node_value && m_current_hash_node != nullptr); if(m_current_trie_node == nullptr) { set_as_end_iterator(); } else { tsl_ht_assert(m_current_hash_node != nullptr); set_next_node_ascending(*m_current_hash_node); } } private: trie_node_type* m_current_trie_node; hash_node_type* m_current_hash_node; array_hash_iterator_type m_array_hash_iterator; array_hash_iterator_type m_array_hash_end_iterator; bool m_read_trie_node_value; // TODO can't have void if !IsPrefixIterator, use inheritance typename std::conditional, bool>::type m_prefix_filter; }; public: htrie_hash(const Hash& hash, float max_load_factor, size_type burst_threshold): m_root(nullptr), m_nb_elements(0), m_hash(hash), m_max_load_factor(max_load_factor) { this->burst_threshold(burst_threshold); } htrie_hash(const htrie_hash& other): m_root(nullptr), m_nb_elements(other.m_nb_elements), 
m_hash(other.m_hash), m_max_load_factor(other.m_max_load_factor), m_burst_threshold(other.m_burst_threshold) { if(other.m_root != nullptr) { if(other.m_root->is_hash_node()) { m_root = make_unique(other.m_root->as_hash_node()); } else { m_root = make_unique(other.m_root->as_trie_node()); } } } htrie_hash(htrie_hash&& other) noexcept(std::is_nothrow_move_constructible::value) : m_root(std::move(other.m_root)), m_nb_elements(other.m_nb_elements), m_hash(std::move(other.m_hash)), m_max_load_factor(other.m_max_load_factor), m_burst_threshold(other.m_burst_threshold) { other.clear(); } htrie_hash& operator=(const htrie_hash& other) { if(&other != this) { std::unique_ptr new_root = nullptr; if(other.m_root != nullptr) { if(other.m_root->is_hash_node()) { new_root = make_unique(other.m_root->as_hash_node()); } else { new_root = make_unique(other.m_root->as_trie_node()); } } m_hash = other.m_hash; m_root = std::move(new_root); m_nb_elements = other.m_nb_elements; m_max_load_factor = other.m_max_load_factor; m_burst_threshold = other.m_burst_threshold; } return *this; } htrie_hash& operator=(htrie_hash&& other) { other.swap(*this); other.clear(); return *this; } /* * Iterators */ iterator begin() noexcept { return mutable_iterator(cbegin()); } const_iterator begin() const noexcept { return cbegin(); } const_iterator cbegin() const noexcept { if(empty()) { return cend(); } return cbegin(*m_root); } iterator end() noexcept { iterator it; it.set_as_end_iterator(); return it; } const_iterator end() const noexcept { return cend(); } const_iterator cend() const noexcept { const_iterator it; it.set_as_end_iterator(); return it; } /* * Capacity */ bool empty() const noexcept { return m_nb_elements == 0; } size_type size() const noexcept { return m_nb_elements; } size_type max_size() const noexcept { return std::numeric_limits::max(); } size_type max_key_size() const noexcept { return array_hash_type::MAX_KEY_SIZE; } void shrink_to_fit() { auto first = begin(); auto last = end(); 
while(first != last) { if(first.m_read_trie_node_value) { ++first; } else { /* * shrink_to_fit on array_hash will invalidate the iterators of array_hash. * Save pointer to array_hash, skip the array_hash_node and then call * shrink_to_fit on the saved pointer. */ hash_node* hnode = first.m_current_hash_node; first.skip_hash_node(); tsl_ht_assert(hnode != nullptr); hnode->array_hash().shrink_to_fit(); } } } /* * Modifiers */ void clear() noexcept { m_root.reset(nullptr); m_nb_elements = 0; } template std::pair insert(const CharT* key, size_type key_size, ValueArgs&&... value_args) { if(key_size > max_key_size()) { THROW(std::length_error, "Key is too long."); } if(m_root == nullptr) { m_root = make_unique(m_hash, m_max_load_factor); } return insert_impl(*m_root, key, key_size, std::forward(value_args)...); } iterator erase(const_iterator pos) { return erase(mutable_iterator(pos)); } iterator erase(const_iterator first, const_iterator last) { // TODO Optimize, could avoid the call to std::distance const std::size_t nb_to_erase = std::size_t(std::distance(first, last)); auto to_delete = mutable_iterator(first); for(std::size_t i = 0; i < nb_to_erase; i++) { to_delete = erase(to_delete); } return to_delete; } size_type erase(const CharT* key, size_type key_size) { auto it = find(key, key_size); if(it != end()) { erase(it); return 1; } else { return 0; } } size_type erase_prefix(const CharT* prefix, size_type prefix_size) { if(m_root == nullptr) { return 0; } anode* current_node = m_root.get(); for(size_type iprefix = 0; iprefix < prefix_size; iprefix++) { if(current_node->is_trie_node()) { trie_node* tnode = ¤t_node->as_trie_node(); if(tnode->child(prefix[iprefix]) == nullptr) { return 0; } else { current_node = tnode->child(prefix[iprefix]).get(); } } else { hash_node& hnode = current_node->as_hash_node(); return erase_prefix_hash_node(hnode, prefix + iprefix, prefix_size - iprefix); } } if(current_node->is_trie_node()) { trie_node* parent = current_node->parent(); 
if(parent != nullptr) { const size_type nb_erased = size_descendants(current_node->as_trie_node()); parent->set_child(current_node->child_of_char(), nullptr); m_nb_elements -= nb_erased; if(parent->empty()) { clear_empty_nodes(*parent); } return nb_erased; } else { const size_type nb_erased = m_nb_elements; m_root.reset(nullptr); m_nb_elements = 0; return nb_erased; } } else { const size_type nb_erased = current_node->as_hash_node().array_hash().size(); current_node->as_hash_node().array_hash().clear(); m_nb_elements -= nb_erased; clear_empty_nodes(current_node->as_hash_node()); return nb_erased; } } void swap(htrie_hash& other) { using std::swap; swap(m_hash, other.m_hash); swap(m_root, other.m_root); swap(m_nb_elements, other.m_nb_elements); swap(m_max_load_factor, other.m_max_load_factor); swap(m_burst_threshold, other.m_burst_threshold); } /* * Lookup */ template::value>::type* = nullptr> U& at(const CharT* key, size_type key_size) { return const_cast(static_cast(this)->at(key, key_size)); } template::value>::type* = nullptr> const U& at(const CharT* key, size_type key_size) const { auto it_find = find(key, key_size); if(it_find != cend()) { return it_find.value(); } else { THROW(std::out_of_range, "Couldn't find key."); } } //TODO optimize template::value>::type* = nullptr> U& access_operator(const CharT* key, size_type key_size) { auto it_find = find(key, key_size); if(it_find != cend()) { return it_find.value(); } else { return insert(key, key_size, U{}).first.value(); } } size_type count(const CharT* key, size_type key_size) const { if(find(key, key_size) != cend()) { return 1; } else { return 0; } } iterator find(const CharT* key, size_type key_size) { if(m_root == nullptr) { return end(); } return find_impl(*m_root, key, key_size); } const_iterator find(const CharT* key, size_type key_size) const { if(m_root == nullptr) { return cend(); } return find_impl(*m_root, key, key_size); } std::pair equal_range(const CharT* key, size_type key_size) { iterator it 
= find(key, key_size); return std::make_pair(it, (it == end())?it:std::next(it)); } std::pair equal_range(const CharT* key, size_type key_size) const { const_iterator it = find(key, key_size); return std::make_pair(it, (it == cend())?it:std::next(it)); } std::pair equal_prefix_range(const CharT* prefix, size_type prefix_size) { if(m_root == nullptr) { return std::make_pair(prefix_end(), prefix_end()); } return equal_prefix_range_impl(*m_root, prefix, prefix_size); } std::pair equal_prefix_range(const CharT* prefix, size_type prefix_size) const { if(m_root == nullptr) { return std::make_pair(prefix_cend(), prefix_cend()); } return equal_prefix_range_impl(*m_root, prefix, prefix_size); } iterator longest_prefix(const CharT* key, size_type key_size) { if(m_root == nullptr) { return end(); } return longest_prefix_impl(*m_root, key, key_size); } const_iterator longest_prefix(const CharT* key, size_type key_size) const { if(m_root == nullptr) { return cend(); } return longest_prefix_impl(*m_root, key, key_size); } /* * Hash policy */ float max_load_factor() const { return m_max_load_factor; } void max_load_factor(float ml) { m_max_load_factor = ml; } /* * Burst policy */ size_type burst_threshold() const { return m_burst_threshold; } void burst_threshold(size_type threshold) { const size_type min_burst_threshold = MIN_BURST_THRESHOLD; m_burst_threshold = std::max(min_burst_threshold, threshold); } /* * Observers */ hasher hash_function() const { return m_hash; } /* * Other */ template void serialize(Serializer& serializer) const { serialize_impl(serializer); } template void deserialize(Deserializer& deserializer, bool hash_compatible) { deserialize_impl(deserializer, hash_compatible); } private: /** * Get the begin iterator by searching for the most left descendant node starting at search_start_node. 
*/ template Iterator cbegin(const anode& search_start_node) const noexcept { if(search_start_node.is_hash_node()) { return Iterator(search_start_node.as_hash_node()); } const trie_node& tnode = search_start_node.as_trie_node().most_left_descendant_value_trie_node(); if(tnode.val_node() != nullptr) { return Iterator(tnode); } else { const anode* first_child = tnode.first_child(); tsl_ht_assert(first_child != nullptr); return Iterator(first_child->as_hash_node()); } } /** * Get an iterator to the node that come just after the last descendant of search_start_node. */ template Iterator cend(const anode& search_start_node) const noexcept { if(search_start_node.parent() == nullptr) { Iterator it; it.set_as_end_iterator(); return it; } const trie_node* current_trie_node = search_start_node.parent(); const anode* next_node = current_trie_node->next_child(search_start_node); while(next_node == nullptr && current_trie_node->parent() != nullptr) { const anode* current_child = current_trie_node; current_trie_node = current_trie_node->parent(); next_node = current_trie_node->next_child(*current_child); } if(next_node == nullptr) { Iterator it; it.set_as_end_iterator(); return it; } else { return cbegin(*next_node); } } prefix_iterator prefix_end() noexcept { prefix_iterator it; it.set_as_end_iterator(); return it; } const_prefix_iterator prefix_cend() const noexcept { const_prefix_iterator it; it.set_as_end_iterator(); return it; } size_type size_descendants(const anode& start_node) const { auto first = cbegin(start_node); auto last = cend(start_node); size_type nb_elements = 0; while(first != last) { if(first.m_read_trie_node_value) { nb_elements++; ++first; } else { nb_elements += first.m_current_hash_node->array_hash().size(); first.skip_hash_node(); } } return nb_elements; } template std::pair insert_impl(anode& search_start_node, const CharT* key, size_type key_size, ValueArgs&&... 
value_args) { anode* current_node = &search_start_node; for(size_type ikey = 0; ikey < key_size; ikey++) { if(current_node->is_trie_node()) { trie_node& tnode = current_node->as_trie_node(); if(tnode.child(key[ikey]) != nullptr) { current_node = tnode.child(key[ikey]).get(); } else { auto hnode = make_unique(m_hash, m_max_load_factor); auto insert_it = hnode->array_hash().emplace_ks(key + ikey + 1, key_size - ikey - 1, std::forward(value_args)...); tnode.set_child(key[ikey], std::move(hnode)); m_nb_elements++; return std::make_pair(iterator(tnode.child(key[ikey])->as_hash_node(), insert_it.first), true); } } else { return insert_in_hash_node(current_node->as_hash_node(), key + ikey, key_size - ikey, std::forward(value_args)...); } } if(current_node->is_trie_node()) { trie_node& tnode = current_node->as_trie_node(); if(tnode.val_node() != nullptr) { return std::make_pair(iterator(tnode), false); } else { tnode.val_node() = make_unique(std::forward(value_args)...); m_nb_elements++; return std::make_pair(iterator(tnode), true); } } else { return insert_in_hash_node(current_node->as_hash_node(), "", 0, std::forward(value_args)...); } } template std::pair insert_in_hash_node(hash_node& hnode, const CharT* key, size_type key_size, ValueArgs&&... 
value_args) { if(need_burst(hnode)) { std::unique_ptr new_node = burst(hnode); if(hnode.parent() == nullptr) { tsl_ht_assert(m_root.get() == &hnode); m_root = std::move(new_node); return insert_impl(*m_root, key, key_size, std::forward(value_args)...); } else { trie_node* parent = hnode.parent(); const CharT child_of_char = hnode.child_of_char(); parent->set_child(child_of_char, std::move(new_node)); return insert_impl(*parent->child(child_of_char), key, key_size, std::forward(value_args)...); } } else { auto it_insert = hnode.array_hash().emplace_ks(key, key_size, std::forward(value_args)...); if(it_insert.second) { m_nb_elements++; } return std::make_pair(iterator(hnode, it_insert.first), it_insert.second); } } iterator erase(iterator pos) { iterator next_pos = std::next(pos); if(pos.m_read_trie_node_value) { tsl_ht_assert(pos.m_current_trie_node != nullptr && pos.m_current_trie_node->val_node() != nullptr); pos.m_current_trie_node->val_node().reset(nullptr); m_nb_elements--; if(pos.m_current_trie_node->empty()) { clear_empty_nodes(*pos.m_current_trie_node); } return next_pos; } else { tsl_ht_assert(pos.m_current_hash_node != nullptr); auto next_array_hash_it = pos.m_current_hash_node->array_hash().erase(pos.m_array_hash_iterator); m_nb_elements--; if(next_array_hash_it != pos.m_current_hash_node->array_hash().end()) { // The erase on array_hash invalidated the next_pos iterator, return the right one. return iterator(*pos.m_current_hash_node, next_array_hash_it); } else { if(pos.m_current_hash_node->array_hash().empty()) { clear_empty_nodes(*pos.m_current_hash_node); } return next_pos; } } } /** * Clear all the empty nodes from the tree starting from empty_node (empty for a hash_node means that * the array hash is empty, for a trie_node it means the node doesn't have any child or value_node * associated to it). 
*/ void clear_empty_nodes(anode& empty_node) noexcept { tsl_ht_assert(!empty_node.is_trie_node() || (empty_node.as_trie_node().empty() && empty_node.as_trie_node().val_node() == nullptr)); tsl_ht_assert(!empty_node.is_hash_node() || empty_node.as_hash_node().array_hash().empty()); trie_node* parent = empty_node.parent(); if(parent == nullptr) { tsl_ht_assert(m_root.get() == &empty_node); tsl_ht_assert(m_nb_elements == 0); m_root.reset(nullptr); } else if(parent->val_node() != nullptr || parent->nb_children() > 1) { parent->child(empty_node.child_of_char()).reset(nullptr); } else if(parent->parent() == nullptr) { tsl_ht_assert(m_root.get() == empty_node.parent()); tsl_ht_assert(m_nb_elements == 0); m_root.reset(nullptr); } else { /** * Parent is empty if we remove its empty_node child. * Put empty_node as new child of the grand parent instead of parent (move hnode up, * and delete the parent). And recurse. * * We can't just set grand_parent->child(parent->child_of_char()) to nullptr as * the grand_parent may also become empty. We don't want empty trie_node with no value_node * in the tree. 
*/ trie_node* grand_parent = parent->parent(); grand_parent->set_child(parent->child_of_char(), std::move(parent->child(empty_node.child_of_char()))); clear_empty_nodes(empty_node); } } iterator find_impl(const anode& search_start_node, const CharT* key, size_type key_size) { return mutable_iterator(static_cast(this)->find_impl(search_start_node, key, key_size)); } const_iterator find_impl(const anode& search_start_node, const CharT* key, size_type key_size) const { const anode* current_node = &search_start_node; for(size_type ikey = 0; ikey < key_size; ikey++) { if(current_node->is_trie_node()) { const trie_node* tnode = ¤t_node->as_trie_node(); if(tnode->child(key[ikey]) == nullptr) { return cend(); } else { current_node = tnode->child(key[ikey]).get(); } } else { return find_in_hash_node(current_node->as_hash_node(), key + ikey, key_size - ikey); } } if(current_node->is_trie_node()) { const trie_node& tnode = current_node->as_trie_node(); return (tnode.val_node() != nullptr)?const_iterator(tnode):cend(); } else { return find_in_hash_node(current_node->as_hash_node(), "", 0); } } const_iterator find_in_hash_node(const hash_node& hnode, const CharT* key, size_type key_size) const { auto it = hnode.array_hash().find_ks(key, key_size); if(it != hnode.array_hash().end()) { return const_iterator(hnode, it); } else { return cend(); } } iterator longest_prefix_impl(const anode& search_start_node, const CharT* value, size_type value_size) { return mutable_iterator(static_cast(this)->longest_prefix_impl(search_start_node, value, value_size)); } const_iterator longest_prefix_impl(const anode& search_start_node, const CharT* value, size_type value_size) const { const anode* current_node = &search_start_node; const_iterator longest_found_prefix = cend(); for(size_type ivalue = 0; ivalue < value_size; ivalue++) { if(current_node->is_trie_node()) { const trie_node& tnode = current_node->as_trie_node(); if(tnode.val_node() != nullptr) { longest_found_prefix = 
const_iterator(tnode); } if(tnode.child(value[ivalue]) == nullptr) { return longest_found_prefix; } else { current_node = tnode.child(value[ivalue]).get(); } } else { const hash_node& hnode = current_node->as_hash_node(); /** * Test the presence in the hash node of each substring from the * remaining [ivalue, value_size) string starting from the longest. * Also test the empty string. */ for(std::size_t i = ivalue; i <= value_size; i++) { auto it = hnode.array_hash().find_ks(value + ivalue, (value_size - i)); if(it != hnode.array_hash().end()) { return const_iterator(hnode, it); } } return longest_found_prefix; } } if(current_node->is_trie_node()) { const trie_node& tnode = current_node->as_trie_node(); if(tnode.val_node() != nullptr) { longest_found_prefix = const_iterator(tnode); } } else { const hash_node& hnode = current_node->as_hash_node(); auto it = hnode.array_hash().find_ks("", 0); if(it != hnode.array_hash().end()) { longest_found_prefix = const_iterator(hnode, it); } } return longest_found_prefix; } std::pair equal_prefix_range_impl( anode& search_start_node, const CharT* prefix, size_type prefix_size) { auto range = static_cast(this)->equal_prefix_range_impl(search_start_node, prefix, prefix_size); return std::make_pair(mutable_iterator(range.first), mutable_iterator(range.second)); } std::pair equal_prefix_range_impl( const anode& search_start_node, const CharT* prefix, size_type prefix_size) const { const anode* current_node = &search_start_node; for(size_type iprefix = 0; iprefix < prefix_size; iprefix++) { if(current_node->is_trie_node()) { const trie_node* tnode = ¤t_node->as_trie_node(); if(tnode->child(prefix[iprefix]) == nullptr) { return std::make_pair(prefix_cend(), prefix_cend()); } else { current_node = tnode->child(prefix[iprefix]).get(); } } else { const hash_node& hnode = current_node->as_hash_node(); const_prefix_iterator begin(hnode.parent(), &hnode, hnode.array_hash().begin(), hnode.array_hash().end(), false, std::basic_string(prefix + 
iprefix, prefix_size - iprefix)); begin.filter_prefix(); const_prefix_iterator end = cend(*current_node); return std::make_pair(begin, end); } } const_prefix_iterator begin = cbegin(*current_node); const_prefix_iterator end = cend(*current_node); return std::make_pair(begin, end); } size_type erase_prefix_hash_node(hash_node& hnode, const CharT* prefix, size_type prefix_size) { size_type nb_erased = 0; auto it = hnode.array_hash().begin(); while(it != hnode.array_hash().end()) { if(it.key_size() >= prefix_size && std::memcmp(prefix, it.key(), prefix_size * sizeof(CharT)) == 0) { it = hnode.array_hash().erase(it); ++nb_erased; --m_nb_elements; } else { ++it; } } return nb_erased; } /* * Burst */ bool need_burst(hash_node& node) const { return node.array_hash().size() >= m_burst_threshold; } /** * Burst the node and use the copy constructor instead of move constructor for the values. * Also use this method for trivial value types like int, int*, ... as it requires * less book-keeping (thus faster) than the burst using move constructors. 
*/ template::value && std::is_copy_constructible::value && (!std::is_nothrow_move_constructible::value || !std::is_nothrow_move_assignable::value || std::is_arithmetic::value || std::is_pointer::value)>::type* = nullptr> std::unique_ptr burst(hash_node& node) { const std::array first_char_count = get_first_char_count(node.array_hash().cbegin(), node.array_hash().cend()); auto new_node = make_unique(); for(auto it = node.array_hash().cbegin(); it != node.array_hash().cend(); ++it) { if(it.key_size() == 0) { new_node->val_node() = make_unique(it.value()); } else { hash_node& hnode = get_hash_node_for_char(first_char_count, *new_node, it.key()[0]); hnode.array_hash().insert_ks(it.key() + 1, it.key_size() - 1, it.value()); } } tsl_ht_assert(new_node->val_node() != nullptr || !new_node->empty()); return new_node; } /** * Burst the node and use the move constructor and move assign operator */ template::value && std::is_nothrow_move_constructible::value && std::is_nothrow_move_assignable::value && !std::is_arithmetic::value && !std::is_pointer::value>::type* = nullptr> std::unique_ptr burst(hash_node& node) { /** * We burst the node->array_hash() into multiple arrays hash. While doing so, we move each value in * the node->array_hash() into the new arrays hash. After each move, we save a pointer to where the value * has been moved. In case of exception, we rollback these values into the original node->array_hash(). 
*/ std::vector moved_values_rollback; moved_values_rollback.reserve(node.array_hash().size()); const std::array first_char_count = get_first_char_count(node.array_hash().cbegin(), node.array_hash().cend()); auto new_node = make_unique(); for(auto it = node.array_hash().begin(); it != node.array_hash().end(); ++it) { if(it.key_size() == 0) { new_node->val_node() = make_unique(std::move(it.value())); moved_values_rollback.push_back(std::addressof(new_node->val_node()->m_value)); } else { hash_node& hnode = get_hash_node_for_char(first_char_count, *new_node, it.key()[0]); auto it_insert = hnode.array_hash().insert_ks(it.key() + 1, it.key_size() - 1, std::move(it.value())); moved_values_rollback.push_back(std::addressof(it_insert.first.value())); } } tsl_ht_assert(new_node->val_node() != nullptr || !new_node->empty()); return new_node; } template::value>::type* = nullptr> std::unique_ptr burst(hash_node& node) { const std::array first_char_count = get_first_char_count(node.array_hash().begin(), node.array_hash().end()); auto new_node = make_unique(); for(auto it = node.array_hash().cbegin(); it != node.array_hash().cend(); ++it) { if(it.key_size() == 0) { new_node->val_node() = make_unique(); } else { hash_node& hnode = get_hash_node_for_char(first_char_count, *new_node, it.key()[0]); hnode.array_hash().insert_ks(it.key() + 1, it.key_size() - 1); } } tsl_ht_assert(new_node->val_node() != nullptr || !new_node->empty()); return new_node; } std::array get_first_char_count(typename array_hash_type::const_iterator begin, typename array_hash_type::const_iterator end) const { std::array count{{}}; for(auto it = begin; it != end; ++it) { if(it.key_size() == 0) { continue; } count[as_position(it.key()[0])]++; } return count; } hash_node& get_hash_node_for_char(const std::array& first_char_count, trie_node& tnode, CharT for_char) { if(tnode.child(for_char) == nullptr) { const size_type nb_buckets = size_type( std::ceil(float(first_char_count[as_position(for_char)] + 
HASH_NODE_DEFAULT_INIT_BUCKETS_COUNT/2) / m_max_load_factor )); tnode.set_child(for_char, make_unique(nb_buckets, m_hash, m_max_load_factor)); } return tnode.child(for_char)->as_hash_node(); } iterator mutable_iterator(const_iterator it) noexcept { // end iterator or reading from a trie node value if(it.m_current_hash_node == nullptr || it.m_read_trie_node_value) { typename array_hash_type::iterator default_it; return iterator(const_cast(it.m_current_trie_node), nullptr, default_it, default_it, it.m_read_trie_node_value); } else { hash_node* hnode = const_cast(it.m_current_hash_node); return iterator(const_cast(it.m_current_trie_node), hnode, hnode->array_hash().mutable_iterator(it.m_array_hash_iterator), hnode->array_hash().mutable_iterator(it.m_array_hash_end_iterator), it.m_read_trie_node_value); } } prefix_iterator mutable_iterator(const_prefix_iterator it) noexcept { // end iterator or reading from a trie node value if(it.m_current_hash_node == nullptr || it.m_read_trie_node_value) { typename array_hash_type::iterator default_it; return prefix_iterator(const_cast(it.m_current_trie_node), nullptr, default_it, default_it, it.m_read_trie_node_value, ""); } else { hash_node* hnode = const_cast(it.m_current_hash_node); return prefix_iterator(const_cast(it.m_current_trie_node), hnode, hnode->array_hash().mutable_iterator(it.m_array_hash_iterator), hnode->array_hash().mutable_iterator(it.m_array_hash_end_iterator), it.m_read_trie_node_value, it.m_prefix_filter); } } template void serialize_impl(Serializer& serializer) const { const slz_size_type version = SERIALIZATION_PROTOCOL_VERSION; serializer(version); const slz_size_type nb_elements = m_nb_elements; serializer(nb_elements); const float max_load_factor = m_max_load_factor; serializer(max_load_factor); const slz_size_type burst_threshold = m_burst_threshold; serializer(burst_threshold); std::basic_string str_buffer; auto it = begin(); auto last = end(); while(it != last) { // Serialize trie node value 
if(it.m_read_trie_node_value) { const CharT node_type = static_cast::type>(slz_node_type::TRIE_NODE); serializer(&node_type, 1); it.key(str_buffer); const slz_size_type str_size = str_buffer.size(); serializer(str_size); serializer(str_buffer.data(), str_buffer.size()); serialize_value(serializer, it); ++it; } // Serialize hash node values else { const CharT node_type = static_cast::type>(slz_node_type::HASH_NODE); serializer(&node_type, 1); it.hash_node_prefix(str_buffer); const slz_size_type str_size = str_buffer.size(); serializer(str_size); serializer(str_buffer.data(), str_buffer.size()); const hash_node* hnode = it.m_current_hash_node; tsl_ht_assert(hnode != nullptr); hnode->array_hash().serialize(serializer); it.skip_hash_node(); } } } template::value>::type* = nullptr> void serialize_value(Serializer& /*serializer*/, const_iterator /*it*/) const { } template::value>::type* = nullptr> void serialize_value(Serializer& serializer, const_iterator it) const { serializer(it.value()); } template void deserialize_impl(Deserializer& deserializer, bool hash_compatible) { tsl_ht_assert(m_nb_elements == 0 && m_root == nullptr); // Current trie must be empty const slz_size_type version = deserialize_value(deserializer); // For now we only have one version of the serialization protocol. // If it doesn't match there is a problem with the file. if(version != SERIALIZATION_PROTOCOL_VERSION) { THROW(std::runtime_error, "Can't deserialize the htrie_map/set. 
The protocol version header is invalid."); } const slz_size_type nb_elements = deserialize_value(deserializer); const float max_load_factor = deserialize_value(deserializer); const slz_size_type burst_threshold = deserialize_value(deserializer); this->burst_threshold(numeric_cast(burst_threshold, "Deserialized burst_threshold is too big.")); this->max_load_factor(max_load_factor); std::vector str_buffer; while(m_nb_elements < nb_elements) { CharT node_type_marker; deserializer(&node_type_marker, 1); static_assert(std::is_same::type>::value, ""); const slz_node_type node_type = static_cast(node_type_marker); if(node_type == slz_node_type::TRIE_NODE) { const std::size_t str_size = numeric_cast(deserialize_value(deserializer), "Deserialized str_size is too big."); str_buffer.resize(str_size); deserializer(str_buffer.data(), str_size); trie_node* current_node = insert_prefix_trie_nodes(str_buffer.data(), str_size); deserialize_value_node(deserializer, current_node); m_nb_elements++; } else if(node_type == slz_node_type::HASH_NODE) { const std::size_t str_size = numeric_cast(deserialize_value(deserializer), "Deserialized str_size is too big."); if(str_size == 0) { tsl_ht_assert(m_nb_elements == 0 && !m_root); m_root = make_unique(array_hash_type::deserialize(deserializer, hash_compatible)); m_nb_elements += m_root->as_hash_node().array_hash().size(); tsl_ht_assert(m_nb_elements == nb_elements); } else { str_buffer.resize(str_size); deserializer(str_buffer.data(), str_size); auto hnode = make_unique(array_hash_type::deserialize(deserializer, hash_compatible)); m_nb_elements += hnode->array_hash().size(); trie_node* current_node = insert_prefix_trie_nodes(str_buffer.data(), str_size - 1); current_node->set_child(str_buffer[str_size - 1], std::move(hnode)); } } else { THROW(std::runtime_error, "Unknown deserialized node type."); } } tsl_ht_assert(m_nb_elements == nb_elements); } trie_node* insert_prefix_trie_nodes(const CharT* prefix, std::size_t prefix_size) { if(m_root 
== nullptr) { m_root = make_unique(); } trie_node* current_node = &m_root->as_trie_node(); for(std::size_t iprefix = 0; iprefix < prefix_size; iprefix++) { if(current_node->child(prefix[iprefix]) == nullptr) { current_node->set_child(prefix[iprefix], make_unique()); } current_node = ¤t_node->child(prefix[iprefix])->as_trie_node(); } return current_node; } template::value>::type* = nullptr> void deserialize_value_node(Deserializer& /*deserializer*/, trie_node* current_node) { tsl_ht_assert(!current_node->val_node()); current_node->val_node() = make_unique(); } template::value>::type* = nullptr> void deserialize_value_node(Deserializer& deserializer, trie_node* current_node) { tsl_ht_assert(!current_node->val_node()); current_node->val_node() = make_unique(deserialize_value(deserializer)); } template static U deserialize_value(Deserializer& deserializer) { // MSVC < 2017 is not conformant, circumvent the problem by removing the template keyword #if defined (_MSC_VER) && _MSC_VER < 1910 return deserializer.Deserializer::operator()(); #else return deserializer.Deserializer::template operator()(); #endif } // Same as std::make_unique for non-array types which is only available in C++14 (we need to support C++11). template static std::unique_ptr make_unique(Args&&... args) { return std::unique_ptr(new U(std::forward(args)...)); } public: static constexpr float HASH_NODE_DEFAULT_MAX_LOAD_FACTOR = 8.0f; static const size_type DEFAULT_BURST_THRESHOLD = 16384; private: /** * Fixed size type used to represent size_type values on serialization. Need to be big enough * to represent a std::size_t on 32 and 64 bits platforms, and must be the same size on both platforms. */ using slz_size_type = std::uint64_t; enum class slz_node_type: CharT { TRIE_NODE = 0, HASH_NODE = 1 }; /** * Protocol version currenlty used for serialization. 
*/ static const slz_size_type SERIALIZATION_PROTOCOL_VERSION = 1; static const size_type HASH_NODE_DEFAULT_INIT_BUCKETS_COUNT = 32; static const size_type MIN_BURST_THRESHOLD = 4; std::unique_ptr m_root; size_type m_nb_elements; Hash m_hash; float m_max_load_factor; size_type m_burst_threshold; }; } // end namespace detail_htrie_hash } // end namespace tsl #endif